LLVM 22.0.0git
GCNSchedStrategy.cpp
1//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This contains a MachineSchedStrategy implementation for maximizing wave
11/// occupancy on GCN hardware.
12///
13/// This pass will apply multiple scheduling stages to the same function.
14/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
15/// entry point for the scheduling of those regions is
16/// GCNScheduleDAGMILive::runSchedStages.
17///
18/// Generally, the reason for having multiple scheduling stages is to account
19/// for the kernel-wide effect of register usage on occupancy. Usually, only a
20/// few scheduling regions will have register pressure high enough to limit
21/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
22/// other regions.
23///
24//===----------------------------------------------------------------------===//
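//
// Editorial sketch (not part of the original file): the stage driver in
// GCNScheduleDAGMILive::runSchedStages() below conceptually runs each stage
// created by createSchedStage() over every recorded region, roughly:
//
//   while (S.advanceStage()) {                 // OccInitialSchedule,
//     auto Stage = createSchedStage(S.getCurrentStage());
//     if (!Stage->initGCNSchedStage())         // UnclusteredHighRPReschedule,
//       continue;                              // ClusteredLowOccupancyReschedule,
//     for (auto Region : Regions) {            // PreRARematerialize, or an
//       if (!Stage->initGCNRegion()) {         // ILP/MemoryClause initial stage
//         Stage->advanceRegion();
//         exitRegion();
//         continue;
//       }
//       // ...schedule the region...
//       Stage->finalizeGCNRegion();
//     }
//     Stage->finalizeGCNSchedStage();
//   }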
25
26#include "GCNSchedStrategy.h"
27#include "AMDGPUIGroupLP.h"
28#include "GCNRegPressure.h"
31#include "llvm/ADT/STLExtras.h"
33#include "llvm/MC/LaneBitmask.h"
35
36#define DEBUG_TYPE "machine-scheduler"
37
38using namespace llvm;
39
41 "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
42 cl::desc("Disable unclustered high register pressure "
43 "reduction scheduling stage."),
44 cl::init(false));
45
47 "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
48 cl::desc("Disable clustered low occupancy "
49 "rescheduling for ILP scheduling stage."),
50 cl::init(false));
51
53 "amdgpu-schedule-metric-bias", cl::Hidden,
55 "Sets the bias which adds weight to occupancy vs latency. Set it to "
56 "100 to chase the occupancy only."),
57 cl::init(10));
58
59static cl::opt<bool>
60 RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,
61 cl::desc("Relax occupancy targets for kernels which are memory "
62 "bound (amdgpu-membound-threshold), or "
63 "Wave Limited (amdgpu-limit-wave-threshold)."),
64 cl::init(false));
65
67 "amdgpu-use-amdgpu-trackers", cl::Hidden,
68 cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
69 cl::init(false));
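
// Editorial usage sketch (assumption, not from this file): being cl::opt
// flags, these knobs are normally passed straight to llc, or via -mllvm when
// the backend is driven from clang, for example:
//
//   llc -mtriple=amdgcn -mcpu=gfx90a -amdgpu-schedule-metric-bias=50 \
//       -amdgpu-use-amdgpu-trackers in.ll -o out.s
//
//   clang ... -mllvm -amdgpu-disable-unclustered-high-rp-reschedule ...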
70
71const unsigned ScheduleMetrics::ScaleFactor = 100;
72
74 : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
75 DownwardTracker(*C->LIS), UpwardTracker(*C->LIS), HasHighPressure(false) {
76}
77
80
81 MF = &DAG->MF;
82
84
86 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
88 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
89
91 // Set the initial TargetOccupancy to the maximum occupancy that we can
92 // achieve for this function. This effectively sets a lower bound on the
93 // 'Critical' register limits in the scheduler.
94 // Allow for lower occupancy targets if kernel is wave limited or memory
95 // bound, and using the relaxed occupancy feature.
99 std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
100
101 if (!KnownExcessRP) {
102 VGPRCriticalLimit = std::min(
103 ST.getMaxNumVGPRs(TargetOccupancy, MFI.getDynamicVGPRBlockSize()),
105 } else {
106 // This is similar to the ST.getMaxNumVGPRs(TargetOccupancy) result, except
107 // that it returns a reasonably small number for targets with lots of VGPRs,
108 // such as GFX10 and GFX11.
109 LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
110 "VGPRCriticalLimit calculation method.\n");
111 unsigned DynamicVGPRBlockSize = MFI.getDynamicVGPRBlockSize();
112 unsigned Granule =
113 AMDGPU::IsaInfo::getVGPRAllocGranule(&ST, DynamicVGPRBlockSize);
114 unsigned Addressable =
115 AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST, DynamicVGPRBlockSize);
116 unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
117 VGPRBudget = std::max(VGPRBudget, Granule);
118 VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
119 }
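 // Worked example for the alternative limit above (illustrative numbers
 // only): with Addressable = 256 VGPRs, TargetOccupancy = 6 waves, and
 // Granule = 8, VGPRBudget = alignDown(256 / 6, 8) = alignDown(42, 8) = 40,
 // which is then clamped below by Granule and above by VGPRExcessLimit.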
120
121 // Subtract error margin and bias from register limits and avoid overflow.
126
127 LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
128 << ", VGPRExcessLimit = " << VGPRExcessLimit
129 << ", SGPRCriticalLimit = " << SGPRCriticalLimit
130 << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
131}
132
133/// Checks whether \p SU can use the cached DAG pressure diffs to compute the
134/// current register pressure.
135///
136/// This works for the common case, but it has a few exceptions that have been
137/// observed through trial and error:
138/// - Explicit physical register operands
139/// - Subregister definitions
140///
141/// In both of those cases, PressureDiff doesn't represent the actual pressure,
142/// and querying LiveIntervals through the RegPressureTracker is needed to get
143/// an accurate value.
144///
145/// We should eventually only use PressureDiff for maximum performance, but this
146/// already allows 80% of SUs to take the fast path without changing scheduling
147/// at all. Further changes would either change scheduling, or require a lot
148/// more logic to recover an accurate pressure estimate from the PressureDiffs.
149static bool canUsePressureDiffs(const SUnit &SU) {
150 if (!SU.isInstr())
151 return false;
152
153 // Cannot use pressure diffs for subregister defs or with physregs; it's
154 // imprecise in both cases.
155 for (const auto &Op : SU.getInstr()->operands()) {
156 if (!Op.isReg() || Op.isImplicit())
157 continue;
158 if (Op.getReg().isPhysical() ||
159 (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))
160 return false;
161 }
162 return true;
163}
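// Editorial, MIR-style illustration (hypothetical operands, not from this
// file) of instructions that fail the check above and therefore fall back to
// the RegPressureTracker slow path:
//   $vgpr0 = COPY %0                  ; explicit physical register operand
//   undef %1.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
//                                     ; subregister definition (the implicit
//                                     ; $exec operand is skipped by the loop)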
164
166 bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU,
167 std::vector<unsigned> &Pressure, std::vector<unsigned> &MaxPressure,
168 GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker,
169 ScheduleDAGMI *DAG, const SIRegisterInfo *SRI) {
170 // getDownwardPressure() and getUpwardPressure() make temporary changes to
171 // the tracker, so we need to pass those functions a non-const reference.
172 RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
173 if (!GCNTrackers) {
174 AtTop
175 ? TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure)
176 : TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
177
178 return;
179 }
180
181 // GCNTrackers
182 Pressure.resize(4, 0);
183 MachineInstr *MI = SU->getInstr();
184 GCNRegPressure NewPressure;
185 if (AtTop) {
186 GCNDownwardRPTracker TempDownwardTracker(DownwardTracker);
187 NewPressure = TempDownwardTracker.bumpDownwardPressure(MI, SRI);
188 } else {
189 GCNUpwardRPTracker TempUpwardTracker(UpwardTracker);
190 TempUpwardTracker.recede(*MI);
191 NewPressure = TempUpwardTracker.getPressure();
192 }
193 Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum();
194 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
195 NewPressure.getArchVGPRNum();
196 Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();
197}
198
200 bool AtTop,
201 const RegPressureTracker &RPTracker,
202 const SIRegisterInfo *SRI,
203 unsigned SGPRPressure,
204 unsigned VGPRPressure, bool IsBottomUp) {
205 Cand.SU = SU;
206 Cand.AtTop = AtTop;
207
208 if (!DAG->isTrackingPressure())
209 return;
210
211 Pressure.clear();
212 MaxPressure.clear();
213
214 // We try to use the cached PressureDiffs in the ScheduleDAG whenever
215 // possible over querying the RegPressureTracker.
216 //
217 // RegPressureTracker will make a lot of LIS queries, which are very
218 // expensive; it is considered a slow function in this context.
219 //
220 // PressureDiffs are precomputed and cached, and getPressureDiff is just a
221 // trivial lookup into an array. It is pretty much free.
222 //
223 // In EXPENSIVE_CHECKS, we always query RPTracker to verify the results of
224 // PressureDiffs.
225 if (AtTop || !canUsePressureDiffs(*SU) || GCNTrackers) {
226 getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure,
228 } else {
229 // Reserve 4 slots.
230 Pressure.resize(4, 0);
231 Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
232 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
233
234 for (const auto &Diff : DAG->getPressureDiff(SU)) {
235 if (!Diff.isValid())
236 continue;
237 // PressureDiffs are always bottom-up, so if we're working top-down we need
238 // to invert their sign.
239 Pressure[Diff.getPSet()] +=
240 (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
241 }
242
243#ifdef EXPENSIVE_CHECKS
244 std::vector<unsigned> CheckPressure, CheckMaxPressure;
245 getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,
247 if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
248 CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
249 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
250 CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
251 errs() << "Register Pressure is inaccurate when calculated through "
252 "PressureDiff\n"
253 << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
254 << ", expected "
255 << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n"
256 << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
257 << ", expected "
258 << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
259 report_fatal_error("inaccurate register pressure calculation");
260 }
261#endif
262 }
263
264 unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
265 unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
266
267 // If two instructions increase the pressure of different register sets
268 // by the same amount, the generic scheduler will prefer to schedule the
269 // instruction that increases the set with the least amount of registers,
270 // which in our case would be SGPRs. This is rarely what we want, so
271 // when we report excess/critical register pressure, we do it either
272 // only for VGPRs or only for SGPRs.
273
274 // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
275 const unsigned MaxVGPRPressureInc = 16;
276 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
277 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
278
279 // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
280 // to increase the likelihood we don't go over the limits. We should improve
281 // the analysis to look through dependencies to find the path with the least
282 // register pressure.
283
284 // We only need to update the RPDelta for instructions that increase register
285 // pressure. Instructions that decrease or keep reg pressure the same will be
286 // marked as RegExcess in tryCandidate() when they are compared with
287 // instructions that increase the register pressure.
288 if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
289 HasHighPressure = true;
290 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
291 Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
292 }
293
294 if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
295 HasHighPressure = true;
296 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
297 Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
298 }
299
300 // Register pressure is considered 'CRITICAL' if it is approaching a value
301 // that would reduce the wave occupancy for the execution unit. When
302 // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure have
303 // the same cost, so we don't need to prefer one over the other.
304
305 int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
306 int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
307
308 if (SGPRDelta >= 0 || VGPRDelta >= 0) {
309 HasHighPressure = true;
310 if (SGPRDelta > VGPRDelta) {
311 Cand.RPDelta.CriticalMax =
312 PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
313 Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
314 } else {
315 Cand.RPDelta.CriticalMax =
316 PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
317 Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
318 }
319 }
320}
321
322// This function is mostly cut and pasted from
323// GenericScheduler::pickNodeFromQueue()
325 const CandPolicy &ZonePolicy,
326 const RegPressureTracker &RPTracker,
327 SchedCandidate &Cand,
328 bool IsBottomUp) {
329 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
331 unsigned SGPRPressure = 0;
332 unsigned VGPRPressure = 0;
333 if (DAG->isTrackingPressure()) {
334 if (!GCNTrackers) {
335 SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
336 VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
337 } else {
338 GCNRPTracker *T = IsBottomUp
339 ? static_cast<GCNRPTracker *>(&UpwardTracker)
340 : static_cast<GCNRPTracker *>(&DownwardTracker);
341 SGPRPressure = T->getPressure().getSGPRNum();
342 VGPRPressure = T->getPressure().getArchVGPRNum();
343 }
344 }
345 ReadyQueue &Q = Zone.Available;
346 for (SUnit *SU : Q) {
347
348 SchedCandidate TryCand(ZonePolicy);
349 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
350 VGPRPressure, IsBottomUp);
351 // Pass SchedBoundary only when comparing nodes from the same boundary.
352 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
353 tryCandidate(Cand, TryCand, ZoneArg);
354 if (TryCand.Reason != NoCand) {
355 // Initialize resource delta if needed in case future heuristics query it.
356 if (TryCand.ResDelta == SchedResourceDelta())
357 TryCand.initResourceDelta(Zone.DAG, SchedModel);
358 Cand.setBest(TryCand);
360 }
361 }
362}
363
364// This function is mostly cut and pasted from
365// GenericScheduler::pickNodeBidirectional()
367 // Schedule as far as possible in the direction of no choice. This is most
368 // efficient, but also provides the best heuristics for CriticalPSets.
369 if (SUnit *SU = Bot.pickOnlyChoice()) {
370 IsTopNode = false;
371 return SU;
372 }
373 if (SUnit *SU = Top.pickOnlyChoice()) {
374 IsTopNode = true;
375 return SU;
376 }
377 // Set the bottom-up policy based on the state of the current bottom zone and
378 // the instructions outside the zone, including the top zone.
379 CandPolicy BotPolicy;
380 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
381 // Set the top-down policy based on the state of the current top zone and
382 // the instructions outside the zone, including the bottom zone.
383 CandPolicy TopPolicy;
384 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
385
386 // See if BotCand is still valid (because we previously scheduled from Top).
387 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
388 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
389 BotCand.Policy != BotPolicy) {
392 /*IsBottomUp=*/true);
393 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
394 } else {
396#ifndef NDEBUG
397 if (VerifyScheduling) {
398 SchedCandidate TCand;
399 TCand.reset(CandPolicy());
400 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand,
401 /*IsBottomUp=*/true);
402 assert(TCand.SU == BotCand.SU &&
403 "Last pick result should correspond to re-picking right now");
404 }
405#endif
406 }
407
408 // Check if the top Q has a better candidate.
409 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
410 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
411 TopCand.Policy != TopPolicy) {
414 /*IsBottomUp=*/false);
415 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
416 } else {
418#ifndef NDEBUG
419 if (VerifyScheduling) {
420 SchedCandidate TCand;
421 TCand.reset(CandPolicy());
422 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,
423 /*IsBottomUp=*/false);
424 assert(TCand.SU == TopCand.SU &&
425 "Last pick result should correspond to re-picking right now");
426 }
427#endif
428 }
429
430 // Pick best from BotCand and TopCand.
431 LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
432 dbgs() << "Bot Cand: "; traceCandidate(BotCand););
433 SchedCandidate Cand = BotCand;
435 tryCandidate(Cand, TopCand, nullptr);
436 if (TopCand.Reason != NoCand) {
437 Cand.setBest(TopCand);
438 }
439 LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
440
441 IsTopNode = Cand.AtTop;
442 return Cand.SU;
443}
444
445// This function is mostly cut and pasted from
446// GenericScheduler::pickNode()
448 if (DAG->top() == DAG->bottom()) {
450 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
451 return nullptr;
452 }
453 SUnit *SU;
454 do {
456 SU = Top.pickOnlyChoice();
457 if (!SU) {
458 CandPolicy NoPolicy;
459 TopCand.reset(NoPolicy);
461 /*IsBottomUp=*/false);
462 assert(TopCand.Reason != NoCand && "failed to find a candidate");
463 SU = TopCand.SU;
464 }
465 IsTopNode = true;
466 } else if (RegionPolicy.OnlyBottomUp) {
467 SU = Bot.pickOnlyChoice();
468 if (!SU) {
469 CandPolicy NoPolicy;
470 BotCand.reset(NoPolicy);
472 /*IsBottomUp=*/true);
473 assert(BotCand.Reason != NoCand && "failed to find a candidate");
474 SU = BotCand.SU;
475 }
476 IsTopNode = false;
477 } else {
478 SU = pickNodeBidirectional(IsTopNode);
479 }
480 } while (SU->isScheduled);
481
482 if (SU->isTopReady())
483 Top.removeReady(SU);
484 if (SU->isBottomReady())
485 Bot.removeReady(SU);
486
487 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
488 << *SU->getInstr());
489 return SU;
490}
491
492void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
493 if (GCNTrackers) {
494 MachineInstr *MI = SU->getInstr();
495 IsTopNode ? (void)DownwardTracker.advance(MI, false)
497 }
498
499 return GenericScheduler::schedNode(SU, IsTopNode);
500}
501
504 return *CurrentStage;
505}
506
509 if (!CurrentStage)
511 else
512 CurrentStage++;
513
514 return CurrentStage != SchedStages.end();
515}
516
519 return std::next(CurrentStage) != SchedStages.end();
520}
521
523 assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
524 return *std::next(CurrentStage);
525}
526
528 const MachineSchedContext *C, bool IsLegacyScheduler)
534 GCNTrackers = GCNTrackers & !IsLegacyScheduler;
535}
536
540}
541
543 SchedCandidate &TryCand,
544 SchedBoundary *Zone) const {
545 // Initialize the candidate if needed.
546 if (!Cand.isValid()) {
547 TryCand.Reason = NodeOrder;
548 return true;
549 }
550
551 // Avoid spilling by exceeding the register limit.
552 if (DAG->isTrackingPressure() &&
553 tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
554 RegExcess, TRI, DAG->MF))
555 return TryCand.Reason != NoCand;
556
557 // Bias PhysReg defs and copies toward their uses and defs, respectively.
558 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
559 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
560 return TryCand.Reason != NoCand;
561
562 bool SameBoundary = Zone != nullptr;
563 if (SameBoundary) {
564 // Prioritize instructions that read unbuffered resources by stall cycles.
565 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
566 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
567 return TryCand.Reason != NoCand;
568
569 // Avoid critical resource consumption and balance the schedule.
572 TryCand, Cand, ResourceReduce))
573 return TryCand.Reason != NoCand;
575 Cand.ResDelta.DemandedResources, TryCand, Cand,
577 return TryCand.Reason != NoCand;
578
579 // Unconditionally try to reduce latency.
580 if (tryLatency(TryCand, Cand, *Zone))
581 return TryCand.Reason != NoCand;
582
583 // Weak edges are for clustering and other constraints.
584 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
585 getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
586 return TryCand.Reason != NoCand;
587 }
588
589 // Keep clustered nodes together to encourage downstream peephole
590 // optimizations which may reduce resource requirements.
591 //
592 // This is a best effort to set things up for a post-RA pass. Optimizations
593 // like generating loads of multiple registers should ideally be done within
594 // the scheduler pass by combining the loads during DAG postprocessing.
595 unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
596 unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
597 bool CandIsClusterSucc =
598 isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
599 bool TryCandIsClusterSucc =
600 isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
601 if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
602 Cluster))
603 return TryCand.Reason != NoCand;
604
605 // Avoid increasing the max critical pressure in the scheduled region.
606 if (DAG->isTrackingPressure() &&
608 TryCand, Cand, RegCritical, TRI, DAG->MF))
609 return TryCand.Reason != NoCand;
610
611 // Avoid increasing the max pressure of the entire region.
612 if (DAG->isTrackingPressure() &&
613 tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
614 Cand, RegMax, TRI, DAG->MF))
615 return TryCand.Reason != NoCand;
616
617 if (SameBoundary) {
618 // Fall through to original instruction order.
619 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
620 (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
621 TryCand.Reason = NodeOrder;
622 return true;
623 }
624 }
625 return false;
626}
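// Editorial summary of the priority order applied above: register excess ->
// physreg bias -> (same boundary only: stall cycles, critical resources,
// demanded resources, latency, weak edges) -> clustering -> critical-set
// pressure -> maximum region pressure -> original node order.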
627
629 const MachineSchedContext *C)
632}
633
634/// GCNMaxMemoryClauseSchedStrategy tries to clause memory instructions as
635/// much as possible. This is achieved by:
636/// 1. Prioritizing clustered operations before the stall latency heuristic.
637/// 2. Prioritizing long-latency loads before the stall latency heuristic.
638///
639/// \param Cand provides the policy and current best candidate.
640/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
641/// \param Zone describes the scheduled zone that we are extending, or nullptr
642/// if Cand is from a different zone than TryCand.
643/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
645 SchedCandidate &TryCand,
646 SchedBoundary *Zone) const {
647 // Initialize the candidate if needed.
648 if (!Cand.isValid()) {
649 TryCand.Reason = NodeOrder;
650 return true;
651 }
652
653 // Bias PhysReg defs and copies toward their uses and defs, respectively.
654 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
655 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
656 return TryCand.Reason != NoCand;
657
658 if (DAG->isTrackingPressure()) {
659 // Avoid exceeding the target's limit.
660 if (tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
661 RegExcess, TRI, DAG->MF))
662 return TryCand.Reason != NoCand;
663
664 // Avoid increasing the max critical pressure in the scheduled region.
666 TryCand, Cand, RegCritical, TRI, DAG->MF))
667 return TryCand.Reason != NoCand;
668 }
669
670 // MaxMemoryClause-specific: We prioritize clustered instructions as we would
671 // get more benefit from clausing these memory instructions.
672 unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
673 unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
674 bool CandIsClusterSucc =
675 isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
676 bool TryCandIsClusterSucc =
677 isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
678 if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
679 Cluster))
680 return TryCand.Reason != NoCand;
681
682 // We only compare a subset of features when comparing nodes between the
683 // Top and Bottom boundaries. Some properties are simply incomparable; in
684 // many other instances we should only override the other boundary if
685 // something is a clearly good pick on one boundary. Skip heuristics that
686 // are more "tie-breaking" in nature.
687 bool SameBoundary = Zone != nullptr;
688 if (SameBoundary) {
689 // For loops that are acyclic path limited, aggressively schedule for
690 // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
691 // heuristics to take precedence.
692 if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
693 tryLatency(TryCand, Cand, *Zone))
694 return TryCand.Reason != NoCand;
695
696 // MaxMemoryClause-specific: Prioritize long latency memory load
697 // instructions in top-bottom order to hide more latency. The mayLoad check
698 // is used to exclude store-like instructions, which we do not want to
699 // schedule too early.
700 bool TryMayLoad =
701 TryCand.SU->isInstr() && TryCand.SU->getInstr()->mayLoad();
702 bool CandMayLoad = Cand.SU->isInstr() && Cand.SU->getInstr()->mayLoad();
703
704 if (TryMayLoad || CandMayLoad) {
705 bool TryLongLatency =
706 TryCand.SU->Latency > 10 * Cand.SU->Latency && TryMayLoad;
707 bool CandLongLatency =
708 10 * TryCand.SU->Latency < Cand.SU->Latency && CandMayLoad;
709
710 if (tryGreater(Zone->isTop() ? TryLongLatency : CandLongLatency,
711 Zone->isTop() ? CandLongLatency : TryLongLatency, TryCand,
712 Cand, Stall))
713 return TryCand.Reason != NoCand;
714 }
715 // Prioritize instructions that read unbuffered resources by stall cycles.
716 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
717 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
718 return TryCand.Reason != NoCand;
719 }
720
721 if (SameBoundary) {
722 // Weak edges are for clustering and other constraints.
723 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
724 getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
725 return TryCand.Reason != NoCand;
726 }
727
728 // Avoid increasing the max pressure of the entire region.
729 if (DAG->isTrackingPressure() &&
730 tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
731 Cand, RegMax, TRI, DAG->MF))
732 return TryCand.Reason != NoCand;
733
734 if (SameBoundary) {
735 // Avoid critical resource consumption and balance the schedule.
738 TryCand, Cand, ResourceReduce))
739 return TryCand.Reason != NoCand;
741 Cand.ResDelta.DemandedResources, TryCand, Cand,
743 return TryCand.Reason != NoCand;
744
745 // Avoid serializing long latency dependence chains.
746 // For acyclic path limited loops, latency was already checked above.
748 !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
749 return TryCand.Reason != NoCand;
750
751 // Fall through to original instruction order.
752 if (Zone->isTop() == (TryCand.SU->NodeNum < Cand.SU->NodeNum)) {
753 assert(TryCand.SU->NodeNum != Cand.SU->NodeNum);
754 TryCand.Reason = NodeOrder;
755 return true;
756 }
757 }
758
759 return false;
760}
761
763 MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
764 : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
765 MFI(*MF.getInfo<SIMachineFunctionInfo>()),
766 StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
767 RegionLiveOuts(this, /*IsLiveOut=*/true) {
768
769 // We want regions with a single MI to be scheduled so that we can reason
770 // about them correctly during scheduling stages that move MIs between regions
771 // (e.g., rematerialization).
773 LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
774 if (RelaxedOcc) {
775 MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
776 if (MinOccupancy != StartingOccupancy)
777 LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
778 << ".\n");
779 }
780}
781
782std::unique_ptr<GCNSchedStage>
783GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
784 switch (SchedStageID) {
786 return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
788 return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
790 return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
792 return std::make_unique<PreRARematStage>(SchedStageID, *this);
794 return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
796 return std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,
797 *this);
798 }
799
800 llvm_unreachable("Unknown SchedStageID.");
801}
802
804 // Collect all scheduling regions. The actual scheduling is performed in
805 // GCNScheduleDAGMILive::finalizeSchedule.
806 Regions.push_back(std::pair(RegionBegin, RegionEnd));
807}
808
810GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
812 RPTracker.advance(Regions[RegionIdx].first, Regions[RegionIdx].second,
813 &LiveIns[RegionIdx]);
814 return RPTracker.moveMaxPressure();
815}
816
818 MachineBasicBlock::iterator RegionEnd) {
819 auto REnd = RegionEnd == RegionBegin->getParent()->end()
820 ? std::prev(RegionEnd)
821 : RegionEnd;
822 return &*skipDebugInstructionsBackward(REnd, RegionBegin);
823}
824
825void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
826 const MachineBasicBlock *MBB) {
828
829 // If the block has only one successor then the live-ins of that successor are
830 // live-outs of the current block. We can reuse the calculated live set if the
831 // successor will be sent to scheduling past the current block.
832
833 // However, due to a bug in LiveInterval analysis it may happen that two
834 // predecessors of the same successor block have different lane bitmasks for
835 // a live-out register. Work around that by sticking to a one-to-one
836 // relationship, i.e. one predecessor with one successor block.
837 const MachineBasicBlock *OnlySucc = nullptr;
838 if (MBB->succ_size() == 1) {
839 auto *Candidate = *MBB->succ_begin();
840 if (!Candidate->empty() && Candidate->pred_size() == 1) {
842 if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
843 OnlySucc = Candidate;
844 }
845 }
846
847 // Scheduler sends regions from the end of the block upwards.
848 size_t CurRegion = RegionIdx;
849 for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
850 if (Regions[CurRegion].first->getParent() != MBB)
851 break;
852 --CurRegion;
853
854 auto I = MBB->begin();
855 auto LiveInIt = MBBLiveIns.find(MBB);
856 auto &Rgn = Regions[CurRegion];
857 auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
858 if (LiveInIt != MBBLiveIns.end()) {
859 auto LiveIn = std::move(LiveInIt->second);
860 RPTracker.reset(*MBB->begin(), &LiveIn);
861 MBBLiveIns.erase(LiveInIt);
862 } else {
863 I = Rgn.first;
864 auto LRS = BBLiveInMap.lookup(NonDbgMI);
865#ifdef EXPENSIVE_CHECKS
866 assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
867#endif
868 RPTracker.reset(*I, &LRS);
869 }
870
871 for (;;) {
872 I = RPTracker.getNext();
873
874 if (Regions[CurRegion].first == I || NonDbgMI == I) {
875 LiveIns[CurRegion] = RPTracker.getLiveRegs();
876 RPTracker.clearMaxPressure();
877 }
878
879 if (Regions[CurRegion].second == I) {
880 Pressure[CurRegion] = RPTracker.moveMaxPressure();
881 if (CurRegion-- == RegionIdx)
882 break;
883 auto &Rgn = Regions[CurRegion];
884 NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
885 }
886 RPTracker.advanceToNext();
887 RPTracker.advanceBeforeNext();
888 }
889
890 if (OnlySucc) {
891 if (I != MBB->end()) {
892 RPTracker.advanceToNext();
894 }
895 RPTracker.advanceBeforeNext();
896 MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
897 }
898}
899
901GCNScheduleDAGMILive::getRegionLiveInMap() const {
902 assert(!Regions.empty());
903 std::vector<MachineInstr *> RegionFirstMIs;
904 RegionFirstMIs.reserve(Regions.size());
905 for (auto &[RegionBegin, RegionEnd] : reverse(Regions))
906 RegionFirstMIs.push_back(
908
909 return getLiveRegMap(RegionFirstMIs, /*After=*/false, *LIS);
910}
911
913GCNScheduleDAGMILive::getRegionLiveOutMap() const {
914 assert(!Regions.empty());
915 std::vector<MachineInstr *> RegionLastMIs;
916 RegionLastMIs.reserve(Regions.size());
917 for (auto &[RegionBegin, RegionEnd] : reverse(Regions))
918 RegionLastMIs.push_back(getLastMIForRegion(RegionBegin, RegionEnd));
919
920 return getLiveRegMap(RegionLastMIs, /*After=*/true, *LIS);
921}
922
924 IdxToInstruction.clear();
925
926 RegionLiveRegMap =
927 IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();
928 for (unsigned I = 0; I < DAG->Regions.size(); I++) {
929 MachineInstr *RegionKey =
930 IsLiveOut
931 ? getLastMIForRegion(DAG->Regions[I].first, DAG->Regions[I].second)
932 : &*DAG->Regions[I].first;
933 IdxToInstruction[I] = RegionKey;
934 }
935}
936
938 // Start actual scheduling here. This function is called by the base
939 // MachineScheduler after all regions have been recorded by
940 // GCNScheduleDAGMILive::schedule().
941 LiveIns.resize(Regions.size());
942 Pressure.resize(Regions.size());
943 RegionsWithHighRP.resize(Regions.size());
944 RegionsWithExcessRP.resize(Regions.size());
945 RegionsWithIGLPInstrs.resize(Regions.size());
946 RegionsWithHighRP.reset();
947 RegionsWithExcessRP.reset();
948 RegionsWithIGLPInstrs.reset();
949
950 runSchedStages();
951}
952
953void GCNScheduleDAGMILive::runSchedStages() {
954 LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
955
956 if (!Regions.empty()) {
957 BBLiveInMap = getRegionLiveInMap();
958 if (GCNTrackers)
959 RegionLiveOuts.buildLiveRegMap();
960 }
961
962 GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
963 while (S.advanceStage()) {
964 auto Stage = createSchedStage(S.getCurrentStage());
965 if (!Stage->initGCNSchedStage())
966 continue;
967
968 for (auto Region : Regions) {
969 RegionBegin = Region.first;
970 RegionEnd = Region.second;
971 // Setup for scheduling the region and check whether it should be skipped.
972 if (!Stage->initGCNRegion()) {
973 Stage->advanceRegion();
974 exitRegion();
975 continue;
976 }
977
978 if (GCNTrackers) {
979 GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker();
980 GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
981 GCNRPTracker::LiveRegSet *RegionLiveIns =
982 &LiveIns[Stage->getRegionIdx()];
983
984 reinterpret_cast<GCNRPTracker *>(DownwardTracker)
985 ->reset(MRI, *RegionLiveIns);
986 reinterpret_cast<GCNRPTracker *>(UpwardTracker)
987 ->reset(MRI, RegionLiveOuts.getLiveRegsForRegionIdx(
988 Stage->getRegionIdx()));
989 }
990
992 Stage->finalizeGCNRegion();
993 }
994
995 Stage->finalizeGCNSchedStage();
996 }
997}
998
999#ifndef NDEBUG
1001 switch (StageID) {
1003 OS << "Max Occupancy Initial Schedule";
1004 break;
1006 OS << "Unclustered High Register Pressure Reschedule";
1007 break;
1009 OS << "Clustered Low Occupancy Reschedule";
1010 break;
1012 OS << "Pre-RA Rematerialize";
1013 break;
1015 OS << "Max ILP Initial Schedule";
1016 break;
1018 OS << "Max memory clause Initial Schedule";
1019 break;
1020 }
1021
1022 return OS;
1023}
1024#endif
1025
1027 : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
1028 MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}
1029
1031 if (!DAG.LIS)
1032 return false;
1033
1034 LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
1035 return true;
1036}
1037
1040 return false;
1041
1043 return false;
1044
1045 if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
1046 return false;
1047
1051
1052 InitialOccupancy = DAG.MinOccupancy;
1053 // Aggressively try to reduce register pressure in the unclustered high RP
1054 // stage. Temporarily increase occupancy target in the region.
1057 if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
1058 MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
1059
1060 LLVM_DEBUG(
1061 dbgs()
1062 << "Retrying function scheduling without clustering. "
1063 "Aggressivly try to reduce register pressure to achieve occupancy "
1064 << DAG.MinOccupancy << ".\n");
1065
1066 return true;
1067}
1068
1071 return false;
1072
1074 return false;
1075
1076 // Don't bother trying to improve ILP in lower RP regions if occupancy has not
1077 // been dropped. All regions will have already been scheduled with the ideal
1078 // occupancy targets.
1079 if (DAG.StartingOccupancy <= DAG.MinOccupancy)
1080 return false;
1081
1082 LLVM_DEBUG(
1083 dbgs() << "Retrying function scheduling with lowest recorded occupancy "
1084 << DAG.MinOccupancy << ".\n");
1085 return true;
1086}
1087
1088/// Allows to easily filter for this stage's debug output.
1089#define REMAT_PREFIX "[PreRARemat] "
1090#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
1091
1093 // FIXME: This pass will invalidate cached BBLiveInMap and MBBLiveIns for
1094 // regions in between the defs and the region we sank the def to. Will need to
1095 // be fixed if there is another pass after this pass.
1096 assert(!S.hasNextStage());
1097
1098 if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() == 1)
1099 return false;
1100
1101 // Before performing any IR modification record the parent region of each MI
1102 // and the parent MBB of each region.
1103 const unsigned NumRegions = DAG.Regions.size();
1104 RegionBB.reserve(NumRegions);
1105 for (unsigned I = 0; I < NumRegions; ++I) {
1106 RegionBoundaries Region = DAG.Regions[I];
1107 for (auto MI = Region.first; MI != Region.second; ++MI)
1108 MIRegion.insert({&*MI, I});
1109 RegionBB.push_back(Region.first->getParent());
1110 }
1111
1112 if (!canIncreaseOccupancyOrReduceSpill())
1113 return false;
1114
1115 // Rematerialize identified instructions and update scheduler's state.
1116 rematerialize();
1117 if (GCNTrackers)
1118 DAG.RegionLiveOuts.buildLiveRegMap();
1119 REMAT_DEBUG({
1120 dbgs() << "Retrying function scheduling with new min. occupancy of "
1121 << AchievedOcc << " from rematerializing (original was "
1122 << DAG.MinOccupancy;
1123 if (TargetOcc)
1124 dbgs() << ", target was " << *TargetOcc;
1125 dbgs() << ")\n";
1126 });
1127
1128 if (AchievedOcc > DAG.MinOccupancy) {
1129 DAG.MinOccupancy = AchievedOcc;
1131 MFI.increaseOccupancy(MF, DAG.MinOccupancy);
1132 }
1133 return true;
1134}
1135
1137 DAG.finishBlock();
1138 LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
1139}
1140
1144 if (DAG.MinOccupancy > InitialOccupancy) {
1146 << " stage successfully increased occupancy to "
1147 << DAG.MinOccupancy << '\n');
1148 }
1149
1151}
1152
1154 // Check whether this new region is also a new block.
1155 if (DAG.RegionBegin->getParent() != CurrentMBB)
1156 setupNewBlock();
1157
1158 unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
1159 DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);
1160
1161 // Skip empty scheduling regions (0 or 1 schedulable instructions).
1162 if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
1163 return false;
1164
1165 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
1167 << " " << CurrentMBB->getName()
1168 << "\n From: " << *DAG.begin() << " To: ";
1170 else dbgs() << "End";
1171 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
1172
1173 // Save original instruction order before scheduling for possible revert.
1174 Unsched.clear();
1175 Unsched.reserve(DAG.NumRegionInstrs);
1178 const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(DAG.TII);
1179 for (auto &I : DAG) {
1180 Unsched.push_back(&I);
1181 if (SII->isIGLPMutationOnly(I.getOpcode()))
1182 DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
1183 }
1184 } else {
1185 for (auto &I : DAG)
1186 Unsched.push_back(&I);
1187 }
1188
1189 PressureBefore = DAG.Pressure[RegionIdx];
1190
1191 LLVM_DEBUG(
1192 dbgs() << "Pressure before scheduling:\nRegion live-ins:"
1193 << print(DAG.LiveIns[RegionIdx], DAG.MRI)
1194 << "Region live-in pressure: "
1196 << "Region register pressure: " << print(PressureBefore));
1197
1198 S.HasHighPressure = false;
1200
1201 if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
1203 SavedMutations.clear();
1205 bool IsInitialStage = StageID == GCNSchedStageID::OccInitialSchedule ||
1208 IsInitialStage ? AMDGPU::SchedulingPhase::Initial
1210 }
1211
1212 return true;
1213}
1214
1216 // Only reschedule regions that have excess register pressure (i.e. spilling)
1217 // or had minimum occupancy at the beginning of the stage (as long as
1218 // rescheduling of previous regions did not make occupancy drop back down to
1219 // the initial minimum).
1220 unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
1221 if (!DAG.RegionsWithExcessRP[RegionIdx] &&
1222 (DAG.MinOccupancy <= InitialOccupancy ||
1223 DAG.Pressure[RegionIdx].getOccupancy(ST, DynamicVGPRBlockSize) !=
1224 InitialOccupancy))
1225 return false;
1226
1228}
1229
1231 // We may need to reschedule this region if it wasn't rescheduled in the last
1232 // stage, or if we found it was testing critical register pressure limits in
1233 // the unclustered reschedule stage. The latter is because we may not have been
1234 // able to raise the min occupancy in the previous stage so the region may be
1235 // overly constrained even if it was already rescheduled.
1236 if (!DAG.RegionsWithHighRP[RegionIdx])
1237 return false;
1238
1240}
1241
1243 return RescheduleRegions[RegionIdx] && GCNSchedStage::initGCNRegion();
1244}
1245
1247 if (CurrentMBB)
1248 DAG.finishBlock();
1249
1250 CurrentMBB = DAG.RegionBegin->getParent();
1252 // Get real RP for the region if it hasn't been calculated before. After the
1253 // initial schedule stage real RP will be collected after scheduling.
1257 DAG.computeBlockPressure(RegionIdx, CurrentMBB);
1258}
1259
1261 DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
1262 if (S.HasHighPressure)
1263 DAG.RegionsWithHighRP[RegionIdx] = true;
1264
1265 // Revert scheduling if we have dropped occupancy or there is some other
1266 // reason that the original schedule is better.
1268
1269 if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
1272
1273 DAG.exitRegion();
1274 advanceRegion();
1275}
1276
1278 // Check the results of scheduling.
1279 PressureAfter = DAG.getRealRegPressure(RegionIdx);
1280
1281 LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
1282 LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
1283
1284 unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
1285
1288 DAG.Pressure[RegionIdx] = PressureAfter;
1289
1290 // Early out if we have achieved the occupancy target.
1291 LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
1292 return;
1293 }
1294
1295 unsigned TargetOccupancy = std::min(
1297 unsigned WavesAfter = std::min(
1298 TargetOccupancy, PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize));
1299 unsigned WavesBefore = std::min(
1300 TargetOccupancy, PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize));
1301 LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
1302 << ", after " << WavesAfter << ".\n");
1303
1304 // We may not be able to keep the current target occupancy because of the just
1305 // scheduled region. We might still be able to revert scheduling if the
1306 // occupancy before was higher, or if the current schedule has register
1307 // pressure higher than the excess limits which could lead to more spilling.
1308 unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
1309
1310 // Allow memory bound functions to drop to 4 waves if not limited by an
1311 // attribute.
1312 if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
1313 WavesAfter >= MFI.getMinAllowedOccupancy()) {
1314 LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
1315 << MFI.getMinAllowedOccupancy() << " waves\n");
1316 NewOccupancy = WavesAfter;
1317 }
1318
1319 if (NewOccupancy < DAG.MinOccupancy) {
1320 DAG.MinOccupancy = NewOccupancy;
1321 MFI.limitOccupancy(DAG.MinOccupancy);
1322 LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
1323 << DAG.MinOccupancy << ".\n");
1324 }
1325 // The maximum number of arch VGPR on non-unified register file, or the
1326 // maximum VGPR + AGPR in the unified register file case.
1327 unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
1328 // The maximum number of arch VGPR for both unified and non-unified register
1329 // file.
1330 unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
1331 unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
1332
1333 if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||
1334 PressureAfter.getArchVGPRNum() > MaxArchVGPRs ||
1335 PressureAfter.getAGPRNum() > MaxArchVGPRs ||
1336 PressureAfter.getSGPRNum() > MaxSGPRs) {
1337 DAG.RegionsWithHighRP[RegionIdx] = true;
1338 DAG.RegionsWithExcessRP[RegionIdx] = true;
1339 }
1340
1341 // Revert if this region's schedule would cause a drop in occupancy or
1342 // spilling.
1343 if (shouldRevertScheduling(WavesAfter))
1345 else
1346 DAG.Pressure[RegionIdx] = PressureAfter;
1347}
1348
1349unsigned
1350GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
1351 DenseMap<unsigned, unsigned> &ReadyCycles,
1352 const TargetSchedModel &SM) {
1353 unsigned ReadyCycle = CurrCycle;
1354 for (auto &D : SU.Preds) {
1355 if (D.isAssignedRegDep()) {
1356 MachineInstr *DefMI = D.getSUnit()->getInstr();
1357 unsigned Latency = SM.computeInstrLatency(DefMI);
1358 unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
1359 ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
1360 }
1361 }
1362 ReadyCycles[SU.NodeNum] = ReadyCycle;
1363 return ReadyCycle;
1364}
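// Worked example (illustrative numbers only): with CurrCycle = 3 and two
// assigned-reg-dep predecessors whose defs were ready at cycles 2 and 5 with
// latencies 4 and 1 respectively, ReadyCycle = max(3, 2 + 4, 5 + 1) = 6.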
1365
1366#ifndef NDEBUG
1368 bool operator()(std::pair<MachineInstr *, unsigned> A,
1369 std::pair<MachineInstr *, unsigned> B) const {
1370 return A.second < B.second;
1371 }
1372};
1373
1374static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
1375 EarlierIssuingCycle> &ReadyCycles) {
1376 if (ReadyCycles.empty())
1377 return;
1378 unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
1379 dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
1380 << " ##################\n# Cycle #\t\t\tInstruction "
1381 " "
1382 " \n";
1383 unsigned IPrev = 1;
1384 for (auto &I : ReadyCycles) {
1385 if (I.second > IPrev + 1)
1386 dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
1387 << " CYCLES DETECTED ******************************\n\n";
1388 dbgs() << "[ " << I.second << " ] : " << *I.first << "\n";
1389 IPrev = I.second;
1390 }
1391}
1392#endif
1393
1395GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
1396#ifndef NDEBUG
1397 std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1398 ReadyCyclesSorted;
1399#endif
1401 unsigned SumBubbles = 0;
1402 DenseMap<unsigned, unsigned> ReadyCycles;
1403 unsigned CurrCycle = 0;
1404 for (auto &SU : InputSchedule) {
1405 unsigned ReadyCycle =
1406 computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
1407 SumBubbles += ReadyCycle - CurrCycle;
1408#ifndef NDEBUG
1409 ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
1410#endif
1411 CurrCycle = ++ReadyCycle;
1412 }
1413#ifndef NDEBUG
1414 LLVM_DEBUG(
1415 printScheduleModel(ReadyCyclesSorted);
1416 dbgs() << "\n\t"
1417 << "Metric: "
1418 << (SumBubbles
1419 ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1420 : 1)
1421 << "\n\n");
1422#endif
1423
1424 return ScheduleMetrics(CurrCycle, SumBubbles);
1425}
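// Worked example (illustrative numbers only): if the modeled schedule spans
// CurrCycle = 50 cycles and accumulates SumBubbles = 10 stall cycles, the
// metric printed in the debug output above is
// SumBubbles * ScaleFactor / CurrCycle = 10 * 100 / 50 = 20, i.e. roughly
// the percentage of cycles lost to bubbles.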
1426
1429#ifndef NDEBUG
1430 std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1431 ReadyCyclesSorted;
1432#endif
1434 unsigned SumBubbles = 0;
1435 DenseMap<unsigned, unsigned> ReadyCycles;
1436 unsigned CurrCycle = 0;
1437 for (auto &MI : DAG) {
1438 SUnit *SU = DAG.getSUnit(&MI);
1439 if (!SU)
1440 continue;
1441 unsigned ReadyCycle =
1442 computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
1443 SumBubbles += ReadyCycle - CurrCycle;
1444#ifndef NDEBUG
1445 ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
1446#endif
1447 CurrCycle = ++ReadyCycle;
1448 }
1449#ifndef NDEBUG
1450 LLVM_DEBUG(
1451 printScheduleModel(ReadyCyclesSorted);
1452 dbgs() << "\n\t"
1453 << "Metric: "
1454 << (SumBubbles
1455 ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1456 : 1)
1457 << "\n\n");
1458#endif
1459
1460 return ScheduleMetrics(CurrCycle, SumBubbles);
1461}
1462
1463bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
1464 if (WavesAfter < DAG.MinOccupancy)
1465 return true;
1466
1467 // For dynamic VGPR mode, we don't want to waste any VGPR blocks.
1468 if (DAG.MFI.isDynamicVGPREnabled()) {
1469 unsigned BlocksBefore = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
1471 PressureBefore.getVGPRNum(false));
1472 unsigned BlocksAfter = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
1474 PressureAfter.getVGPRNum(false));
1475 if (BlocksAfter > BlocksBefore)
1476 return true;
1477 }
1478
1479 return false;
1480}
1481
1484 return false;
1485
1487 return true;
1488
1489 if (mayCauseSpilling(WavesAfter))
1490 return true;
1491
1492 return false;
1493}
1494
1496 // If RP is not reduced in the unclustered reschedule stage, revert to the
1497 // old schedule.
1498 if ((WavesAfter <=
1500 mayCauseSpilling(WavesAfter)) ||
1502 LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
1503 return true;
1504 }
1505
1506 // Do not attempt to relax schedule even more if we are already spilling.
1508 return false;
1509
1510 LLVM_DEBUG(
1511 dbgs()
1512 << "\n\t *** In shouldRevertScheduling ***\n"
1513 << " *********** BEFORE UnclusteredHighRPStage ***********\n");
1515 LLVM_DEBUG(
1516 dbgs()
1517 << "\n *********** AFTER UnclusteredHighRPStage ***********\n");
1519 unsigned OldMetric = MBefore.getMetric();
1520 unsigned NewMetric = MAfter.getMetric();
1521 unsigned WavesBefore = std::min(
1524 unsigned Profit =
1525 ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
1527 NewMetric) /
1529 LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
1530 << MAfter << "Profit: " << Profit << "\n");
1531 return Profit < ScheduleMetrics::ScaleFactor;
1532}
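// Worked example (illustrative numbers; the middle of the Profit expression
// is elided in this listing, so this assumes it scales the occupancy ratio
// by (OldMetric + bias) * ScaleFactor / NewMetric): with
// WavesBefore = WavesAfter = 4, OldMetric = 50, NewMetric = 70, and the
// default amdgpu-schedule-metric-bias of 10,
// Profit = (4 * 100 / 4) * (60 * 100 / 70) / 100 = 85, which is below
// ScaleFactor (100), so the unclustered schedule is reverted.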
1533
1536 return false;
1537
1539 return true;
1540
1541 if (mayCauseSpilling(WavesAfter))
1542 return true;
1543
1544 return false;
1545}
1546
1548 return GCNSchedStage::shouldRevertScheduling(WavesAfter) ||
1549 mayCauseSpilling(WavesAfter) || (TargetOcc && WavesAfter < TargetOcc);
1550}
1551
1553 if (mayCauseSpilling(WavesAfter))
1554 return true;
1555
1556 return false;
1557}
1558
1560 unsigned WavesAfter) {
1561 return mayCauseSpilling(WavesAfter);
1562}
1563
1564bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
1565 if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
1567 LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
1568 return true;
1569 }
1570
1571 return false;
1572}
1573
1575 LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
1577 int SkippedDebugInstr = 0;
1578 for (MachineInstr *MI : Unsched) {
1579 if (MI->isDebugInstr()) {
1580 ++SkippedDebugInstr;
1581 continue;
1582 }
1583
1584 if (MI->getIterator() != DAG.RegionEnd) {
1586 if (!MI->isDebugInstr())
1587 DAG.LIS->handleMove(*MI, true);
1588 }
1589
1590 // Reset read-undef flags and update them later.
1591 for (auto &Op : MI->all_defs())
1592 Op.setIsUndef(false);
1593 RegisterOperands RegOpers;
1594 RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
1595 if (!MI->isDebugInstr()) {
1597 // Adjust liveness and add missing dead+read-undef flags.
1599 RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
1600 } else {
1601 // Adjust for missing dead-def flags.
1602 RegOpers.detectDeadDefs(*MI, *DAG.LIS);
1603 }
1604 }
1605 DAG.RegionEnd = MI->getIterator();
1606 ++DAG.RegionEnd;
1607 LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
1608 }
1609
1610 // After reverting schedule, debug instrs will now be at the end of the block
1611 // and RegionEnd will point to the first debug instr. Increment RegionEnd
1612 // past debug instrs to the actual end of the scheduling region.
1613 while (SkippedDebugInstr-- > 0)
1614 ++DAG.RegionEnd;
1615
1616 // If Unsched.front() instruction is a debug instruction, this will actually
1617 // shrink the region since we moved all debug instructions to the end of the
1618 // block. Find the first instruction that is not a debug instruction.
1619 DAG.RegionBegin = Unsched.front()->getIterator();
1620 if (DAG.RegionBegin->isDebugInstr()) {
1621 for (MachineInstr *MI : Unsched) {
1622 if (MI->isDebugInstr())
1623 continue;
1624 DAG.RegionBegin = MI->getIterator();
1625 break;
1626 }
1627 }
1628
1629 // Then move the debug instructions back into their correct place and set
1630 // RegionBegin and RegionEnd if needed.
1632
1633 DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
1634}
1635
1636bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
1637 SlotIndex OriginalIdx,
1638 SlotIndex RematIdx) const {
1639
1640 LiveIntervals *LIS = DAG.LIS;
1642 OriginalIdx = OriginalIdx.getRegSlot(true);
1643 RematIdx = std::max(RematIdx, RematIdx.getRegSlot(true));
1644 for (const MachineOperand &MO : InstToRemat->operands()) {
1645 if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
1646 continue;
1647
1648 if (!MO.getReg().isVirtual()) {
1649 // Do not attempt to reason about PhysRegs
1650 // TODO: better analysis of PhysReg liveness
1651 if (!DAG.MRI.isConstantPhysReg(MO.getReg()) &&
1652 !DAG.TII->isIgnorableUse(MO))
1653 return false;
1654
1655 // Constant PhysRegs and IgnorableUses are okay
1656 continue;
1657 }
1658
1659 LiveInterval &LI = LIS->getInterval(MO.getReg());
1660 const VNInfo *OVNI = LI.getVNInfoAt(OriginalIdx);
1661 assert(OVNI);
1662
1663 // Don't allow rematerialization immediately after the original def.
1664 // It would be incorrect if InstToRemat redefines the register.
1665 // See PR14098.
1666 if (SlotIndex::isSameInstr(OriginalIdx, RematIdx))
1667 return false;
1668
1669 if (OVNI != LI.getVNInfoAt(RematIdx))
1670 return false;
1671
1672 // Check that subrange is live at RematIdx.
1673 if (LI.hasSubRanges()) {
1674 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
1675 unsigned SubReg = MO.getSubReg();
1676 LaneBitmask LM = SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
1677 : MRI.getMaxLaneMaskForVReg(MO.getReg());
1678 for (LiveInterval::SubRange &SR : LI.subranges()) {
1679 if ((SR.LaneMask & LM).none())
1680 continue;
1681 if (!SR.liveAt(RematIdx))
1682 return false;
1683
1684 // Early exit if all used lanes are checked. No need to continue.
1685 LM &= ~SR.LaneMask;
1686 if (LM.none())
1687 break;
1688 }
1689 }
1690 }
1691 return true;
1692}
1693
1694bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
1695 const Function &F = MF.getFunction();
1696
1697 // Maps optimizable regions (i.e., regions at minimum and register-limited
1698 // occupancy, or regions with spilling) to the target RP we would like to
1699 // reach.
1701 unsigned MaxSGPRs = ST.getMaxNumSGPRs(F);
1702 unsigned MaxVGPRs = ST.getMaxNumVGPRs(F);
1703 auto ResetTargetRegions = [&]() {
1704 OptRegions.clear();
1705 for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
1706 const GCNRegPressure &RP = DAG.Pressure[I];
1707 GCNRPTarget Target(MaxSGPRs, MaxVGPRs, MF, RP);
1708 if (!Target.satisfied())
1709 OptRegions.insert({I, Target});
1710 }
1711 };
1712
1713 ResetTargetRegions();
1714 if (!OptRegions.empty() || DAG.MinOccupancy >= MFI.getMaxWavesPerEU()) {
1715 // In addition to register usage being above addressable limits, occupancy
1716 // below the minimum is considered like "spilling" as well.
1717 TargetOcc = std::nullopt;
1718 } else {
1719 // There is no spilling and room to improve occupancy; set up "increased
1720 // occupancy targets" for all regions.
1721 TargetOcc = DAG.MinOccupancy + 1;
1722 unsigned VGPRBlockSize =
1724 MaxSGPRs = ST.getMaxNumSGPRs(*TargetOcc, false);
1725 MaxVGPRs = ST.getMaxNumVGPRs(*TargetOcc, VGPRBlockSize);
1726 ResetTargetRegions();
1727 }
1728 REMAT_DEBUG({
1729 dbgs() << "Analyzing ";
1730 MF.getFunction().printAsOperand(dbgs(), false);
1731 dbgs() << ": ";
1732 if (OptRegions.empty()) {
1733 dbgs() << "no objective to achieve, occupancy is maximal at "
1734 << MFI.getMaxWavesPerEU();
1735 } else if (!TargetOcc) {
1736 dbgs() << "reduce spilling (minimum target occupancy is "
1737 << MFI.getMinWavesPerEU() << ')';
1738 } else {
1739 dbgs() << "increase occupancy from " << DAG.MinOccupancy << " to "
1740 << TargetOcc;
1741 }
1742 dbgs() << '\n';
1743 for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
1744 if (auto OptIt = OptRegions.find(I); OptIt != OptRegions.end()) {
1745 dbgs() << REMAT_PREFIX << " [" << I << "] " << OptIt->getSecond()
1746 << '\n';
1747 }
1748 }
1749 });
1750 if (OptRegions.empty())
1751 return false;
1752
1753 // Accounts for a reduction in RP in an optimizable region. Returns whether we
1754 // estimate that we have identified enough rematerialization opportunities to
1755 // achieve our goal, and sets Progress to true when this particular reduction
1756 // in pressure was helpful toward that goal.
1757 auto ReduceRPInRegion = [&](auto OptIt, Register Reg, LaneBitmask Mask,
1758 bool &Progress) -> bool {
1759 GCNRPTarget &Target = OptIt->getSecond();
1760 if (!Target.isSaveBeneficial(Reg))
1761 return false;
1762 Progress = true;
1763 Target.saveReg(Reg, Mask, DAG.MRI);
1764 if (Target.satisfied())
1765 OptRegions.erase(OptIt->getFirst());
1766 return OptRegions.empty();
1767 };
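  // For instance (hypothetical numbers), if region 3 must shed two VGPRs,
  // saving two single-VGPR registers against its GCNRPTarget satisfies the
  // target and erases the region from OptRegions; the lambda only returns true
  // once the map is empty, i.e. once every optimizable region is expected to
  // meet its target.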
1768
1769  // We need up-to-date live-out info to query live-out register masks in
1770  // regions containing rematerializable instructions.
1771 DAG.RegionLiveOuts.buildLiveRegMap();
1772
1773 // Cache set of registers that are going to be rematerialized.
1774 DenseSet<unsigned> RematRegs;
1775
1776 // Identify rematerializable instructions in the function.
1777 for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
1778 auto Region = DAG.Regions[I];
1779 for (auto MI = Region.first; MI != Region.second; ++MI) {
1780 // The instruction must be trivially rematerializable.
1781 MachineInstr &DefMI = *MI;
1782 if (!isTriviallyReMaterializable(DefMI))
1783 continue;
1784
1785 // We only support rematerializing virtual registers with one definition.
1786 Register Reg = DefMI.getOperand(0).getReg();
1787 if (!Reg.isVirtual() || !DAG.MRI.hasOneDef(Reg))
1788 continue;
1789
1790 // We only care to rematerialize the instruction if it has a single
1791 // non-debug user in a different region. The using MI may not belong to a
1792 // region if it is a lone region terminator.
1793      MachineInstr *UseMI = DAG.MRI.getOneNonDBGUser(Reg);
1794      if (!UseMI)
1795 continue;
1796 auto UseRegion = MIRegion.find(UseMI);
1797 if (UseRegion != MIRegion.end() && UseRegion->second == I)
1798 continue;
1799
1800 // Do not rematerialize an instruction if it uses or is used by an
1801 // instruction that we have designated for rematerialization.
1802 // FIXME: Allow for rematerialization chains: this requires 1. updating
1803 // remat points to account for uses that are rematerialized, and 2. either
1804 // rematerializing the candidates in careful ordering, or deferring the
1805 // MBB RP walk until the entire chain has been rematerialized.
1806 if (Rematerializations.contains(UseMI) ||
1807 llvm::any_of(DefMI.operands(), [&RematRegs](MachineOperand &MO) {
1808 return MO.isReg() && RematRegs.contains(MO.getReg());
1809 }))
1810 continue;
1811
1812      // Do not rematerialize an instruction if it uses registers that aren't
1813      // available at its use. This ensures that we are not extending any live
1814      // range while rematerializing.
1815      SlotIndex DefIdx = DAG.LIS->getInstructionIndex(DefMI);
1816      SlotIndex UseIdx = DAG.LIS->getInstructionIndex(*UseMI).getRegSlot(true);
1817 if (!allUsesAvailableAt(&DefMI, DefIdx, UseIdx))
1818 continue;
1819
1820 REMAT_DEBUG(dbgs() << "Region " << I << ": remat instruction " << DefMI);
1821 RematInstruction &Remat =
1822 Rematerializations.try_emplace(&DefMI, UseMI).first->second;
1823
1824 bool RematUseful = false;
1825 if (auto It = OptRegions.find(I); It != OptRegions.end()) {
1826 // Optimistically consider that moving the instruction out of its
1827 // defining region will reduce RP in the latter; this assumes that
1828 // maximum RP in the region is reached somewhere between the defining
1829 // instruction and the end of the region.
1830 REMAT_DEBUG(dbgs() << " Defining region is optimizable\n");
1831 LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
1832 if (ReduceRPInRegion(It, Reg, Mask, RematUseful))
1833 return true;
1834 }
1835
1836 for (unsigned LIRegion = 0; LIRegion != E; ++LIRegion) {
1837 // We are only collecting regions in which the register is a live-in
1838 // (and may be live-through).
1839 auto It = DAG.LiveIns[LIRegion].find(Reg);
1840 if (It == DAG.LiveIns[LIRegion].end() || It->second.none())
1841 continue;
1842 Remat.LiveInRegions.insert(LIRegion);
1843
1844 // Account for the reduction in RP due to the rematerialization in an
1845 // optimizable region in which the defined register is a live-in. This
1846        // is exact for live-through regions but optimistic in the using region,
1847 // where RP is actually reduced only if maximum RP is reached somewhere
1848 // between the beginning of the region and the rematerializable
1849 // instruction's use.
1850 if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) {
1851 REMAT_DEBUG(dbgs() << " Live-in in region " << LIRegion << '\n');
1852 if (ReduceRPInRegion(It, Reg, DAG.LiveIns[LIRegion][Reg],
1853 RematUseful))
1854 return true;
1855 }
1856 }
1857
1858      // If the register is not a live-in or live-out in any optimizable
1859      // region, then there is no point in rematerializing the instruction.
1860 if (!RematUseful) {
1861 Rematerializations.pop_back();
1862 REMAT_DEBUG(dbgs() << " No impact, not rematerializing instruction\n");
1863 } else {
1864 RematRegs.insert(Reg);
1865 }
1866 }
1867 }
1868
1869 if (TargetOcc) {
1870 // We were trying to increase occupancy but failed, abort the stage.
1871 REMAT_DEBUG(dbgs() << "Cannot increase occupancy\n");
1872 Rematerializations.clear();
1873 return false;
1874 }
1875 REMAT_DEBUG(dbgs() << "Can reduce but not eliminate spilling\n");
1876 return !Rematerializations.empty();
1877}
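// Note the asymmetry in the two exits above: when the goal was an occupancy
// increase, partial progress is discarded (Rematerializations.clear()) and the
// stage aborts, whereas in spill-reduction mode any set of useful
// rematerializations is kept even if it cannot eliminate spilling entirely.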
1878
1879void PreRARematStage::rematerialize() {
1880 const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
1881
1882  // Collect regions whose RP changes in an unpredictable way; we will have to
1883  // fully recompute their RP after all rematerializations.
1884 DenseSet<unsigned> RecomputeRP;
1885
1886 // Rematerialize all instructions.
1887 for (auto &[DefMI, Remat] : Rematerializations) {
1888    MachineBasicBlock::iterator InsertPos(Remat.UseMI);
1889    Register Reg = DefMI->getOperand(0).getReg();
1890    unsigned DefRegion = MIRegion.at(DefMI);
1891
1892 // Rematerialize DefMI to its use block.
1893 TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
1894 AMDGPU::NoSubRegister, *DefMI, *DAG.TRI);
1895 Remat.RematMI = &*std::prev(InsertPos);
1896 DAG.LIS->InsertMachineInstrInMaps(*Remat.RematMI);
1897
1898    // Update region boundaries in regions we sank from (remove the defining
1899    // MI) and to (insert the MI rematerialized in the use block). Only then
1900    // can we erase the original MI.
1901 DAG.updateRegionBoundaries(DAG.Regions[DefRegion], DefMI, nullptr);
1902 auto UseRegion = MIRegion.find(Remat.UseMI);
1903 if (UseRegion != MIRegion.end()) {
1904 DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], InsertPos,
1905 Remat.RematMI);
1906 }
1907    DAG.LIS->RemoveMachineInstrFromMaps(*DefMI);
1908    DefMI->eraseFromParent();
1909
1910 // Collect all regions impacted by the rematerialization and update their
1911 // live-in/RP information.
1912 for (unsigned I : Remat.LiveInRegions) {
1913 ImpactedRegions.insert({I, DAG.Pressure[I]});
1914 GCNRPTracker::LiveRegSet &RegionLiveIns = DAG.LiveIns[I];
1915
1916#ifdef EXPENSIVE_CHECKS
1917      // All uses are known to be available / live at the remat point. Thus,
1918      // the uses should already be live-in to the region.
1919 for (MachineOperand &MO : DefMI->operands()) {
1920 if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
1921 continue;
1922
1923 Register UseReg = MO.getReg();
1924 if (!UseReg.isVirtual())
1925 continue;
1926
1927      LiveInterval &LI = DAG.LIS->getInterval(UseReg);
1928      LaneBitmask LM = DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());
1929 if (LI.hasSubRanges() && MO.getSubReg())
1930 LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());
1931
1932 LaneBitmask LiveInMask = RegionLiveIns.at(UseReg);
1933 LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
1934 // If this register has lanes not covered by the LiveIns, be sure they
1935 // do not map to any subrange. ref:
1936 // machine-scheduler-sink-trivial-remats.mir::omitted_subrange
1937 if (UncoveredLanes.any()) {
1938 assert(LI.hasSubRanges());
1939 for (LiveInterval::SubRange &SR : LI.subranges())
1940 assert((SR.LaneMask & UncoveredLanes).none());
1941 }
1942 }
1943#endif
1944
1945      // The register is no longer a live-in in any region except the one that
1946      // contains the single use. In live-through regions, maximum register
1947      // pressure decreases predictably, so we can update it directly. In the
1948      // using region, maximum RP may or may not decrease, so we mark it for
1949      // re-computation after all rematerializations have taken place.
1950 LaneBitmask PrevMask = RegionLiveIns[Reg];
1951 RegionLiveIns.erase(Reg);
1952 RegMasks.insert({{I, Remat.RematMI->getOperand(0).getReg()}, PrevMask});
1953 if (Remat.UseMI->getParent() != DAG.Regions[I].first->getParent())
1954 DAG.Pressure[I].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
1955 else
1956 RecomputeRP.insert(I);
1957 }
1958 // RP in the region from which the instruction was rematerialized may or may
1959 // not decrease.
1960 ImpactedRegions.insert({DefRegion, DAG.Pressure[DefRegion]});
1961 RecomputeRP.insert(DefRegion);
1962
1963 // Recompute live interval to reflect the register's rematerialization.
1964 Register RematReg = Remat.RematMI->getOperand(0).getReg();
1965    DAG.LIS->removeInterval(RematReg);
1966    DAG.LIS->createAndComputeVirtRegInterval(RematReg);
1967 }
1968
1969 // All regions impacted by at least one rematerialization must be rescheduled.
1970 // Maximum pressure must also be recomputed for all regions where it changed
1971 // non-predictably and checked against the target occupancy.
1972  unsigned DynamicVGPRBlockSize =
1973      MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
1974  AchievedOcc = MFI.getMaxWavesPerEU();
1975 for (auto &[I, OriginalRP] : ImpactedRegions) {
1976 bool IsEmptyRegion = DAG.Regions[I].first == DAG.Regions[I].second;
1977 RescheduleRegions[I] = !IsEmptyRegion;
1978 if (!RecomputeRP.contains(I))
1979 continue;
1980
1981    GCNRegPressure RP;
1982    if (IsEmptyRegion) {
1983 RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
1984 } else {
1985      GCNDownwardRPTracker RPT(*DAG.LIS);
1986      auto *NonDbgMI = &*skipDebugInstructionsForward(DAG.Regions[I].first,
1987 DAG.Regions[I].second);
1988 if (NonDbgMI == DAG.Regions[I].second) {
1989 // Region is non-empty but contains only debug instructions.
1990 RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
1991 } else {
1992 RPT.reset(*NonDbgMI, &DAG.LiveIns[I]);
1993 RPT.advance(DAG.Regions[I].second);
1994 RP = RPT.moveMaxPressure();
1995 }
1996 }
1997 DAG.Pressure[I] = RP;
1998 AchievedOcc =
1999 std::min(AchievedOcc, RP.getOccupancy(ST, DynamicVGPRBlockSize));
2000 }
2001 REMAT_DEBUG(dbgs() << "Achieved occupancy " << AchievedOcc << "\n");
2002}
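// To summarize the pressure bookkeeping above: a live-through region loses
// exactly PrevMask worth of pressure and is updated in place via
// GCNRegPressure::inc, while the defining and using regions are re-walked with
// a GCNDownwardRPTracker (or recomputed from live-ins when empty) because
// their maximum pressure may have been reached away from the rematerialized
// instruction.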
2003
2004// Copied from MachineLICM
2005bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
2006  if (!DAG.TII->isTriviallyReMaterializable(MI))
2007    return false;
2008
2009 for (const MachineOperand &MO : MI.all_uses()) {
2010 // We can't remat physreg uses, unless it is a constant or an ignorable
2011 // use (e.g. implicit exec use on VALU instructions)
2012 if (MO.getReg().isPhysical()) {
2013 if (DAG.MRI.isConstantPhysReg(MO.getReg()) || DAG.TII->isIgnorableUse(MO))
2014 continue;
2015 return false;
2016 }
2017 }
2018
2019 return true;
2020}
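// For example, a VALU move whose only physical-register operand is its
// implicit EXEC use is accepted here, since EXEC reads are ignorable for
// rematerialization, whereas an instruction reading a physical register that
// is neither constant throughout the function nor ignorable is rejected.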
2021
2022void PreRARematStage::finalizeGCNSchedStage() {
2023  // We consider that reducing spilling is always beneficial, so we never roll
2024  // back rematerializations in such cases. It is also possible for rescheduling
2025  // to lower occupancy below the one achieved just through remats, in which
2026  // case we do not want to roll back either (the rescheduling was already
2027  // reverted in PreRARematStage::shouldRevertScheduling in such cases).
2028 unsigned MaxOcc = std::max(AchievedOcc, DAG.MinOccupancy);
2029 if (!TargetOcc || MaxOcc >= *TargetOcc)
2030 return;
2031
2032 REMAT_DEBUG(dbgs() << "Rolling back all rematerializations\n");
2033 const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
2034
2035 // Rollback the rematerializations.
2036 for (const auto &[DefMI, Remat] : Rematerializations) {
2037 MachineInstr &RematMI = *Remat.RematMI;
2038 unsigned DefRegion = MIRegion.at(DefMI);
2039 MachineBasicBlock::iterator InsertPos(DAG.Regions[DefRegion].second);
2040 MachineBasicBlock *MBB = RegionBB[DefRegion];
2041 Register Reg = RematMI.getOperand(0).getReg();
2042
2043 // Re-rematerialize MI at the end of its original region. Note that it may
2044 // not be rematerialized exactly in the same position as originally within
2045 // the region, but it should not matter much.
2046 TII->reMaterialize(*MBB, InsertPos, Reg, AMDGPU::NoSubRegister, RematMI,
2047 *DAG.TRI);
2048 MachineInstr *NewMI = &*std::prev(InsertPos);
2049    DAG.LIS->InsertMachineInstrInMaps(*NewMI);
2050
2051 auto UseRegion = MIRegion.find(Remat.UseMI);
2052 if (UseRegion != MIRegion.end()) {
2053 DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], RematMI,
2054 nullptr);
2055 }
2056 DAG.updateRegionBoundaries(DAG.Regions[DefRegion], InsertPos, NewMI);
2057
2058 // Erase rematerialized MI.
2059    DAG.LIS->RemoveMachineInstrFromMaps(RematMI);
2060    RematMI.eraseFromParent();
2061
2062 // Recompute live interval for the re-rematerialized register
2063 DAG.LIS->removeInterval(Reg);
2064    DAG.LIS->createAndComputeVirtRegInterval(Reg);
2065
2066 // Re-add the register as a live-in in all regions it used to be one in.
2067 for (unsigned LIRegion : Remat.LiveInRegions)
2068 DAG.LiveIns[LIRegion].insert({Reg, RegMasks.at({LIRegion, Reg})});
2069 }
2070
2071 // Reset RP in all impacted regions.
2072 for (auto &[I, OriginalRP] : ImpactedRegions)
2073 DAG.Pressure[I] = OriginalRP;
2074
2075  GCNSchedStage::finalizeGCNSchedStage();
2076}
2077
2078void GCNScheduleDAGMILive::updateRegionBoundaries(
2079    RegionBoundaries &RegionBounds, MachineBasicBlock::iterator MI,
2080    MachineInstr *NewMI) {
2081 assert((!NewMI || NewMI != RegionBounds.second) &&
2082 "cannot remove at region end");
2083
2084 if (RegionBounds.first == RegionBounds.second) {
2085 assert(NewMI && "cannot remove from an empty region");
2086 RegionBounds.first = NewMI;
2087 return;
2088 }
2089
2090  // We only care about modifications at the beginning of a non-empty region
2091  // since the upper region boundary is exclusive.
2092 if (MI != RegionBounds.first)
2093 return;
2094 if (!NewMI)
2095 RegionBounds.first = std::next(MI); // Removal
2096 else
2097 RegionBounds.first = NewMI; // Insertion
2098}
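// By way of example: removing the first instruction of a region
// (NewMI == nullptr) slides RegionBounds.first to the next instruction, while
// inserting a rematerialized instruction right before the old first
// instruction makes NewMI the new region begin. Edits elsewhere in the region,
// or at the exclusive upper bound, require no boundary update.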
2099
2100static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
2101  const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(DAG->TII);
2102 return any_of(*DAG, [SII](MachineBasicBlock::iterator MI) {
2103 return SII->isIGLPMutationOnly(MI->getOpcode());
2104 });
2105}
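// When IGLP mutations are present, the post-RA DAG below stashes the generic
// mutation list in SavedMutations, installs the IGroupLP mutation for the
// post-RA phase before scheduling, and swaps the original list back in
// finalizeSchedule().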
2106
2107GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
2108    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
2109 bool RemoveKillFlags)
2110 : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}
2111
2111
2112void GCNPostScheduleDAGMILive::schedule() {
2113  HasIGLPInstrs = hasIGLPInstrs(this);
2114 if (HasIGLPInstrs) {
2115 SavedMutations.clear();
2116 SavedMutations.swap(Mutations);
2117    addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
2118  }
2119
2120  ScheduleDAGMI::schedule();
2121}
2122
2123void GCNPostScheduleDAGMILive::finalizeSchedule() {
2124  if (HasIGLPInstrs)
2125 SavedMutations.swap(Mutations);
2126
2127  ScheduleDAGMI::finalizeSchedule();
2128}
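// A minimal usage sketch (assumed wiring, not taken from this file): a target
// would typically instantiate the live scheduling DAG with one of the GCN
// strategies from its machine-scheduler hook, roughly as follows.
//
//   static ScheduleDAGInstrs *createGCNSchedulerSketch(MachineSchedContext *C) {
//     auto Strategy = std::make_unique<GCNMaxOccupancySchedStrategy>(C);
//     return new GCNScheduleDAGMILive(C, std::move(Strategy));
//   }
//
// The factory name here is hypothetical; the real scheduler registration lives
// in the AMDGPU target machine.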