#define DEBUG_TYPE "rewrite-partial-reg-uses"
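// This pass rewrites virtual registers that are accessed only through
// subregister indices to use a register class of minimal size, shifting the
// used subreg indices towards sub0. An illustrative (hypothetical) example of
// the intended transformation:
//
//   undef %0.sub4:vreg_1024 = ...
//   %0.sub5:vreg_1024 = ...
//   %1:vreg_64 = COPY %0.sub4_sub5
// becomes
//   undef %0.sub0:vreg_64 = ...
//   %0.sub1:vreg_64 = ...
//   %1:vreg_64 = COPY %0
//
// Throughout the file, SubRegMap maps each used (old) subreg index to the
// subreg index it is rewritten to (0 meaning the whole new register).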
class GCNRewritePartialRegUsesImpl {
  const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC,
                                           SubRegMap &SubRegs) const;

  const TargetRegisterClass *
  getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
                                unsigned CoverSubregIdx,
                                SubRegMap &SubRegs) const;

  void updateLiveIntervals(Register OldReg, Register NewReg,
                           SubRegMap &SubRegs) const;

  unsigned shiftSubReg(unsigned SubReg, unsigned RShift) const;

  unsigned getSubReg(unsigned Offset, unsigned Size) const;

  const uint32_t *getSuperRegClassMask(const TargetRegisterClass *RC,
                                       unsigned SubRegIdx) const;

  const BitVector &
  getAllocatableAndAlignedRegClassMask(unsigned AlignNumBits) const;
};

class GCNRewritePartialRegUsesLegacy : public MachineFunctionPass {
public:
  static char ID;

  StringRef getPassName() const override {
    return "Rewrite Partial Register Uses";
  }
};
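// getSubReg: find the target subregister index that starts at bit Offset and
// spans Size bits by scanning every subregister index known to TRI.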
unsigned GCNRewritePartialRegUsesImpl::getSubReg(unsigned Offset,
                                                 unsigned Size) const {
  for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
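// shiftSubReg: return the subreg index equivalent to SubReg moved RShift bits
// towards the low end of the register (same size, offset reduced by RShift).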
unsigned GCNRewritePartialRegUsesImpl::shiftSubReg(unsigned SubReg,
                                                   unsigned RShift) const {
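// getSuperRegClassMask: lazily compute and cache the register class mask that
// SuperRegClassIterator yields for (RC, SubRegIdx), i.e. the set of classes
// whose SubRegIdx subregister can live in RC. Used below to narrow the
// candidate classes for the rewritten register.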
const uint32_t *GCNRewritePartialRegUsesImpl::getSuperRegClassMask(
    const TargetRegisterClass *RC, unsigned SubRegIdx) const {
  auto [I, Inserted] = SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr);
  for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) {
    if (RCI.getSubReg() == SubRegIdx) {
      I->second = RCI.getMask();
      break;
    }
  }
  return I->second;
}
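// getAllocatableAndAlignedRegClassMask: lazily build, per alignment value, a
// BitVector with a bit set for every allocatable register class whose
// registers satisfy that alignment.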
const BitVector &
GCNRewritePartialRegUsesImpl::getAllocatableAndAlignedRegClassMask(
    unsigned AlignNumBits) const {
  auto [I, Inserted] =
      AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
  for (unsigned ClassID = 0; ClassID < TRI->getNumRegClasses(); ++ClassID) {
    auto *RC = TRI->getRegClass(ClassID);
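// getRegClassWithShiftedSubregs: start from every allocatable class with
// suitable alignment, then for each used subreg compute its right-shifted
// index (the covering subreg, if any, becomes the register itself) and keep
// only the classes that support it; finally pick the smallest surviving class.
// Returns nullptr when no strictly better class is found.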
const TargetRegisterClass *
GCNRewritePartialRegUsesImpl::getRegClassWithShiftedSubregs(
    const TargetRegisterClass *RC, unsigned RShift, unsigned CoverSubregIdx,
    SubRegMap &SubRegs) const {
  unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC);
  LLVM_DEBUG(dbgs() << " Shift " << RShift << ", reg align " << RCAlign
                    << '\n');

  BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign));
  for (auto &[OldSubReg, NewSubReg] : SubRegs) {
    auto *SubRegRC = TRI->getSubRegisterClass(RC, OldSubReg);
                      << (SubRegRC->isAllocatable() ? "" : " not alloc")
    if (OldSubReg == CoverSubregIdx) {
      assert(SubRegRC->isAllocatable());
      NewSubReg = AMDGPU::NoSubRegister;
    } else {
      NewSubReg = shiftSubReg(OldSubReg, RShift);
    }

    const uint32_t *Mask = NewSubReg ? getSuperRegClassMask(SubRegRC, NewSubReg)
                                     : SubRegRC->getSubClassMask();
    ClassMask.clearBitsNotInMask(Mask);
    LLVM_DEBUG(dbgs() << ", num regclasses " << ClassMask.count() << '\n');
  }
  const TargetRegisterClass *MinRC = nullptr;
  unsigned MinNumBits = std::numeric_limits<unsigned>::max();
  for (unsigned ClassID : ClassMask.set_bits()) {
    auto *RC = TRI->getRegClass(ClassID);
    unsigned NumBits = TRI->getRegSizeInBits(*RC);
    if (NumBits < MinNumBits) {
      MinNumBits = NumBits;
      MinRC = RC;
    }
  }
  for (auto [OldSubReg, NewSubReg] : SubRegs)
    assert(MinRC == TRI->getSubClassWithSubReg(MinRC, NewSubReg));

  return (MinRC != RC || RShift != 0) ? MinRC : nullptr;
}
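// getMinSizeReg: if a single used subregister covers the whole used range
// (lowest offset and highest end), shift everything so that it lands at bit 0;
// otherwise shift right as far as the most strictly aligned used subreg
// allows.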
const TargetRegisterClass *
GCNRewritePartialRegUsesImpl::getMinSizeReg(const TargetRegisterClass *RC,
                                            SubRegMap &SubRegs) const {
  unsigned CoverSubreg = AMDGPU::NoSubRegister;
  unsigned Offset = std::numeric_limits<unsigned>::max();
  unsigned End = 0;
  for (auto [SubReg, SRI] : SubRegs) {
    unsigned SubRegOffset = TRI->getSubRegIdxOffset(SubReg);
    unsigned SubRegEnd = SubRegOffset + TRI->getSubRegIdxSize(SubReg);
    if (SubRegOffset < Offset) {
      Offset = SubRegOffset;
      CoverSubreg = AMDGPU::NoSubRegister;
    }
    if (SubRegEnd > End) {
      End = SubRegEnd;
      CoverSubreg = AMDGPU::NoSubRegister;
    }
    if (SubRegOffset == Offset && SubRegEnd == End)
      CoverSubreg = SubReg;
  }
  if (CoverSubreg != AMDGPU::NoSubRegister)
    return getRegClassWithShiftedSubregs(RC, Offset, CoverSubreg, SubRegs);
  unsigned MaxAlign = 0;
  for (auto [SubReg, SRI] : SubRegs)
    MaxAlign = std::max(MaxAlign, TRI->getSubRegAlignmentNumBits(RC, SubReg));
  unsigned FirstMaxAlignedSubRegOffset = std::numeric_limits<unsigned>::max();
  for (auto [SubReg, SRI] : SubRegs) {
    if (TRI->getSubRegAlignmentNumBits(RC, SubReg) != MaxAlign)
      continue;
    FirstMaxAlignedSubRegOffset =
        std::min(FirstMaxAlignedSubRegOffset, TRI->getSubRegIdxOffset(SubReg));
    if (FirstMaxAlignedSubRegOffset == Offset)
      break;
  }
  unsigned NewOffsetOfMaxAlignedSubReg =
      alignTo(FirstMaxAlignedSubRegOffset - Offset, MaxAlign);
  if (NewOffsetOfMaxAlignedSubReg > FirstMaxAlignedSubRegOffset)
    llvm_unreachable("misaligned subreg");

  unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg;
  return getRegClassWithShiftedSubregs(RC, RShift, 0, SubRegs);
}
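// updateLiveIntervals: transfer the live interval of OldReg to NewReg,
// remapping each subrange's lane mask through the old->new subreg map; when
// the existing subranges do not line up with the used subregs, the interval
// for NewReg is recomputed from scratch instead.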
void GCNRewritePartialRegUsesImpl::updateLiveIntervals(Register OldReg,
                                                       Register NewReg,
                                                       SubRegMap &SubRegs) const {
  if (!LIS->hasInterval(OldReg))
    return;

  auto &OldLI = LIS->getInterval(OldReg);
  auto &NewLI = LIS->createEmptyInterval(NewReg);

  auto &Allocator = LIS->getVNInfoAllocator();
  NewLI.setWeight(OldLI.weight());

  for (auto &SR : OldLI.subranges()) {
    auto I = find_if(SubRegs, [&](auto &P) {
      return SR.LaneMask == TRI->getSubRegIndexLaneMask(P.first);
    });

    if (I == SubRegs.end()) {
      // Subranges don't exactly match the used subregs: fall back to
      // recomputing the interval for the new register from scratch.
      LIS->removeInterval(OldReg);
      LIS->removeInterval(NewReg);
      LIS->createAndComputeVirtRegInterval(NewReg);
      return;
    }

    if (unsigned NewSubReg = I->second)
      NewLI.createSubRangeFrom(Allocator,
                               TRI->getSubRegIndexLaneMask(NewSubReg), SR);
    else // The covering subreg becomes the main range of the new interval.
      NewLI.assign(SR, Allocator);
  }
  if (NewLI.empty())
    NewLI.assign(OldLI, Allocator);

  LIS->removeInterval(OldReg);
}
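// rewriteReg: per-register driver. Collect the subreg indices Reg is used
// with, ask getMinSizeReg for a smaller class and the remapped indices, then
// create the new register, rewrite all operands and transfer the live
// interval.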
bool GCNRewritePartialRegUsesImpl::rewriteReg(Register Reg) const {
  // Collect the subregister indices Reg is used with; bail out if the whole
  // register is used anywhere.
  SubRegMap SubRegs;
  for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
    if (MO.getSubReg() == AMDGPU::NoSubRegister)
      return false;
    SubRegs.try_emplace(MO.getSubReg());
  }

  auto *RC = MRI->getRegClass(Reg);
  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                    << ':' << TRI->getRegClassName(RC) << '\n');
  auto *NewRC = getMinSizeReg(RC, SubRegs);
  Register NewReg = MRI->createVirtualRegister(NewRC);
  LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << ':'
                    << TRI->getRegClassName(RC) << " -> "
                    << printReg(NewReg, TRI) << ':'
                    << TRI->getRegClassName(NewRC) << '\n');
  for (MachineOperand &MO : make_early_inc_range(MRI->reg_operands(Reg))) {
    MO.setReg(NewReg);
    if (MO.isDebug() && MO.getSubReg() == 0)
      continue;
    unsigned NewSubReg = SubRegs[MO.getSubReg()];
    MO.setSubReg(NewSubReg);
    if (NewSubReg == AMDGPU::NoSubRegister && MO.isDef())
      MO.setIsUndef(false);
  }

  updateLiveIntervals(Reg, NewReg, SubRegs);
  return true;
}
  bool Changed = false;
  for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
    Changed |= rewriteReg(Register::index2VirtReg(I));
  }
  return Changed;
}
bool GCNRewritePartialRegUsesLegacy::runOnMachineFunction(MachineFunction &MF) {
  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
  GCNRewritePartialRegUsesImpl Impl(LIS);
  return Impl.run(MF);
}
  if (!GCNRewritePartialRegUsesImpl(LIS).run(MF))
    return PreservedAnalyses::all();
char GCNRewritePartialRegUsesLegacy::ID;

INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
                      "Rewrite Partial Register Uses", false, false)
INITIALIZE_PASS_END(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
                    "Rewrite Partial Register Uses", false, false)