#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"
static cl::opt<bool> AmdgcnSkipCacheInvalidations(
    "amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden,
    cl::desc("Use this to skip inserting cache invalidating instructions."));
enum class SIAtomicScope {
  NONE,
  SINGLETHREAD,
  WAVEFRONT,
  WORKGROUP,
  CLUSTER,
  AGENT,
  SYSTEM
};
/// The distinct address spaces supported by the AMDGPU target for
/// atomic memory operation. Can be ORed together.
enum class SIAtomicAddrSpace {
  NONE = 0u,
  GLOBAL = 1u << 0,
  LDS = 1u << 1,
  SCRATCH = 1u << 2,
  GDS = 1u << 3,
  OTHER = 1u << 4,

  /// The address spaces that can be accessed by a FLAT instruction.
  FLAT = GLOBAL | LDS | SCRATCH,

  /// The address spaces that can be affected by an atomic operation.
  ATOMIC = GLOBAL | LDS | SCRATCH | GDS,

  /// All address spaces.
  ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,

  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
};

/// The distinct kinds of memory operations. Can be ORed together.
enum class SIMemOp {
  NONE = 0u,
  LOAD = 1u << 0,
  STORE = 1u << 1,

  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ STORE)
};

/// Position to insert a new instruction relative to an existing instruction.
enum class Position {
  BEFORE,
  AFTER
};
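// Per-instruction summary of the memory-model properties gathered from the
// machine memory operands: ordering, scope, address spaces, and the
// volatile/nontemporal/last-use/cooperative hints.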
class SIMemOpInfo final {
private:
  friend class SIMemOpAccess;

  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicScope Scope = SIAtomicScope::SYSTEM;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  bool IsVolatile = false;
  bool IsNonTemporal = false;
  bool IsLastUse = false;
  bool IsCooperative = false;
  SIMemOpInfo(
      const GCNSubtarget &ST,
      AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
      SIAtomicScope Scope = SIAtomicScope::SYSTEM,
      SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
      SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
      bool IsCrossAddressSpaceOrdering = true,
      AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent,
      bool IsVolatile = false, bool IsNonTemporal = false,
      bool IsLastUse = false, bool IsCooperative = false)
      : Ordering(Ordering), FailureOrdering(FailureOrdering), Scope(Scope),
        OrderingAddrSpace(OrderingAddrSpace), InstrAddrSpace(InstrAddrSpace),
        IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
        IsVolatile(IsVolatile), IsNonTemporal(IsNonTemporal),
        IsLastUse(IsLastUse), IsCooperative(IsCooperative) {
    if (Ordering == AtomicOrdering::NotAtomic) {
      assert(!IsCooperative && "Cannot be cooperative & non-atomic!");
      assert(Scope == SIAtomicScope::NONE &&
             OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
             !IsCrossAddressSpaceOrdering &&
             FailureOrdering == AtomicOrdering::NotAtomic);
      return;
    }

    assert(Scope != SIAtomicScope::NONE &&
           (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
               SIAtomicAddrSpace::NONE &&
           (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
               SIAtomicAddrSpace::NONE);
    // There is no cross address space ordering if the ordering address space
    // is the same as the instruction address space and only contains a single
    // address space.
    if ((OrderingAddrSpace == InstrAddrSpace) &&
        isPowerOf2_32(uint32_t(OrderingAddrSpace)))
      this->IsCrossAddressSpaceOrdering = false;

    // Limit the scope to the maximum supported by the instruction's address
    // spaces.
    if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
        SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
    } else if ((InstrAddrSpace &
                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
               SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
    } else if ((InstrAddrSpace &
                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
                  SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::AGENT);
    }

    // On targets without clusters, CLUSTER scope is equivalent to AGENT.
    if (this->Scope == SIAtomicScope::CLUSTER && !ST.hasClusters())
      this->Scope = SIAtomicScope::AGENT;
  }
public:
  /// \returns Atomic synchronization scope of the machine instruction used to
  /// create this SIMemOpInfo.
  SIAtomicScope getScope() const { return Scope; }

  /// \returns Ordering constraint of the machine instruction used to create
  /// this SIMemOpInfo.
  AtomicOrdering getOrdering() const { return Ordering; }

  /// \returns Failure ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getFailureOrdering() const { return FailureOrdering; }

  /// \returns The address spaces accessed by the machine instruction used to
  /// create this SIMemOpInfo.
  SIAtomicAddrSpace getInstrAddrSpace() const { return InstrAddrSpace; }

  /// \returns The address spaces ordered by the machine instruction used to
  /// create this SIMemOpInfo.
  SIAtomicAddrSpace getOrderingAddrSpace() const { return OrderingAddrSpace; }

  /// \returns True iff memory ordering of operations on different address
  /// spaces is required.
  bool getIsCrossAddressSpaceOrdering() const {
    return IsCrossAddressSpaceOrdering;
  }

  /// \returns True if the memory access is volatile, false otherwise.
  bool isVolatile() const { return IsVolatile; }

  /// \returns True if the memory access is nontemporal, false otherwise.
  bool isNonTemporal() const { return IsNonTemporal; }

  /// \returns True if the memory access is a last use, false otherwise.
  bool isLastUse() const { return IsLastUse; }

  /// \returns True if this is a cooperative load or store atomic.
  bool isCooperative() const { return IsCooperative; }

  /// \returns True if the ordering constraint is unordered or higher, false
  /// otherwise.
  bool isAtomic() const { return Ordering != AtomicOrdering::NotAtomic; }
};
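// Extracts a SIMemOpInfo from the machine memory operands of an instruction,
// diagnosing combinations of scope and address space the target cannot
// support.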
class SIMemOpAccess final {
private:
  const AMDGPUMachineModuleInfo *MMI = nullptr;
  const GCNSubtarget &ST;

  /// Reports unsupported message \p Msg for \p MI to LLVM context.
  void reportUnsupported(const MachineBasicBlock::iterator &MI,
                         const char *Msg) const;

  /// \returns Info constructed from \p SSID, or std::nullopt if not
  /// supported.
  std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace) const;

  /// \returns The target's address space equivalent of the LLVM IR address
  /// space \p AS.
  SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;

  /// \returns Info constructed from \p MI, which has at least one machine
  /// memory operand.
  std::optional<SIMemOpInfo>
  constructFromMIWithMMO(const MachineBasicBlock::iterator &MI) const;

public:
  /// Construct class to support accessing the machine memory operands
  /// of instructions in the machine function \p MF.
  SIMemOpAccess(const AMDGPUMachineModuleInfo &MMI, const GCNSubtarget &ST);

  /// \returns Load info if \p MI is a load operation, "std::nullopt"
  /// otherwise.
  std::optional<SIMemOpInfo>
  getLoadInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns Store info if \p MI is a store operation, "std::nullopt"
  /// otherwise.
  std::optional<SIMemOpInfo>
  getStoreInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns Atomic fence info if \p MI is an atomic fence operation,
  /// "std::nullopt" otherwise.
  std::optional<SIMemOpInfo>
  getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or rmw
  /// operation, "std::nullopt" otherwise.
  std::optional<SIMemOpInfo>
  getAtomicCmpxchgOrRmwInfo(const MachineBasicBlock::iterator &MI) const;
};
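// Interface over the generation-specific cache-control and wait insertion.
// The expand* routines below talk to this interface only, so the memory-model
// logic stays hardware-independent.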
class SICacheControl {
protected:
  /// AMDGPU subtarget info.
  const GCNSubtarget &ST;

  /// Instruction info.
  const SIInstrInfo *TII = nullptr;

  IsaVersion IV;

  /// Whether to insert cache invalidating instructions.
  bool InsertCacheInv;

  SICacheControl(const GCNSubtarget &ST);

  /// Sets named bit \p Bit to "true" if present in instruction \p MI.
  /// Returns true if \p MI is modified, false otherwise.
  bool enableNamedBit(const MachineBasicBlock::iterator MI,
                      AMDGPU::CPol::CPol Bit) const;

public:
  /// Create a cache control for the subtarget \p ST.
  static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);

  /// Update \p MI memory load instruction to bypass any caches up to
  /// the \p Scope memory scope for address spaces \p AddrSpace.
  virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI memory store instruction to bypass any caches up to
  /// the \p Scope memory scope for address spaces \p AddrSpace.
  virtual bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                                      SIAtomicScope Scope,
                                      SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI memory read-modify-write instruction to bypass any caches
  /// up to the \p Scope memory scope for address spaces \p AddrSpace.
  virtual bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI memory instruction of kind \p Op associated with address
  /// spaces \p AddrSpace to indicate it is volatile and/or
  /// nontemporal/last-use.
  virtual bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                              SIAtomicAddrSpace AddrSpace,
                                              SIMemOp Op, bool IsVolatile,
                                              bool IsNonTemporal,
                                              bool IsLastUse = false) const = 0;

  virtual bool finalizeStore(MachineInstr &MI, bool Atomic) const {
    return false;
  }

  /// Handle a cooperative load or store atomic.
  virtual bool handleCooperativeAtomic(MachineInstr &MI) const {
    llvm_unreachable(
        "cooperative atomics are not available on this architecture");
  }

  /// Inserts any necessary instructions at position \p Pos relative to
  /// instruction \p MI to ensure memory instructions before \p Pos of kind
  /// \p Op associated with address spaces \p AddrSpace have completed.
  virtual bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                          SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                          bool IsCrossAddrSpaceOrdering, Position Pos,
                          AtomicOrdering Order) const = 0;

  /// Inserts any necessary instructions at position \p Pos relative to
  /// instruction \p MI to ensure any subsequent memory instructions of this
  /// thread with address spaces \p AddrSpace will observe the previous memory
  /// operations by any thread for memory scopes up to \p Scope.
  virtual bool insertAcquire(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace,
                             Position Pos) const = 0;

  /// Inserts any necessary instructions at position \p Pos relative to
  /// instruction \p MI to ensure previous memory instructions by this thread
  /// with address spaces \p AddrSpace have been written back to memory for
  /// memory scopes up to \p Scope.
  virtual bool insertRelease(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace,
                             bool IsCrossAddrSpaceOrdering,
                             Position Pos) const = 0;

  /// Inserts any necessary instructions at the start of a barrier.
  virtual bool insertBarrierStart(MachineBasicBlock::iterator &MI) const {
    return false;
  }

  /// Virtual destructor to allow derivations to be deleted.
  virtual ~SICacheControl() = default;
};
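// One concrete cache control per hardware generation. Each class inherits
// from the closest earlier generation and overrides only the behavior that
// changed (new cache levels, new cache-policy bits, new wait counters).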
class SIGfx6CacheControl : public SICacheControl {
protected:
  /// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::GLC);
  }

  /// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::SLC);
  }

public:
  SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};
class SIGfx7CacheControl : public SIGfx6CacheControl {
public:
  SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {}

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;
};
class SIGfx90ACacheControl : public SIGfx7CacheControl {
public:
  SIGfx90ACacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};
class SIGfx940CacheControl : public SIGfx90ACacheControl {
protected:
  /// Sets SC0 bit to "true" if present in \p MI.
  bool enableSC0Bit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::SC0);
  }

  /// Sets SC1 bit to "true" if present in \p MI.
  bool enableSC1Bit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::SC1);
  }

  /// Sets NT bit to "true" if present in \p MI.
  bool enableNTBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::NT);
  }

public:
  SIGfx940CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};
class SIGfx10CacheControl : public SIGfx7CacheControl {
protected:
  /// Sets DLC bit to "true" if present in \p MI.
  bool enableDLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::DLC);
  }

public:
  SIGfx10CacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertBarrierStart(MachineBasicBlock::iterator &MI) const override;
};
class SIGfx11CacheControl : public SIGfx10CacheControl {
public:
  SIGfx11CacheControl(const GCNSubtarget &ST) : SIGfx10CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;
};
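// GFX12 replaces the GLC/SLC/DLC bits with a cache-policy operand carrying
// separate scope (SCOPE_*) and temporal-hint (TH_*) fields, so most overrides
// below reduce to rewriting that operand.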
class SIGfx12CacheControl : public SIGfx11CacheControl {
protected:
  // Sets TH policy to \p Value if CPol operand is present in instruction \p
  // MI. Returns true if \p MI is modified, false otherwise.
  bool setTH(const MachineBasicBlock::iterator MI,
             AMDGPU::CPol::CPol Value) const;

  // Sets scope policy to \p Value if CPol operand is present in instruction
  // \p MI. Returns true if \p MI is modified, false otherwise.
  bool setScope(const MachineBasicBlock::iterator MI,
                AMDGPU::CPol::CPol Value) const;

  // Stores with system scope (SCOPE_SYS) need to wait for:
  // - loads or atomics(returning) - wait for {LOAD|SAMPLE|BVH|KM}CNT==0
  // - non-returning-atomics       - wait for STORECNT==0
  bool
  insertWaitsBeforeSystemScopeStore(const MachineBasicBlock::iterator MI) const;

  bool setAtomicScope(const MachineBasicBlock::iterator &MI,
                      SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const;

public:
  SIGfx12CacheControl(const GCNSubtarget &ST) : SIGfx11CacheControl(ST) {
    // GFX12.0 and GFX12.5 memory models greatly overlap, and in some cases
    // the behavior is different if CU mode is enabled. GFX12.5 only supports
    // CU mode.
    assert(!ST.hasGFX1250Insts() || ST.isCuModeEnabled());
  }

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool finalizeStore(MachineInstr &MI, bool Atomic) const override;

  virtual bool handleCooperativeAtomic(MachineInstr &MI) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }
};
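// The pass driver: walks every machine instruction, classifies it via
// SIMemOpAccess, and expands it using the selected SICacheControl.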
class SIMemoryLegalizer final {
private:
  const MachineModuleInfo &MMI;

  /// Cache Control.
  std::unique_ptr<SICacheControl> CC = nullptr;

  /// List of atomic pseudo instructions.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// Whether \p MI is an atomic instruction that returns a result.
  bool isAtomicRet(const MachineInstr &MI) const {
    return SIInstrInfo::isAtomicRet(MI);
  }

  /// Removes all processed atomic pseudo instructions from the current
  /// function. Returns true if current function is modified, false otherwise.
  bool removeAtomicPseudoMIs();

  /// Expands load operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandLoad(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI);

  /// Expands store operation \p MI.
  bool expandStore(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI);

  /// Expands atomic fence operation \p MI.
  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);

  /// Expands atomic cmpxchg or rmw operation \p MI.
  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                MachineBasicBlock::iterator &MI);

public:
  SIMemoryLegalizer(const MachineModuleInfo &MMI) : MMI(MMI) {}
  bool run(MachineFunction &MF);
};
class SIMemoryLegalizerLegacy final : public MachineFunctionPass {
public:
  static char ID;

  SIMemoryLegalizerLegacy() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return PASS_NAME; }

  bool runOnMachineFunction(MachineFunction &MF) override;
};
static const StringMap<SIAtomicAddrSpace> ASNames = {{
    {"global", SIAtomicAddrSpace::GLOBAL},
    {"local", SIAtomicAddrSpace::LDS},
}};

static void diagnoseUnknownMMRAASName(const MachineInstr &MI, StringRef AS) {
  const MachineFunction *MF = MI.getMF();
  const Function &Fn = MF->getFunction();
  SmallString<128> Str;
  raw_svector_ostream OS(Str);
  OS << "unknown address space '" << AS << "'; expected one of ";
  ListSeparator LS;
  for (const auto &[Name, Val] : ASNames)
    OS << LS << '\'' << Name << '\'';
  Fn.getContext().diagnose(
      DiagnosticInfoUnsupported(Fn, Str.str(), MI.getDebugLoc(), DS_Warning));
}
/// Reads \p MI's MMRAs to parse the "amdgpu-synchronize-as" MMRA. If this tag
/// isn't present, or if it has no meaningful values, returns std::nullopt.
static std::optional<SIAtomicAddrSpace>
getSynchronizeAddrSpaceMD(const MachineInstr &MI) {
  static constexpr StringLiteral FenceASPrefix = "amdgpu-synchronize-as";

  auto MMRA = MMRAMetadata(MI.getMMRAMetadata());
  if (!MMRA)
    return std::nullopt;

  SIAtomicAddrSpace Result = SIAtomicAddrSpace::NONE;
  for (const auto &[Prefix, Suffix] : MMRA) {
    if (Prefix != FenceASPrefix)
      continue;

    if (auto It = ASNames.find(Suffix); It != ASNames.end())
      Result |= It->second;
    else
      diagnoseUnknownMMRAASName(MI, Suffix);
  }

  if (Result == SIAtomicAddrSpace::NONE)
    return std::nullopt;

  return Result;
}
void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
                                      const char *Msg) const {
  const Function &Func = MI->getParent()->getParent()->getFunction();
  Func.getContext().diagnose(
      DiagnosticInfoUnsupported(Func, Msg, MI->getDebugLoc()));
}
std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                               SIAtomicAddrSpace InstrAddrSpace) const {
  if (SSID == SyncScope::System)
    return std::tuple(SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getAgentSSID())
    return std::tuple(SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getClusterSSID())
    return std::tuple(SIAtomicScope::CLUSTER, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getWorkgroupSSID())
    return std::tuple(SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == MMI->getWavefrontSSID())
    return std::tuple(SIAtomicScope::WAVEFRONT, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == SyncScope::SingleThread)
    return std::tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == MMI->getSystemOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::SYSTEM,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getAgentOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::AGENT,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getClusterOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::CLUSTER,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::WORKGROUP,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::WAVEFRONT,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::SINGLETHREAD,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  return std::nullopt;
}
SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
  if (AS == AMDGPUAS::FLAT_ADDRESS)
    return SIAtomicAddrSpace::FLAT;
  if (AS == AMDGPUAS::GLOBAL_ADDRESS)
    return SIAtomicAddrSpace::GLOBAL;
  if (AS == AMDGPUAS::LOCAL_ADDRESS)
    return SIAtomicAddrSpace::LDS;
  if (AS == AMDGPUAS::PRIVATE_ADDRESS)
    return SIAtomicAddrSpace::SCRATCH;
  if (AS == AMDGPUAS::REGION_ADDRESS)
    return SIAtomicAddrSpace::GDS;

  return SIAtomicAddrSpace::OTHER;
}
SIMemOpAccess::SIMemOpAccess(const AMDGPUMachineModuleInfo &MMI_,
                             const GCNSubtarget &ST)
    : MMI(&MMI_), ST(ST) {}
std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getNumMemOperands() > 0);

  SyncScope::ID SSID = SyncScope::SingleThread;
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsNonTemporal = true;
  bool IsVolatile = false;
  bool IsLastUse = false;
  bool IsCooperative = false;

  // Validator should check whether or not MMOs cover the entire set of
  // locations accessed by the memory instruction.
  for (const auto &MMO : MI->memoperands()) {
    IsNonTemporal &= MMO->isNonTemporal();
    IsVolatile |= MMO->isVolatile();
    IsLastUse |= MMO->getFlags() & MOLastUse;
    IsCooperative |= MMO->getFlags() & MOCooperative;
    InstrAddrSpace |=
        toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
    AtomicOrdering OpOrdering = MMO->getSuccessOrdering();
    if (OpOrdering != AtomicOrdering::NotAtomic) {
      const auto &IsSyncScopeInclusion =
          MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
      if (!IsSyncScopeInclusion) {
        reportUnsupported(
            MI, "Unsupported non-inclusive atomic synchronization scope");
        return std::nullopt;
      }

      SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
      Ordering = getMergedAtomicOrdering(Ordering, OpOrdering);
      assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
             MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
      FailureOrdering =
          getMergedAtomicOrdering(FailureOrdering, MMO->getFailureOrdering());
    }
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  if (Ordering != AtomicOrdering::NotAtomic) {
    auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
    if (!ScopeOrNone) {
      reportUnsupported(MI, "Unsupported atomic synchronization scope");
      return std::nullopt;
    }
    std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
        *ScopeOrNone;
    if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
        ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
         OrderingAddrSpace) ||
        ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) ==
         SIAtomicAddrSpace::NONE)) {
      reportUnsupported(MI, "Unsupported atomic address space");
      return std::nullopt;
    }
  }
  return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
                     IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
                     IsNonTemporal, IsLastUse, IsCooperative);
}
std::optional<SIMemOpInfo>
SIMemOpAccess::getLoadInfo(const MachineBasicBlock::iterator &MI) const {
  if (!(MI->mayLoad() && !MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo(ST);

  return constructFromMIWithMMO(MI);
}

std::optional<SIMemOpInfo>
SIMemOpAccess::getStoreInfo(const MachineBasicBlock::iterator &MI) const {
  if (!(!MI->mayLoad() && MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo(ST);

  return constructFromMIWithMMO(MI);
}
std::optional<SIMemOpInfo>
SIMemOpAccess::getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const {
  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return std::nullopt;

  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
  SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());

  auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
  if (!ScopeOrNone) {
    reportUnsupported(MI, "Unsupported atomic synchronization scope");
    return std::nullopt;
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
      *ScopeOrNone;

  if (OrderingAddrSpace != SIAtomicAddrSpace::ATOMIC) {
    reportUnsupported(MI, "Unsupported atomic address space");
    return std::nullopt;
  }

  auto SynchronizeAS = getSynchronizeAddrSpaceMD(*MI);
  if (SynchronizeAS)
    OrderingAddrSpace = *SynchronizeAS;

  return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace,
                     SIAtomicAddrSpace::ATOMIC, IsCrossAddressSpaceOrdering,
                     AtomicOrdering::NotAtomic);
}
std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
    const MachineBasicBlock::iterator &MI) const {
  if (!(MI->mayLoad() && MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo(ST);

  return constructFromMIWithMMO(MI);
}
SICacheControl::SICacheControl(const GCNSubtarget &ST) : ST(ST) {
  TII = ST.getInstrInfo();
  IV = getIsaVersion(ST.getCPU());
  InsertCacheInv = !AmdgcnSkipCacheInvalidations;
}

bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI,
                                    AMDGPU::CPol::CPol Bit) const {
  MachineOperand *CPol = TII->getNamedOperand(*MI, AMDGPU::OpName::cpol);
  if (!CPol)
    return false;

  CPol->setImm(CPol->getImm() | Bit);
  return true;
}
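// Dispatch on subtarget features first (GFX940/GFX90A are distinguished by
// feature bits, not generation), then fall back to the generation number.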
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
  GCNSubtarget::Generation Generation = ST.getGeneration();
  if (ST.hasGFX940Insts())
    return std::make_unique<SIGfx940CacheControl>(ST);
  if (ST.hasGFX90AInsts())
    return std::make_unique<SIGfx90ACacheControl>(ST);
  if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
    return std::make_unique<SIGfx6CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX10)
    return std::make_unique<SIGfx7CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX11)
    return std::make_unique<SIGfx10CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX12)
    return std::make_unique<SIGfx11CacheControl>(ST);
  return std::make_unique<SIGfx12CacheControl>(ST);
}
bool SIGfx6CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Set L1 cache policy to MISS_EVICT.
      // Note: there is no L2 cache bypass policy at the ISA level.
      Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}

bool SIGfx6CacheControl::enableStoreCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  // The L1 cache is write through so does not need to be bypassed. There is
  // no bypass control for the L2 cache at the ISA level.
  return false;
}

bool SIGfx6CacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  // Do not set GLC for RMW atomic operations as L0/L1 cache is automatically
  // bypassed, and the GLC bit is instead used to indicate if they are
  // return or no-return.
  return false;
}
bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only update load and store, not LLVM IR atomic read-modify-write
  // instructions.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set L1 cache policy to be MISS_EVICT for load instructions
    // and MISS_LRU for store instructions.
    if (Op == SIMemOp::LOAD)
      Changed |= enableGLCBit(MI);

    // Ensure operation is done before subsequent operations so they are not
    // reordered with it.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered);
    return Changed;
  }

  if (IsNonTemporal) {
    // Setting both GLC and SLC configures L1 cache policy to MISS_EVICT
    // and the L2 cache policy to STREAM.
    Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);
    return Changed;
  }

  return Changed;
}
bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                    bool IsCrossAddrSpaceOrdering, Position Pos,
                                    AtomicOrdering Order) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  bool VMCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      VMCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L1 cache keeps all memory operations in order for
      // wavefronts in the same work-group.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // An "S_WAITCNT lgkmcnt(0)" is only needed when also synchronizing
      // with global/GDS memory, as LDS operations could otherwise be
      // reordered with respect to later global/GDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations in order for the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // A GDS "S_WAITCNT lgkmcnt(0)" is only needed when also synchronizing
      // with global/LDS memory, for the same reason as above.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The GDS keeps all memory operations in order for the same work-group.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (VMCnt || LGKMCnt) {
    unsigned WaitCntImmediate =
        AMDGPU::encodeWaitcnt(IV, VMCnt ? 0 : getVmcntBitMask(IV),
                              getExpcntBitMask(IV),
                              LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(WaitCntImmediate);
    Changed = true;
  }

  // Direct loads to LDS are tracked with vmcnt rather than lgkmcnt, so an
  // ordering of LDS at workgroup scope must also wait on vmcnt. (The exact
  // sequence is elided in this excerpt; the wait below is a reconstruction.)
  if (Order != AtomicOrdering::Acquire && Scope == SIAtomicScope::WORKGROUP &&
      (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV),
                                      getLgkmcntBitMask(IV)));
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       bool IsCrossAddrSpaceOrdering,
                                       Position Pos) const {
  return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                    IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release);
}
bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();

  const unsigned InvalidateL1 = STM.isAmdPalOS() || STM.isMesa3DOS()
                                    ? AMDGPU::BUFFER_WBINVL1
                                    : AMDGPU::BUFFER_WBINVL1_VOL;

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(InvalidateL1));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx90ACacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Set the L1 cache policy to MISS_LRU.
      // Note: there is no L2 cache bypass policy at the ISA level.
      Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // In threadgroup split mode the waves of a work-group can be executing
      // on different CUs, so the per-CU L1 must be bypassed. Otherwise all
      // waves of a work-group are on the same CU and share the L1.
      if (ST.isTgSplitEnabled())
        Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}
bool SIGfx90ACacheControl::enableStoreCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Store atomics implicitly write through the L1 cache, so no bypass is
      // needed.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return false;
}
bool SIGfx90ACacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // RMW atomic operations implicitly bypass the L1 cache, and the GLC
      // bit is instead used to indicate if they are return or no-return.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return false;
}
bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only update load and store, not LLVM IR atomic read-modify-write
  // instructions.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set L1 cache policy to be MISS_EVICT for load instructions
    // and MISS_LRU for store instructions.
    if (Op == SIMemOp::LOAD)
      Changed |= enableGLCBit(MI);

    // Ensure operation is done before subsequent operations so they are not
    // reordered with it.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered);
    return Changed;
  }

  if (IsNonTemporal) {
    // Setting both GLC and SLC configures L1 cache policy to MISS_EVICT
    // and the L2 cache policy to STREAM.
    Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);
    return Changed;
  }

  return Changed;
}
bool SIGfx90ACacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                      SIAtomicScope Scope,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsCrossAddrSpaceOrdering,
                                      Position Pos,
                                      AtomicOrdering Order) const {
  if (ST.isTgSplitEnabled()) {
    // In threadgroup split mode the waves of a work-group can be executing on
    // different CUs. Therefore need to wait for global or GDS memory
    // operations to complete to ensure they are visible to waves in the
    // other CUs.
    if (((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH |
                       SIAtomicAddrSpace::GDS)) != SIAtomicAddrSpace::NONE) &&
        (Scope == SIAtomicScope::WORKGROUP)) {
      // Same as GFX7 using agent scope.
      Scope = SIAtomicScope::AGENT;
    }
    // In threadgroup split mode LDS cannot be allocated, so no need to wait
    // for LDS memory operations.
    AddrSpace &= ~SIAtomicAddrSpace::LDS;
  }
  return SIGfx7CacheControl::insertWait(MI, Scope, AddrSpace, Op,
                                        IsCrossAddrSpaceOrdering, Pos, Order);
}
bool SIGfx90ACacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Ensures that following loads will not see stale remote VMEM data or
      // stale local VMEM data with MTYPE NC.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INVL2));
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
      // Same as GFX7.
      break;
    case SIAtomicScope::WORKGROUP:
      // In threadgroup split mode the waves of a work-group can be executing
      // on different CUs, so the per-CU L1 needs invalidating.
      if (ST.isTgSplitEnabled()) {
        // Same as GFX7 using agent scope.
        Scope = SIAtomicScope::AGENT;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Same as GFX7.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  Changed |= SIGfx7CacheControl::insertAcquire(MI, Scope, AddrSpace, Pos);

  return Changed;
}
bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         bool IsCrossAddrSpaceOrdering,
                                         Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Initiate writeback of any dirty L2 cache lines. The following
      // GFX7-style release will insert the "S_WAITCNT vmcnt(0)" needed for
      // the writeback to complete.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2));
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Same as GFX7.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  Changed |= SIGfx7CacheControl::insertRelease(MI, Scope, AddrSpace,
                                               IsCrossAddrSpaceOrdering, Pos);

  return Changed;
}
bool SIGfx940CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Set SC bits to indicate system scope.
      Changed |= enableSC0Bit(MI);
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::AGENT:
      // Set SC bits to indicate agent scope.
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // In threadgroup split mode the waves of a work-group can be executing
      // on different CUs, so the per-CU L1 must be bypassed. Setting SC bits
      // to indicate work-group scope does this automatically.
      Changed |= enableSC0Bit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Leave SC bits unset to indicate wavefront scope.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}
bool SIGfx940CacheControl::enableStoreCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Set SC bits to indicate system scope.
      Changed |= enableSC0Bit(MI);
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::AGENT:
      // Set SC bits to indicate agent scope.
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // Set SC bits to indicate work-group scope.
      Changed |= enableSC0Bit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Leave SC bits unset to indicate wavefront scope.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}
bool SIGfx940CacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Set SC1 bit to indicate system scope.
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // RMW atomic operations implicitly bypass the L1 cache and only use SC1
      // to indicate system scope; SC0 indicates return vs. no-return.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}
bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only update load and store, not LLVM IR atomic read-modify-write
  // instructions.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set SC bits to indicate system scope.
    Changed |= enableSC0Bit(MI);
    Changed |= enableSC1Bit(MI);

    // Ensure operation is done before subsequent operations so they are not
    // reordered with it.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered);
    return Changed;
  }

  if (IsNonTemporal) {
    Changed |= enableNTBit(MI);
    return Changed;
  }

  return Changed;
}
bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Ensures that following loads will not see stale remote VMEM data or
      // stale local VMEM data with MTYPE NC.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
          .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
      // Ensures that following loads will not see stale local VMEM data.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
          .addImm(AMDGPU::CPol::SC1);
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In threadgroup split mode the waves of a work-group can be executing
      // on different CUs, so the per-CU L1 needs invalidating.
      if (ST.isTgSplitEnabled()) {
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
            .addImm(AMDGPU::CPol::SC0);
        Changed = true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         bool IsCrossAddrSpaceOrdering,
                                         Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // The "BUFFER_WBL2" is guaranteed to initiate writeback of any dirty
      // cache lines of earlier writes by the same wave. The wait inserted
      // below ensures the writeback has completed.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
          // Set SC bits to indicate system scope.
          .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
          // Set SC bits to indicate agent scope.
          .addImm(AMDGPU::CPol::SC1);
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Do not generate "BUFFER_WBL2" as there are no caches it would write
      // back, and it would require an otherwise unnecessary
      // "S_WAITCNT vmcnt(0)".
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  // Ensure the necessary S_WAITCNT needed by any "BUFFER_WBL2" as well as
  // other memory operations has completed.
  Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                        IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release);

  return Changed;
}
bool SIGfx10CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Set the L0 and L1 cache policies to MISS_EVICT.
      // Note: there is no L2 cache coherent bypass control at the ISA level.
      Changed |= enableGLCBit(MI);
      Changed |= enableDLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so the per-CU L0 must be bypassed. In CU mode all waves
      // of a work-group share the same L0.
      if (!ST.isCuModeEnabled())
        Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}
bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only update load and store, not LLVM IR atomic read-modify-write
  // instructions.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set L0 and L1 cache policy to be MISS_EVICT for load instructions
    // and MISS_LRU for store instructions.
    if (Op == SIMemOp::LOAD) {
      Changed |= enableGLCBit(MI);
      Changed |= enableDLCBit(MI);
    }

    // Ensure operation is done before subsequent operations so they are not
    // reordered with it.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered);
    return Changed;
  }

  if (IsNonTemporal) {
    // For loads, setting SLC configures L0/L1 cache policy to HIT_EVICT and
    // the L2 cache policy to STREAM. For stores, setting both GLC and SLC
    // configures L0/L1 cache policy to MISS_EVICT and L2 to STREAM.
    if (Op == SIMemOp::STORE)
      Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);
    return Changed;
  }

  return Changed;
}
bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos,
                                     AtomicOrdering Order) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  bool VMCnt = false;
  bool VSCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        VMCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        VSCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so waits are needed to make operations visible to waves
      // in the other CU as the L0 is per CU. In CU mode all waves of a
      // work-group share the same L0.
      if (!ST.isCuModeEnabled()) {
        if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
          VMCnt |= true;
        if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
          VSCnt |= true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L0 cache keeps all memory operations in order for work-items in
      // the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // An "S_WAITCNT lgkmcnt(0)" is only needed when also synchronizing
      // with global/GDS memory, as LDS operations could otherwise be
      // reordered with respect to later global/GDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations in order for the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // A GDS "S_WAITCNT lgkmcnt(0)" is only needed when also synchronizing
      // with global/LDS memory, for the same reason as above.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The GDS keeps all memory operations in order for the same work-group.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (VMCnt || LGKMCnt) {
    unsigned WaitCntImmediate =
        AMDGPU::encodeWaitcnt(IV, VMCnt ? 0 : getVmcntBitMask(IV),
                              getExpcntBitMask(IV),
                              LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(WaitCntImmediate);
    Changed = true;
  }

  if (VSCnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT_soft))
        .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
        .addImm(0);
    Changed = true;
  }

  // As on GFX6: a workgroup-scope ordering that includes LDS must also cover
  // direct loads to LDS, which are tracked with vmcnt/vscnt. (The exact
  // sequence is elided in this excerpt; the waits below are a reconstruction.)
  if (Order != AtomicOrdering::Acquire && Scope == SIAtomicScope::WORKGROUP &&
      (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV),
                                      getLgkmcntBitMask(IV)));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT_soft))
        .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
        .addImm(0);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // The order of invalidates matters here: the L1 must be invalidated
      // before the L0 it backs.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so the per-CU L0 needs invalidating. In CU mode all
      // waves of a work-group share the same L0.
      if (!ST.isCuModeEnabled()) {
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
        Changed = true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx10CacheControl::insertBarrierStart(
    MachineBasicBlock::iterator &MI) const {
  // We need to wait on vm_vsrc so that the subsequent barrier observes all
  // prior memory operations. This is only needed in WGP mode.
  if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
      .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));
  return true;
}
bool SIGfx11CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Set the L0 and L1 cache policies to MISS_EVICT.
      Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so the per-CU L0 must be bypassed. In CU mode all waves
      // of a work-group share the same L0.
      if (!ST.isCuModeEnabled())
        Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}
bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only update load and store, not LLVM IR atomic read-modify-write
  // instructions.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set L0 and L1 cache policy to be MISS_EVICT for load instructions
    // and MISS_LRU for store instructions.
    if (Op == SIMemOp::LOAD)
      Changed |= enableGLCBit(MI);

    // Set MALL NOALLOC for load and store instructions.
    Changed |= enableDLCBit(MI);

    // Ensure operation is done before subsequent operations so they are not
    // reordered with it.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered);
    return Changed;
  }

  if (IsNonTemporal) {
    // For stores, setting both GLC and SLC configures L0/L1 cache policy to
    // MISS_EVICT and L2 to STREAM; for loads SLC alone suffices.
    if (Op == SIMemOp::STORE)
      Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);

    // Set MALL NOALLOC for load and store instructions.
    Changed |= enableDLCBit(MI);
    return Changed;
  }

  return Changed;
}
bool SIGfx12CacheControl::setTH(const MachineBasicBlock::iterator MI,
                                AMDGPU::CPol::CPol Value) const {
  MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
  if (!CPol)
    return false;

  uint64_t NewTH = Value & AMDGPU::CPol::TH;
  if ((CPol->getImm() & AMDGPU::CPol::TH) != NewTH) {
    CPol->setImm((CPol->getImm() & ~AMDGPU::CPol::TH) | NewTH);
    return true;
  }

  return false;
}

bool SIGfx12CacheControl::setScope(const MachineBasicBlock::iterator MI,
                                   AMDGPU::CPol::CPol Value) const {
  MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
  if (!CPol)
    return false;

  uint64_t NewScope = Value & AMDGPU::CPol::SCOPE;
  if ((CPol->getImm() & AMDGPU::CPol::SCOPE) != NewScope) {
    CPol->setImm((CPol->getImm() & ~AMDGPU::CPol::SCOPE) | NewScope);
    return true;
  }

  return false;
}
bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
    const MachineBasicBlock::iterator MI) const {
  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_LOADCNT_soft)).addImm(0);
  if (ST.hasImageInsts()) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_SAMPLECNT_soft)).addImm(0);
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_BVHCNT_soft)).addImm(0);
  }
  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_KMCNT_soft)).addImm(0);
  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_STORECNT_soft)).addImm(0);

  return true;
}
bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos,
                                     AtomicOrdering Order) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  bool LOADCnt = false;
  bool DSCnt = false;
  bool STORECnt = false;

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::CLUSTER:
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        LOADCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        STORECnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so waits are needed to make operations visible to waves
      // in the other CU as the L0 is per CU. Waits are also required on
      // GFX12.50 even in CU mode.
      if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts()) {
        if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
          LOADCnt |= true;
        if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
          STORECnt |= true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L0 cache keeps all memory operations in order for work-items in
      // the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::CLUSTER:
    case SIAtomicScope::WORKGROUP:
      // An "S_WAIT_DSCNT 0" is only needed when also synchronizing with
      // global memory, as LDS operations could otherwise be reordered with
      // respect to later global memory operations of the same wave.
      DSCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations in order for the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (LOADCnt) {
    // Acquire sequences only need to wait on the previous atomic operation,
    // so the image-related counters are only drained otherwise.
    if (Order != AtomicOrdering::Acquire && ST.hasImageInsts()) {
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_BVHCNT_soft)).addImm(0);
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_SAMPLECNT_soft)).addImm(0);
    }
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_LOADCNT_soft)).addImm(0);
    Changed = true;
  }

  if (STORECnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_STORECNT_soft)).addImm(0);
    Changed = true;
  }

  if (DSCnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_DSCNT_soft)).addImm(0);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        Position Pos) const {
  if (!InsertCacheInv)
    return false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // The scratch address space does not need the global memory cache to be
  // invalidated as all memory operations by the same thread are sequentially
  // consistent, and no other thread can access scratch memory. Other address
  // spaces do not have a cache.
  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
    return false;

  AMDGPU::CPol::CPol ScopeImm = AMDGPU::CPol::SCOPE_DEV;
  switch (Scope) {
  case SIAtomicScope::SYSTEM:
    ScopeImm = AMDGPU::CPol::SCOPE_SYS;
    break;
  case SIAtomicScope::AGENT:
    ScopeImm = AMDGPU::CPol::SCOPE_DEV;
    break;
  case SIAtomicScope::CLUSTER:
    ScopeImm = AMDGPU::CPol::SCOPE_SE;
    break;
  case SIAtomicScope::WORKGROUP:
    // In WGP mode the waves of a work-group can be executing on either CU of
    // the WGP, so the per-CU L0 needs invalidating. In CU mode all waves of
    // a work-group share the same L0.
    if (ST.isCuModeEnabled())
      return false;

    ScopeImm = AMDGPU::CPol::SCOPE_SE;
    break;
  case SIAtomicScope::WAVEFRONT:
  case SIAtomicScope::SINGLETHREAD:
    // No cache to invalidate.
    return false;
  default:
    llvm_unreachable("Unsupported synchronization scope");
  }

  if (Pos == Position::AFTER)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_INV)).addImm(ScopeImm);

  if (Pos == Position::AFTER)
    --MI;

  return true;
}
bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        bool IsCrossAddrSpaceOrdering,
                                        Position Pos) const {
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // The scratch address space does not need the global memory cache to be
  // written back as all memory operations by the same thread are
  // sequentially consistent, and no other thread can access scratch memory.
  // Other address spaces do not have a cache.
  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
    return false;

  if (Pos == Position::AFTER)
    ++MI;

  // GLOBAL_WB is always needed, even for write-through caches, as it
  // additionally ensures all operations have reached the desired cache level.
  bool SkipWB = false;
  AMDGPU::CPol::CPol ScopeImm = AMDGPU::CPol::SCOPE_DEV;
  switch (Scope) {
  case SIAtomicScope::SYSTEM:
    ScopeImm = AMDGPU::CPol::SCOPE_SYS;
    break;
  case SIAtomicScope::AGENT:
    // Assumed from context: GFX12.50 does not require a device-scope
    // writeback here, so only the wait below is emitted.
    if (ST.hasGFX1250Insts()) {
      SkipWB = true;
      break;
    }
    ScopeImm = AMDGPU::CPol::SCOPE_DEV;
    break;
  case SIAtomicScope::CLUSTER:
  case SIAtomicScope::WORKGROUP:
    // No WB necessary, but we still have to wait.
    SkipWB = true;
    break;
  case SIAtomicScope::WAVEFRONT:
  case SIAtomicScope::SINGLETHREAD:
    // No WB or wait necessary here.
    return false;
  default:
    llvm_unreachable("Unsupported synchronization scope");
  }

  if (!SkipWB)
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB)).addImm(ScopeImm);

  if (Pos == Position::AFTER)
    --MI;

  // We always have to wait for previous memory operations (load/store) to
  // complete, whether we inserted a WB or not.
  insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
             IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release);

  return true;
}
bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only update load and store, not LLVM IR atomic read-modify-write
  // instructions.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsLastUse) {
    // Set last-use hint.
    Changed |= setTH(MI, AMDGPU::CPol::TH_LU);
  } else if (IsNonTemporal) {
    // Set non-temporal hint for all cache levels.
    Changed |= setTH(MI, AMDGPU::CPol::TH_NT);
  }

  if (IsVolatile) {
    Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);

    // Ensure operation is done before subsequent operations so they are not
    // reordered with it.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered);
  }

  return Changed;
}
bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const {
  assert(MI.mayStore() && "Not a Store inst");

  const bool IsRMW = (MI.mayLoad() && MI.mayStore());
  bool Changed = false;

  // An xcnt wait is needed before flat/global atomic stores on targets that
  // require it.
  if (Atomic && ST.requiresWaitXCntBeforeAtomicStores() && TII->isFLAT(MI)) {
    MachineBasicBlock &MBB = *MI.getParent();
    BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::S_WAIT_XCNT_soft))
        .addImm(0);
    Changed = true;
  }

  // The cache-policy handling below does not apply to read-modify-write
  // instructions.
  if (IsRMW)
    return Changed;

  MachineOperand *CPol = TII->getNamedOperand(MI, OpName::cpol);
  if (!CPol)
    return Changed;

  const unsigned Scope = CPol->getImm() & AMDGPU::CPol::SCOPE;

  // Extra waits are needed before system-scope atomic stores.
  if (Atomic && Scope == AMDGPU::CPol::SCOPE_SYS)
    Changed |= insertWaitsBeforeSystemScopeStore(MI.getIterator());

  return Changed;
}
bool SIGfx12CacheControl::handleCooperativeAtomic(MachineInstr &MI) const {
  if (!ST.hasGFX1250Insts())
    return false;

  MachineOperand *CPol = TII->getNamedOperand(MI, OpName::cpol);
  assert(CPol && "No CPol operand?");
  // Assumed from context: widen the cache-policy scope so the cooperative
  // atomic is coherent across the cooperating work-groups.
  return setScope(MI.getIterator(), AMDGPU::CPol::SCOPE_SE);
}
bool SIGfx12CacheControl::setAtomicScope(const MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);
      break;
    case SIAtomicScope::AGENT:
      Changed |= setScope(MI, AMDGPU::CPol::SCOPE_DEV);
      break;
    case SIAtomicScope::CLUSTER:
      Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SE);
      break;
    case SIAtomicScope::WORKGROUP:
      // In workgroup mode, SCOPE_SE is needed as waves can execute on
      // different CUs that access different L0s.
      if (!ST.isCuModeEnabled())
        Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SE);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}
bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}
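// The expand* routines implement the usual lowering of an atomic operation:
// cache bypass for the access itself, a release (wait + writeback) before it
// when its ordering requires one, and an acquire (wait + invalidate) after it.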
bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    const AtomicOrdering Order = MOI.getOrdering();
    if (Order == AtomicOrdering::Monotonic ||
        Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace());
    }

    if (MOI.isCooperative())
      Changed |= CC->handleCooperativeAtomic(*MI);

    if (Order == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE, Order);

    if (Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->insertWait(
          MI, MOI.getScope(), MOI.getInstrAddrSpace(), SIMemOp::LOAD,
          MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER, Order);
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::AFTER);
    }

    return Changed;
  }

  // Atomic instructions already bypass caches to the scope specified by the
  // SyncScope operand. Only non-atomic volatile and nontemporal/last-use
  // instructions need additional treatment.
  Changed |= CC->enableVolatileAndOrNonTemporal(
      MI, MOI.getInstrAddrSpace(), SIMemOp::LOAD, MOI.isVolatile(),
      MOI.isNonTemporal(), MOI.isLastUse());

  return Changed;
}
bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  MachineInstr &StoreMI = *MI;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
        MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableStoreCacheBypass(MI, MOI.getScope(),
                                            MOI.getOrderingAddrSpace());
    }

    if (MOI.isCooperative())
      Changed |= CC->handleCooperativeAtomic(*MI);

    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    Changed |= CC->finalizeStore(StoreMI, /*Atomic=*/true);
    return Changed;
  }

  // Atomic instructions already bypass caches to the scope specified by the
  // SyncScope operand.
  Changed |= CC->enableVolatileAndOrNonTemporal(
      MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
      MOI.isNonTemporal());

  Changed |= CC->finalizeStore(StoreMI, /*Atomic=*/false);
  return Changed;
}
bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  AtomicPseudoMIs.push_back(MI);
  bool Changed = false;

  const SIAtomicAddrSpace OrderingAddrSpace = MOI.getOrderingAddrSpace();

  if (MOI.isAtomic()) {
    const AtomicOrdering Order = MOI.getOrdering();
    if (Order == AtomicOrdering::Acquire) {
      Changed |= CC->insertWait(
          MI, MOI.getScope(), OrderingAddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
          MOI.getIsCrossAddressSpaceOrdering(), Position::BEFORE, Order);
    }

    if (Order == AtomicOrdering::Release ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(), OrderingAddrSpace,
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    if (Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertAcquire(MI, MOI.getScope(), OrderingAddrSpace,
                                   Position::BEFORE);

    return Changed;
  }

  return Changed;
}
bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(
    const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  MachineInstr &RMWMI = *MI;

  if (MOI.isAtomic()) {
    const AtomicOrdering Order = MOI.getOrdering();
    if (Order == AtomicOrdering::Monotonic ||
        Order == AtomicOrdering::Acquire || Order == AtomicOrdering::Release ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableRMWCacheBypass(MI, MOI.getScope(),
                                          MOI.getInstrAddrSpace());
    }

    if (Order == AtomicOrdering::Release ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    if (Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->insertWait(
          MI, MOI.getScope(), MOI.getInstrAddrSpace(),
          isAtomicRet(*MI) ? SIMemOp::LOAD : SIMemOp::STORE,
          MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER, Order);
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::AFTER);
    }

    Changed |= CC->finalizeStore(RMWMI, /*Atomic=*/true);
    return Changed;
  }

  return Changed;
}
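// Entry points: the legacy pass manager wrapper and the new pass manager run
// method both delegate to SIMemoryLegalizer::run.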
bool SIMemoryLegalizerLegacy::runOnMachineFunction(MachineFunction &MF) {
  const MachineModuleInfo &MMI =
      getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
  return SIMemoryLegalizer(MMI).run(MF);
}

PreservedAnalyses
SIMemoryLegalizerPass::run(MachineFunction &MF,
                           MachineFunctionAnalysisManager &MFAM) {
  auto *MMI = MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(MF)
                  .getCachedResult<MachineModuleAnalysis>(
                      *MF.getFunction().getParent());
  assert(MMI && "MachineModuleAnalysis must be available");
  if (!SIMemoryLegalizer(MMI->getMMI()).run(MF))
    return PreservedAnalyses::all();
  return getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>();
}
bool SIMemoryLegalizer::run(MachineFunction &MF) {
  bool Changed = false;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  SIMemOpAccess MOA(MMI.getObjFileInfo<AMDGPUMachineModuleInfo>(), ST);
  CC = SICacheControl::create(ST);

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      // Unbundle instructions after the post-RA scheduler.
      if (MI->isBundle() && MI->mayLoadOrStore()) {
        MachineBasicBlock::instr_iterator II(MI->getInstrIterator());
        for (MachineBasicBlock::instr_iterator I = ++II, E = MBB.instr_end();
             I != E && I->isBundledWithPred(); ++I) {
          I->unbundleFromPred();
          for (MachineOperand &MO : I->operands())
            if (MO.isReg())
              MO.setIsInternalRead(false);
        }

        MI->eraseFromParent();
        MI = II->getIterator();
      }

      if (ST.getInstrInfo()->isBarrierStart(MI->getOpcode())) {
        Changed |= CC->insertBarrierStart(MI);
        continue;
      }

      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

      if (const auto &MOI = MOA.getLoadInfo(MI))
        Changed |= expandLoad(*MOI, MI);
      else if (const auto &MOI = MOA.getStoreInfo(MI)) {
        Changed |= expandStore(*MOI, MI);
      } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(*MOI, MI);
      else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
        Changed |= expandAtomicCmpxchgOrRmw(*MOI, MI);
    }
  }

  Changed |= removeAtomicPseudoMIs();
  return Changed;
}
char SIMemoryLegalizerLegacy::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizerLegacy::ID;

INITIALIZE_PASS(SIMemoryLegalizerLegacy, DEBUG_TYPE, PASS_NAME, false, false)

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizerLegacy();
}