#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"

static cl::opt<bool> AmdgcnSkipCacheInvalidations(
    "amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden,
    cl::desc("Use this to skip inserting cache invalidating instructions."));
/// Memory synchronization scopes, ordered from narrowest to widest so that
/// std::min selects the narrower of two scopes.
enum class SIAtomicScope {
  NONE,
  SINGLETHREAD,
  WAVEFRONT,
  WORKGROUP,
  CLUSTER,
  AGENT,
  SYSTEM
};
/// Bitmask of the address spaces an instruction may access.
enum class SIAtomicAddrSpace {
  NONE = 0u,
  GLOBAL = 1u << 0,
  LDS = 1u << 1,
  SCRATCH = 1u << 2,
  GDS = 1u << 3,
  OTHER = 1u << 4,

  /// The address spaces that can be accessed by a FLAT instruction.
  FLAT = GLOBAL | LDS | SCRATCH,

  /// The address spaces that can be accessed by atomic instructions.
  ATOMIC = GLOBAL | LDS | SCRATCH | GDS,

  /// All address spaces.
  ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,

  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
};

/// Memory operation kinds, as a bitmask.
enum class SIMemOp {
  NONE = 0u,
  LOAD = 1u << 0,
  STORE = 1u << 1,
  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ STORE)
};

/// Position to insert a new instruction relative to an existing instruction.
enum class Position {
  BEFORE,
  AFTER
};
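// Illustrative sketch (not part of the original source): SIAtomicAddrSpace
// and SIMemOp are bitmask enums, so address-space sets compose and test with
// ordinary bitwise operators, e.g.:
//
//   SIAtomicAddrSpace AS = SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::LDS;
//   bool TouchesLDS = (AS & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE;
//   bool FlatOnly = (AS & ~SIAtomicAddrSpace::FLAT) == SIAtomicAddrSpace::NONE;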
class SIMemOpInfo final {
private:
  friend class SIMemOpAccess;

  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicScope Scope = SIAtomicScope::SYSTEM;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  bool IsVolatile = false;
  bool IsNonTemporal = false;
  bool IsLastUse = false;
  bool IsCooperative = false;
  SIMemOpInfo(const GCNSubtarget &ST,
              AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
              SIAtomicScope Scope = SIAtomicScope::SYSTEM,
              SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
              SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
              bool IsCrossAddressSpaceOrdering = true,
              AtomicOrdering FailureOrdering =
                  AtomicOrdering::SequentiallyConsistent,
              bool IsVolatile = false, bool IsNonTemporal = false,
              bool IsLastUse = false, bool IsCooperative = false)
      : Ordering(Ordering), FailureOrdering(FailureOrdering), Scope(Scope),
        OrderingAddrSpace(OrderingAddrSpace), InstrAddrSpace(InstrAddrSpace),
        IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
        IsVolatile(IsVolatile), IsNonTemporal(IsNonTemporal),
        IsLastUse(IsLastUse), IsCooperative(IsCooperative) {
    if (Ordering == AtomicOrdering::NotAtomic) {
      assert(!IsCooperative && "Cannot be cooperative & non-atomic!");
      assert(Scope == SIAtomicScope::NONE &&
             OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
             !IsCrossAddressSpaceOrdering &&
             FailureOrdering == AtomicOrdering::NotAtomic);
      return;
    }

    assert(Scope != SIAtomicScope::NONE &&
           (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
               SIAtomicAddrSpace::NONE &&
           (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
               SIAtomicAddrSpace::NONE);

    // There is no cross address space ordering if the ordering address space
    // is the same as the instruction address space and contains only a single
    // address space.
    if ((OrderingAddrSpace == InstrAddrSpace) &&
        isPowerOf2_32(uint32_t(InstrAddrSpace)))
      this->IsCrossAddressSpaceOrdering = false;

    // Limit the scope to the maximum supported by the instruction's address
    // spaces.
    if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
        SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
    } else if ((InstrAddrSpace &
                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
               SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
    } else if ((InstrAddrSpace &
                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
                  SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::AGENT);
    }

    // Cluster scope requires hardware support; fold it to agent scope on
    // targets without clusters.
    if (this->Scope == SIAtomicScope::CLUSTER && !ST.hasClusters())
      this->Scope = SIAtomicScope::AGENT;
  }
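  // Worked example (illustrative only; the constructor is private and is
  // reached via SIMemOpAccess): an LDS-only atomic requested at system scope,
  //
  //   SIMemOpInfo Info(ST, AtomicOrdering::Acquire, SIAtomicScope::SYSTEM,
  //                    SIAtomicAddrSpace::LDS, SIAtomicAddrSpace::LDS);
  //
  // is clamped to SIAtomicScope::WORKGROUP by the logic above, since LDS is
  // only shared within a work-group; an LDS|GDS access would be clamped to
  // AGENT instead.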
public:
  /// \returns Atomic synchronization scope of the machine instruction used
  /// to create this SIMemOpInfo.
  SIAtomicScope getScope() const { return Scope; }

  /// \returns Ordering constraint of the machine instruction used to create
  /// this SIMemOpInfo.
  AtomicOrdering getOrdering() const { return Ordering; }

  /// \returns Failure ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getFailureOrdering() const { return FailureOrdering; }

  /// \returns The address spaces accessed by the instruction.
  SIAtomicAddrSpace getInstrAddrSpace() const { return InstrAddrSpace; }

  /// \returns The address spaces ordered by the instruction.
  SIAtomicAddrSpace getOrderingAddrSpace() const { return OrderingAddrSpace; }

  /// \returns True if the ordering is cross address space.
  bool getIsCrossAddressSpaceOrdering() const {
    return IsCrossAddressSpaceOrdering;
  }

  /// \returns True if the memory access is volatile.
  bool isVolatile() const { return IsVolatile; }

  /// \returns True if the memory access is nontemporal.
  bool isNonTemporal() const { return IsNonTemporal; }

  /// \returns True if the memory access is the last use of its data.
  bool isLastUse() const { return IsLastUse; }

  /// \returns True if the memory access is a cooperative atomic.
  bool isCooperative() const { return IsCooperative; }

  /// \returns True if the ordering constraint is atomic.
  bool isAtomic() const { return Ordering != AtomicOrdering::NotAtomic; }
};
class SIMemOpAccess final {
private:
  const AMDGPUMachineModuleInfo *MMI = nullptr;
  const GCNSubtarget &ST;

  /// Reports unsupported message \p Msg for \p MI to the LLVM context.
  void reportUnsupported(const MachineBasicBlock::iterator &MI,
                         const char *Msg) const;

  /// \returns The scope, ordered address spaces, and whether the ordering is
  /// cross address space for scope \p SSID, or std::nullopt if unsupported.
  std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace) const;

  /// \returns The SIAtomicAddrSpace for target address space \p AS.
  SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;

  /// \returns Info constructed from \p MI, which has at least one machine
  /// memory operand.
  std::optional<SIMemOpInfo>
  constructFromMIWithMMO(const MachineBasicBlock::iterator &MI) const;

public:
  SIMemOpAccess(const AMDGPUMachineModuleInfo &MMI, const GCNSubtarget &ST);

  /// \returns Load info if \p MI is a load, std::nullopt otherwise.
  std::optional<SIMemOpInfo>
  getLoadInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns Store info if \p MI is a store, std::nullopt otherwise.
  std::optional<SIMemOpInfo>
  getStoreInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns Atomic fence info if \p MI is a fence, std::nullopt otherwise.
  std::optional<SIMemOpInfo>
  getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns Atomic cmpxchg/rmw info if applicable, std::nullopt otherwise.
  std::optional<SIMemOpInfo>
  getAtomicCmpxchgOrRmwInfo(const MachineBasicBlock::iterator &MI) const;
};
class SICacheControl {
protected:
  /// AMDGPU subtarget info.
  const GCNSubtarget &ST;

  /// Instruction info.
  const SIInstrInfo *TII = nullptr;

  IsaVersion IV;

  /// Whether to insert cache invalidating instructions.
  bool InsertCacheInv;

  SICacheControl(const GCNSubtarget &ST);

  /// Sets named bit \p Bit to "true" if present in instruction \p MI.
  /// \returns True if \p MI is modified, false otherwise.
  bool enableNamedBit(const MachineBasicBlock::iterator MI,
                      AMDGPU::CPol::CPol Bit) const;

public:
  /// \returns True if the address spaces \p AS can affect the global address
  /// space.
  bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const;

  /// Create a cache control for the subtarget \p ST.
  static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);

  /// Update \p MI memory load instruction to bypass any caches up to the
  /// \p Scope memory scope for address spaces \p AddrSpace.
  virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI memory store instruction likewise.
  virtual bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                                      SIAtomicScope Scope,
                                      SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI memory read-modify-write instruction likewise.
  virtual bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI to indicate a volatile, nontemporal, and/or last-use
  /// access.
  virtual bool enableVolatileAndOrNonTemporal(
      MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
      bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const = 0;

  /// Add final touches to a store \p MI; \p Atomic says whether it is an
  /// atomic access.
  virtual bool finalizeStore(MachineInstr &MI, bool Atomic) const {
    return false;
  }

  /// Handle a cooperative atomic \p MI. Must not be reached on targets
  /// without cooperative atomics.
  virtual bool handleCooperativeAtomic(MachineInstr &MI) const {
    llvm_unreachable(
        "cooperative atomics are not available on this architecture");
  }

  /// Insert any wait needed before \p MI for the given scope, address
  /// spaces, and memory operation kinds.
  virtual bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                          SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                          bool IsCrossAddrSpaceOrdering, Position Pos,
                          AtomicOrdering Order) const = 0;

  /// Insert the instructions needed for an acquire at \p Pos.
  virtual bool insertAcquire(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace,
                             Position Pos) const = 0;

  /// Insert the instructions needed for a release at \p Pos.
  virtual bool insertRelease(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace,
                             bool IsCrossAddrSpaceOrdering,
                             Position Pos) const = 0;

  /// Insert any additional instructions needed at the start of a barrier.
  virtual bool insertBarrierStart(MachineBasicBlock::iterator &MI) const {
    return false;
  }

  virtual ~SICacheControl() = default;
};
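// The concrete cache controls below form an inheritance chain that mirrors
// hardware generations:
//
//   SICacheControl
//   └─ SIGfx6CacheControl
//      └─ SIGfx7CacheControl
//         ├─ SIGfx90ACacheControl ── SIGfx940CacheControl
//         └─ SIGfx10CacheControl ── SIGfx11CacheControl ── SIGfx12CacheControl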
class SIGfx6CacheControl : public SICacheControl {
protected:
  /// Sets GLC bit; \returns true if \p MI is modified.
  bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::GLC);
  }

  /// Sets SLC bit; \returns true if \p MI is modified.
  bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::SLC);
  }

public:
  SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};
class SIGfx7CacheControl : public SIGfx6CacheControl {
public:
  SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {}

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;
};
class SIGfx90ACacheControl : public SIGfx7CacheControl {
public:
  SIGfx90ACacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};
class SIGfx940CacheControl : public SIGfx90ACacheControl {
protected:
  /// Sets SC0 bit; \returns true if \p MI is modified.
  bool enableSC0Bit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::SC0);
  }

  /// Sets SC1 bit; \returns true if \p MI is modified.
  bool enableSC1Bit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::SC1);
  }

  /// Sets NT bit; \returns true if \p MI is modified.
  bool enableNTBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::NT);
  }

public:
  SIGfx940CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};
class SIGfx10CacheControl : public SIGfx7CacheControl {
protected:
  /// Sets DLC bit; \returns true if \p MI is modified.
  bool enableDLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::DLC);
  }

public:
  SIGfx10CacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertBarrierStart(MachineBasicBlock::iterator &MI) const override;
};
class SIGfx11CacheControl : public SIGfx10CacheControl {
public:
  SIGfx11CacheControl(const GCNSubtarget &ST) : SIGfx10CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;
};
class SIGfx12CacheControl : public SIGfx11CacheControl {
protected:
  /// Set the TH (temporal hint) field of \p MI's cache-policy operand.
  bool setTH(const MachineBasicBlock::iterator MI,
             AMDGPU::CPol::CPol Value) const;

  /// Set the SCOPE field of \p MI's cache-policy operand.
  bool setScope(const MachineBasicBlock::iterator MI,
                AMDGPU::CPol::CPol Value) const;

  bool insertWaitsBeforeSystemScopeStore(
      const MachineBasicBlock::iterator MI) const;

  bool setAtomicScope(const MachineBasicBlock::iterator &MI,
                      SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const;

public:
  SIGfx12CacheControl(const GCNSubtarget &ST) : SIGfx11CacheControl(ST) {
    // GFX12.5 only operates in CU mode.
    assert(!ST.hasGFX1250Insts() || ST.isCuModeEnabled());
  }

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool finalizeStore(MachineInstr &MI, bool Atomic) const override;

  virtual bool handleCooperativeAtomic(MachineInstr &MI) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }
};
class SIMemoryLegalizer final {
private:
  const MachineModuleInfo &MMI;

  /// Cache control.
  std::unique_ptr<SICacheControl> CC = nullptr;

  /// List of atomic pseudo instructions.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// Whether \p MI is an atomic instruction that returns a result.
  bool isAtomicRet(const MachineInstr &MI) const {
    return SIInstrInfo::isAtomicRet(MI);
  }

  /// Removes all processed atomic pseudo instructions from the current
  /// function. \returns true if the function was modified.
  bool removeAtomicPseudoMIs();

  /// Expands load operation \p MI. \returns true if instructions are
  /// added/deleted or \p MI is modified.
  bool expandLoad(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI);

  /// Expands store operation \p MI likewise.
  bool expandStore(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI);

  /// Expands atomic fence operation \p MI likewise.
  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);

  /// Expands atomic cmpxchg or rmw operation \p MI likewise.
  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                MachineBasicBlock::iterator &MI);

public:
  SIMemoryLegalizer(const MachineModuleInfo &MMI) : MMI(MMI) {}

  bool run(MachineFunction &MF);
};
class SIMemoryLegalizerLegacy final : public MachineFunctionPass {
public:
  static char ID;

  SIMemoryLegalizerLegacy() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return PASS_NAME; }

  bool runOnMachineFunction(MachineFunction &MF) override;
};
static const StringMap<SIAtomicAddrSpace> ASNames = {{
    {"global", SIAtomicAddrSpace::GLOBAL},
    {"local", SIAtomicAddrSpace::LDS},
}};

void diagnoseUnknownMMRAASName(const MachineInstr &MI, StringRef AS) {
  const MachineFunction *MF = MI.getMF();
  const Function &Fn = MF->getFunction();
  SmallString<128> Str;
  raw_svector_ostream OS(Str);
  OS << "unknown address space '" << AS << "'; expected one of ";
  ListSeparator LS;
  for (const auto &[Name, Val] : ASNames)
    OS << LS << '\'' << Name << '\'';
  Fn.getContext().diagnose(
      DiagnosticInfoUnsupported(Fn, Str.str(), MI.getDebugLoc(), DS_Warning));
}
/// Reads \p MI's MMRAs to parse the "amdgpu-synchronize-as" MMRA. If the tag
/// is not present, or carries no meaningful values, returns std::nullopt.
static std::optional<SIAtomicAddrSpace>
getSynchronizeAddrSpaceMD(const MachineInstr &MI) {
  static constexpr StringLiteral FenceASPrefix = "amdgpu-synchronize-as";

  auto MMRA = MMRAMetadata(MI.getMMRAMetadata());
  if (!MMRA)
    return std::nullopt;

  SIAtomicAddrSpace Result = SIAtomicAddrSpace::NONE;
  for (const auto &[Prefix, Suffix] : MMRA) {
    if (Prefix != FenceASPrefix)
      continue;

    if (auto It = ASNames.find(Suffix); It != ASNames.end())
      Result |= It->second;
    else
      diagnoseUnknownMMRAASName(MI, Suffix);
  }

  if (Result == SIAtomicAddrSpace::NONE)
    return std::nullopt;

  return Result;
}
void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
                                      const char *Msg) const {
  const Function &Func = MI->getParent()->getParent()->getFunction();
  Func.getContext().diagnose(
      DiagnosticInfoUnsupported(Func, Msg, MI->getDebugLoc()));
}
std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                               SIAtomicAddrSpace InstrAddrSpace) const {
  if (SSID == SyncScope::System)
    return std::tuple(SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getAgentSSID())
    return std::tuple(SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getClusterSSID())
    return std::tuple(SIAtomicScope::CLUSTER, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getWorkgroupSSID())
    return std::tuple(SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == MMI->getWavefrontSSID())
    return std::tuple(SIAtomicScope::WAVEFRONT, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == SyncScope::SingleThread)
    return std::tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == MMI->getSystemOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::SYSTEM,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getAgentOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::AGENT,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getClusterOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::CLUSTER,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::WORKGROUP,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::WAVEFRONT,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::SINGLETHREAD,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  return std::nullopt;
}
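// Note: the third tuple element is IsCrossAddressSpaceOrdering. The
// "one address space" synchronization scopes order only the instruction's own
// address spaces, so they return false and intersect ATOMIC with
// InstrAddrSpace.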
SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
  if (AS == AMDGPUAS::FLAT_ADDRESS)
    return SIAtomicAddrSpace::FLAT;
  if (AS == AMDGPUAS::GLOBAL_ADDRESS)
    return SIAtomicAddrSpace::GLOBAL;
  if (AS == AMDGPUAS::LOCAL_ADDRESS)
    return SIAtomicAddrSpace::LDS;
  if (AS == AMDGPUAS::PRIVATE_ADDRESS)
    return SIAtomicAddrSpace::SCRATCH;
  if (AS == AMDGPUAS::REGION_ADDRESS)
    return SIAtomicAddrSpace::GDS;
  return SIAtomicAddrSpace::OTHER;
}
SIMemOpAccess::SIMemOpAccess(const AMDGPUMachineModuleInfo &MMI_,
                             const GCNSubtarget &ST)
    : MMI(&MMI_), ST(ST) {}
std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getNumMemOperands() > 0);

  SyncScope::ID SSID = SyncScope::SingleThread;
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsNonTemporal = true;
  bool IsVolatile = false;
  bool IsLastUse = false;
  bool IsCooperative = false;

  // Merge the properties of all memory operands covering this instruction.
  for (const auto &MMO : MI->memoperands()) {
    IsNonTemporal &= MMO->isNonTemporal();
    IsVolatile |= MMO->isVolatile();
    IsLastUse |= MMO->getFlags() & MOLastUse;
    IsCooperative |= MMO->getFlags() & MOCooperative;
    InstrAddrSpace |=
        toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
    AtomicOrdering OpOrdering = MMO->getOrdering();
    if (OpOrdering != AtomicOrdering::NotAtomic) {
      const auto &IsSyncScopeInclusion =
          MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
      if (!IsSyncScopeInclusion) {
        reportUnsupported(
            MI, "Unsupported non-inclusive atomic synchronization scope");
        return std::nullopt;
      }

      SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
      Ordering = getMergedAtomicOrdering(Ordering, OpOrdering);
      assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
             MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
      FailureOrdering =
          getMergedAtomicOrdering(FailureOrdering, MMO->getFailureOrdering());
    }
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  if (Ordering != AtomicOrdering::NotAtomic) {
    auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
    if (!ScopeOrNone) {
      reportUnsupported(MI, "Unsupported atomic synchronization scope");
      return std::nullopt;
    }
    std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
        *ScopeOrNone;
    if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
        ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
         OrderingAddrSpace) ||
        ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) ==
         SIAtomicAddrSpace::NONE)) {
      reportUnsupported(MI, "Unsupported atomic address space");
      return std::nullopt;
    }
  }
  return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
                     IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
                     IsNonTemporal, IsLastUse, IsCooperative);
}
std::optional<SIMemOpInfo>
SIMemOpAccess::getLoadInfo(const MachineBasicBlock::iterator &MI) const {
  if (!(MI->mayLoad() && !MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo(ST);

  return constructFromMIWithMMO(MI);
}

std::optional<SIMemOpInfo>
SIMemOpAccess::getStoreInfo(const MachineBasicBlock::iterator &MI) const {
  if (!(!MI->mayLoad() && MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo(ST);

  return constructFromMIWithMMO(MI);
}
std::optional<SIMemOpInfo>
SIMemOpAccess::getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const {
  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return std::nullopt;

  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
  SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
  auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
  if (!ScopeOrNone) {
    reportUnsupported(MI, "Unsupported atomic synchronization scope");
    return std::nullopt;
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
      *ScopeOrNone;

  if (OrderingAddrSpace != SIAtomicAddrSpace::ATOMIC) {
    reportUnsupported(MI, "Unsupported atomic address space");
    return std::nullopt;
  }

  // MMRAs may refine the set of address spaces the fence synchronizes.
  auto SynchronizeAS = getSynchronizeAddrSpaceMD(*MI);
  if (SynchronizeAS)
    OrderingAddrSpace = *SynchronizeAS;

  return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace,
                     SIAtomicAddrSpace::ATOMIC, IsCrossAddressSpaceOrdering,
                     AtomicOrdering::NotAtomic);
}
std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
    const MachineBasicBlock::iterator &MI) const {
  if (!(MI->mayLoad() && MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo(ST);

  return constructFromMIWithMMO(MI);
}
SICacheControl::SICacheControl(const GCNSubtarget &ST) : ST(ST) {
  TII = ST.getInstrInfo();
  IV = getIsaVersion(ST.getCPU());
  InsertCacheInv = !AmdgcnSkipCacheInvalidations;
}

bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI,
                                    AMDGPU::CPol::CPol Bit) const {
  MachineOperand *CPol = TII->getNamedOperand(*MI, AMDGPU::OpName::cpol);
  if (!CPol)
    return false;

  CPol->setImm(CPol->getImm() | Bit);
  return true;
}
bool SICacheControl::canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const {
  assert((!ST.hasGloballyAddressableScratch() ||
          (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ||
          (AS & SIAtomicAddrSpace::SCRATCH) == SIAtomicAddrSpace::NONE) &&
         "scratch instructions should already be replaced by flat "
         "instructions if GloballyAddressableScratch is enabled");
  return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
}
std::unique_ptr<SICacheControl>
SICacheControl::create(const GCNSubtarget &ST) {
  GCNSubtarget::Generation Generation = ST.getGeneration();
  if (ST.hasGFX940Insts())
    return std::make_unique<SIGfx940CacheControl>(ST);
  if (ST.hasGFX90AInsts())
    return std::make_unique<SIGfx90ACacheControl>(ST);
  if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
    return std::make_unique<SIGfx6CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX10)
    return std::make_unique<SIGfx7CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX11)
    return std::make_unique<SIGfx10CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX12)
    return std::make_unique<SIGfx11CacheControl>(ST);
  return std::make_unique<SIGfx12CacheControl>(ST);
}
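// Illustrative usage (hypothetical driver code, not from the source): the
// legalizer owns exactly one cache control per function and dispatches all
// hardware-specific work through it, e.g.:
//
//   std::unique_ptr<SICacheControl> CC = SICacheControl::create(ST);
//   CC->insertAcquire(MI, SIAtomicScope::AGENT, SIAtomicAddrSpace::GLOBAL,
//                     Position::AFTER);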
bool SIGfx6CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Bypass the L1 cache by setting GLC; there is no L2 bypass.
      Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to bypass at these scopes.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }
  return Changed;
}
bool SIGfx6CacheControl::enableStoreCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  // The L1 cache is write-through, so stores do not need to bypass it.
  return false;
}
bool SIGfx6CacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  // Atomic read-modify-write instructions already bypass the L1 cache.
  return false;
}
bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only loads and stores are handled here; atomic read-modify-write
  // instructions use GLC to signal a returned result, so it must not be
  // repurposed for cache control.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set the L1 cache policy to MISS_EVICT for loads.
    if (Op == SIMemOp::LOAD)
      Changed |= enableGLCBit(MI);

    // Ensure the operation has completed at system scope so volatile
    // accesses become visible outside the program in a global order.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered);
    return Changed;
  }

  if (IsNonTemporal) {
    // GLC | SLC selects the L1 MISS_EVICT and L2 STREAM policies.
    Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);
    return Changed;
  }

  return Changed;
}
bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                    bool IsCrossAddrSpaceOrdering, Position Pos,
                                    AtomicOrdering Order) const {
  bool Changed = false;
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  bool VMCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      VMCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // Kept in order by the L1 cache for the same wavefront.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // Without cross address space ordering no "lgkmcnt(0)" is needed, as
      // LDS operations for all threads execute in program order.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // GDS operations for all waves execute in a total global order, so
      // only cross address space ordering requires an "lgkmcnt(0)".
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (VMCnt || LGKMCnt) {
    unsigned WaitCntImmediate =
        AMDGPU::encodeWaitcnt(IV, VMCnt ? 0 : getVmcntBitMask(IV),
                              getExpcntBitMask(IV),
                              LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(WaitCntImmediate);
    Changed = true;
  }

  // Direct loads to LDS (LDS DMA) are tracked by vmcnt rather than lgkmcnt,
  // so work-group-scope ordering that includes the LDS address space may need
  // additional waiting here.
  if (Scope == SIAtomicScope::WORKGROUP &&
      (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    // ...
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
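// Illustrative sketch (assuming an IsaVersion IV for the subtarget):
// AMDGPU::encodeWaitcnt packs the vmcnt/expcnt/lgkmcnt fields into a single
// S_WAITCNT immediate, and the *BitMask helpers give the "no wait" value for
// a field, e.g. waiting on all vmem operations while leaving exp and lgkm
// unconstrained:
//
//   unsigned Imm = AMDGPU::encodeWaitcnt(IV, /*Vmcnt=*/0,
//                                        AMDGPU::getExpcntBitMask(IV),
//                                        AMDGPU::getLgkmcntBitMask(IV));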
bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to invalidate at these scopes.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;
  return Changed;
}
bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       bool IsCrossAddrSpaceOrdering,
                                       Position Pos) const {
  return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                    IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release);
}
bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();
  const unsigned InvalidateL1 = STM.isAmdPalOS() || STM.isMesa3DOS()
                                    ? AMDGPU::BUFFER_WBINVL1
                                    : AMDGPU::BUFFER_WBINVL1_VOL;

  if (Pos == Position::AFTER)
    ++MI;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(InvalidateL1));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to invalidate at these scopes.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;
  return Changed;
}
bool SIGfx90ACacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Set the L1 cache policy to MISS_EVICT.
      Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // In threadgroup split mode the waves of a work-group can execute on
      // different CUs, so bypass the per-CU L1 as at agent scope.
      if (ST.isTgSplitEnabled())
        Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to bypass.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }
  return Changed;
}
bool SIGfx90ACacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Atomic RMW instructions implicitly bypass the L1 cache.
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to bypass.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }
  return Changed;
}
bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only loads and stores are handled here (see the GFX6 version for why RMW
  // atomics are excluded).
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set the L1 cache policy to MISS_EVICT for loads.
    if (Op == SIMemOp::LOAD)
      Changed |= enableGLCBit(MI);

    // Ensure the operation has completed at system scope.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered);
    return Changed;
  }

  if (IsNonTemporal) {
    // GLC | SLC selects the L1 MISS_EVICT and L2 STREAM policies.
    Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);
    return Changed;
  }

  return Changed;
}
bool SIGfx90ACacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                      SIAtomicScope Scope,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsCrossAddrSpaceOrdering,
                                      Position Pos,
                                      AtomicOrdering Order) const {
  if (ST.isTgSplitEnabled()) {
    // In threadgroup split mode the waves of a work-group can be executing
    // on different CUs, so global and GDS memory operations must complete to
    // be visible to waves on the other CUs; handle this as agent scope.
    if (((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH |
                       SIAtomicAddrSpace::GDS)) != SIAtomicAddrSpace::NONE) &&
        (Scope == SIAtomicScope::WORKGROUP)) {
      Scope = SIAtomicScope::AGENT;
    }
    // In threadgroup split mode LDS cannot be allocated, so there is no need
    // to wait for LDS memory operations.
    AddrSpace &= ~SIAtomicAddrSpace::LDS;
  }
  return SIGfx7CacheControl::insertWait(MI, Scope, AddrSpace, Op,
                                        IsCrossAddrSpaceOrdering, Pos, Order);
}
bool SIGfx90ACacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // L2 is shared within an agent but not across agents, so it must also
      // be invalidated here.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INVL2));
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
      break; // L1 invalidation is done by the GFX7 call below.
    case SIAtomicScope::WORKGROUP:
      // In threadgroup split mode the waves of a work-group can execute on
      // different CUs, so invalidate as at agent scope.
      if (ST.isTgSplitEnabled()) {
        Scope = SIAtomicScope::AGENT;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to invalidate.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  Changed |= SIGfx7CacheControl::insertAcquire(MI, Scope, AddrSpace, Pos);
  return Changed;
}
bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         bool IsCrossAddrSpaceOrdering,
                                         Position Pos) const {
  bool Changed = false;
  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // L2 is shared within an agent but not across agents, so dirty L2
      // lines must be written back for other agents to observe them.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2));
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to write back at these scopes.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  Changed |= SIGfx7CacheControl::insertRelease(MI, Scope, AddrSpace,
                                               IsCrossAddrSpaceOrdering, Pos);
  return Changed;
}
bool SIGfx940CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // SC0 | SC1 indicates system scope.
      Changed |= enableSC0Bit(MI);
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::AGENT:
      // SC1 indicates agent scope.
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // SC0 indicates work-group scope; this also handles threadgroup split
      // mode, where waves of a work-group can be on different CUs.
      Changed |= enableSC0Bit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to bypass.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }
  return Changed;
}
bool SIGfx940CacheControl::enableStoreCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // SC0 | SC1 indicates system scope.
      Changed |= enableSC0Bit(MI);
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::AGENT:
      // SC1 indicates agent scope.
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // SC0 indicates work-group scope.
      Changed |= enableSC0Bit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to bypass.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }
  return Changed;
}
bool SIGfx940CacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // SC1 indicates system scope for RMW atomics.
      Changed |= enableSC1Bit(MI);
      break;
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // RMW atomics implicitly bypass the L1 cache; SC0 is used to indicate
      // whether the atomic returns a result, so it must not be set here.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }
  return Changed;
}
bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only loads and stores are handled here.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set SC bits to indicate system scope.
    Changed |= enableSC0Bit(MI);
    Changed |= enableSC1Bit(MI);

    // Ensure the operation has completed at system scope.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered);
    return Changed;
  }

  if (IsNonTemporal) {
    // Set the NT (non-temporal) bit.
    Changed |= enableNTBit(MI);
    return Changed;
  }

  return Changed;
}
bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Invalidate up to system scope (SC0 | SC1).
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
          .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
      // Invalidate up to agent scope (SC1).
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
          .addImm(AMDGPU::CPol::SC1);
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In threadgroup split mode the waves of a work-group can be on
      // different CUs, so the per-CU L1 must be invalidated (SC0).
      if (ST.isTgSplitEnabled()) {
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
            .addImm(AMDGPU::CPol::SC0);
        Changed = true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to invalidate.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;
  return Changed;
}
bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         bool IsCrossAddrSpaceOrdering,
                                         Position Pos) const {
  bool Changed = false;
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Write back dirty cache lines up to system scope (SC0 | SC1) so they
      // are visible to other agents.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
          .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
          .addImm(AMDGPU::CPol::SC1);
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to write back at these scopes.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  // Ensure the necessary waits are inserted as for a release.
  Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                        IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release);
  return Changed;
}
bool SIGfx10CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Set the L0 and L1 cache policies to MISS_EVICT.
      Changed |= enableGLCBit(MI);
      Changed |= enableDLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be on either CU of the
      // WGP, so bypass the per-CU L0; in CU mode no bypass is needed.
      if (!ST.isCuModeEnabled())
        Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to bypass.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }
  return Changed;
}
bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only loads and stores are handled here.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set the L0/L1 cache policy to MISS_EVICT for loads.
    if (Op == SIMemOp::LOAD) {
      Changed |= enableGLCBit(MI);
      Changed |= enableDLCBit(MI);
    }

    // Ensure the operation has completed at system scope.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered);
    return Changed;
  }

  if (IsNonTemporal) {
    // For loads SLC alone selects HIT_EVICT/STREAM; for stores GLC | SLC
    // selects MISS_EVICT/STREAM.
    if (Op == SIMemOp::STORE)
      Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);
    return Changed;
  }

  return Changed;
}
bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos,
                                     AtomicOrdering Order) const {
  bool Changed = false;
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  bool VMCnt = false;
  bool VSCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        VMCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        VSCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be on either CU of the
      // WGP, so waits are needed to make operations visible to the other
      // CU; in CU mode the L0 keeps them visible within the work-group.
      if (!ST.isCuModeEnabled()) {
        if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
          VMCnt |= true;
        if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
          VSCnt |= true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // Kept in order for the same wavefront.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // Without cross address space ordering no "lgkmcnt(0)" is needed, as
      // LDS operations for all threads execute in program order.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (VMCnt || LGKMCnt) {
    unsigned WaitCntImmediate =
        AMDGPU::encodeWaitcnt(IV, VMCnt ? 0 : getVmcntBitMask(IV),
                              getExpcntBitMask(IV),
                              LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(WaitCntImmediate);
    Changed = true;
  }

  if (VSCnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT_soft))
        .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
        .addImm(0);
    Changed = true;
  }

  // Work-group-scope ordering that includes the LDS address space may need
  // additional handling for direct loads to LDS, which are tracked by vmcnt.
  if (Scope == SIAtomicScope::WORKGROUP &&
      (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    // ...
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Invalidate "outer in": L1 first, then L0, since L0 fetches from L1.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be on either CU of the
      // WGP, so the per-CU L0 must be invalidated; in CU mode it need not be.
      if (!ST.isCuModeEnabled()) {
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
        Changed = true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to invalidate.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;
  return Changed;
}
bool SIGfx10CacheControl::insertBarrierStart(
    MachineBasicBlock::iterator &MI) const {
  // Waiting on vm_vsrc is only needed in CU mode on targets before GFX12.5,
  // so barriers interact correctly with in-flight vmem operations.
  if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
      .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));
  return true;
}
bool SIGfx11CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Set the L0 and L1 cache policies to MISS_EVICT.
      Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be on either CU of the
      // WGP, so bypass the per-CU L0; in CU mode no bypass is needed.
      if (!ST.isCuModeEnabled())
        Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to bypass.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }
  return Changed;
}
bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only loads and stores are handled here.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set the L0/L1 cache policy to MISS_EVICT for loads.
    if (Op == SIMemOp::LOAD)
      Changed |= enableGLCBit(MI);

    // Set MALL NOALLOC for loads and stores.
    Changed |= enableDLCBit(MI);

    // Ensure the operation has completed at system scope.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered);
    return Changed;
  }

  if (IsNonTemporal) {
    // For loads SLC alone selects HIT_EVICT/STREAM; for stores GLC | SLC
    // selects MISS_EVICT/STREAM. Also set MALL NOALLOC.
    if (Op == SIMemOp::STORE)
      Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);
    Changed |= enableDLCBit(MI);
    return Changed;
  }

  return Changed;
}
bool SIGfx12CacheControl::setTH(const MachineBasicBlock::iterator MI,
                                AMDGPU::CPol::CPol Value) const {
  MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
  if (!CPol)
    return false;

  uint64_t NewTH = Value & AMDGPU::CPol::TH;
  if ((CPol->getImm() & AMDGPU::CPol::TH) != NewTH) {
    CPol->setImm((CPol->getImm() & ~AMDGPU::CPol::TH) | NewTH);
    return true;
  }
  return false;
}

bool SIGfx12CacheControl::setScope(const MachineBasicBlock::iterator MI,
                                   AMDGPU::CPol::CPol Value) const {
  MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
  if (!CPol)
    return false;

  uint64_t NewScope = Value & AMDGPU::CPol::SCOPE;
  if ((CPol->getImm() & AMDGPU::CPol::SCOPE) != NewScope) {
    CPol->setImm((CPol->getImm() & ~AMDGPU::CPol::SCOPE) | NewScope);
    return true;
  }
  return false;
}
bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
    const MachineBasicBlock::iterator MI) const {
  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_LOADCNT_soft)).addImm(0);
  if (ST.hasImageInsts()) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_SAMPLECNT_soft)).addImm(0);
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_BVHCNT_soft)).addImm(0);
  }
  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_KMCNT_soft)).addImm(0);
  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_STORECNT_soft)).addImm(0);

  return true;
}
bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos,
                                     AtomicOrdering Order) const {
  bool Changed = false;
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  bool LOADCnt = false;
  bool DSCnt = false;
  bool STORECnt = false;

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::CLUSTER:
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        LOADCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        STORECnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode waits are needed to make operations visible to the
      // other CU of the WGP; in CU mode the L0 keeps them visible within
      // the work-group. GFX12.5 only supports CU mode but routes requests
      // from different CUs to different L0s, so it must also wait.
      if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts()) {
        if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
          LOADCnt |= true;
        if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
          STORECnt |= true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // Kept in order for the same wavefront.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::CLUSTER:
    case SIAtomicScope::WORKGROUP:
      // Without cross address space ordering no "S_WAIT_DSCNT 0" is needed,
      // as LDS operations for all threads execute in program order.
      DSCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (LOADCnt) {
    // Acquire sequences only need to wait on the previous atomic, so the
    // image-related counters are only drained for other orderings.
    if (Order != AtomicOrdering::Acquire && ST.hasImageInsts()) {
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_BVHCNT_soft)).addImm(0);
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_SAMPLECNT_soft)).addImm(0);
    }
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_LOADCNT_soft)).addImm(0);
    Changed = true;
  }

  if (STORECnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_STORECNT_soft)).addImm(0);
    Changed = true;
  }

  if (DSCnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_DSCNT_soft)).addImm(0);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
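// Note: unlike the unified S_WAITCNT used on GFX6-GFX11 above, GFX12 splits
// the counters, so the code above emits separate soft waits (e.g.
// "S_WAIT_LOADCNT 0" vs "S_WAIT_STORECNT 0" vs "S_WAIT_DSCNT 0") and drains
// only the counters implied by Op and the address spaces involved.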
bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        Position Pos) const {
  if (!InsertCacheInv)
    return false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // The scratch address space does not need the global memory cache to be
  // flushed, as all memory operations by the same thread are sequentially
  // consistent and no other thread can access scratch memory. Other address
  // spaces do not have a cache.
  if (!canAffectGlobalAddrSpace(AddrSpace))
    return false;

  AMDGPU::CPol::CPol ScopeImm = AMDGPU::CPol::SCOPE_DEV;
  switch (Scope) {
  case SIAtomicScope::SYSTEM:
    ScopeImm = AMDGPU::CPol::SCOPE_SYS;
    break;
  case SIAtomicScope::AGENT:
    ScopeImm = AMDGPU::CPol::SCOPE_DEV;
    break;
  case SIAtomicScope::CLUSTER:
    ScopeImm = AMDGPU::CPol::SCOPE_SE;
    break;
  case SIAtomicScope::WORKGROUP:
    // In WGP mode the per-CU L0 must be invalidated; in CU mode all waves of
    // a work-group share an L0, so nothing needs to be done.
    if (ST.isCuModeEnabled())
      return false;
    ScopeImm = AMDGPU::CPol::SCOPE_SE;
    break;
  case SIAtomicScope::WAVEFRONT:
  case SIAtomicScope::SINGLETHREAD:
    return false; // No cache to invalidate.
  default:
    llvm_unreachable("Unsupported synchronization scope");
  }

  if (Pos == Position::AFTER)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_INV)).addImm(ScopeImm);

  if (Pos == Position::AFTER)
    --MI;

  return true;
}
bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        bool IsCrossAddrSpaceOrdering,
                                        Position Pos) const {
  bool Changed = false;
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    if (Pos == Position::AFTER)
      ++MI;

    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB))
          .addImm(AMDGPU::CPol::SCOPE_SYS);
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
      // GLOBAL_WB is also needed at device scope on GFX12.5.
      if (ST.hasGFX1250Insts()) {
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB))
            .addImm(AMDGPU::CPol::SCOPE_DEV);
        Changed = true;
      }
      break;
    case SIAtomicScope::CLUSTER:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to write back at these scopes.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }

    if (Pos == Position::AFTER)
      --MI;
  }

  // Ensure the necessary waits are inserted as for a release.
  Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                        IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release);
  return Changed;
}
bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only loads and stores are handled here.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsLastUse) {
    // Set the last-use temporal hint.
    Changed |= setTH(MI, AMDGPU::CPol::TH_LU);
  } else if (IsNonTemporal) {
    // Set the non-temporal hint for all cache levels.
    Changed |= setTH(MI, AMDGPU::CPol::TH_NT);
  }

  if (IsVolatile) {
    Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);

    if (Op == SIMemOp::STORE)
      Changed |= insertWaitsBeforeSystemScopeStore(MI);

    // Ensure the operation has completed at system scope.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered);
  }

  return Changed;
}
bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const {
  assert(MI.mayStore() && "Not a Store inst");
  const bool IsRMW = (MI.mayLoad() && MI.mayStore());
  bool Changed = false;

  // On targets requiring it, drain xcnt before atomic flat/global stores so
  // writes from waves with a different VA-to-PA mapping are visible.
  if (Atomic && ST.requiresWaitXCntBeforeAtomicStores() && TII->isFLAT(MI)) {
    MachineBasicBlock &MBB = *MI.getParent();
    BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::S_WAIT_XCNT_soft))
        .addImm(0);
    Changed = true;
  }

  // RMW instructions have no cache-policy operand to finalize here.
  if (IsRMW)
    return Changed;

  MachineOperand *CPol = TII->getNamedOperand(MI, OpName::cpol);
  if (!CPol)
    return Changed;

  // Non-atomic system-scope stores need extra waits inserted before them.
  if (!Atomic && (CPol->getImm() & AMDGPU::CPol::SCOPE) ==
                     AMDGPU::CPol::SCOPE_SYS)
    Changed |= insertWaitsBeforeSystemScopeStore(MI.getIterator());

  return Changed;
}
bool SIGfx12CacheControl::handleCooperativeAtomic(MachineInstr &MI) const {
  if (!ST.hasGFX1250Insts())
    return false;

  MachineOperand *CPol = TII->getNamedOperand(MI, OpName::cpol);
  assert(CPol && "No CPol operand?");

  // Widen the cache-policy scope so the cooperative atomic is visible to all
  // cooperating waves; CU scope is insufficient.
  if ((CPol->getImm() & AMDGPU::CPol::SCOPE) == AMDGPU::CPol::SCOPE_CU) {
    CPol->setImm((CPol->getImm() & ~AMDGPU::CPol::SCOPE) |
                 AMDGPU::CPol::SCOPE_SE);
    return true;
  }
  return false;
}
bool SIGfx12CacheControl::setAtomicScope(const MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);
      break;
    case SIAtomicScope::AGENT:
      Changed |= setScope(MI, AMDGPU::CPol::SCOPE_DEV);
      break;
    case SIAtomicScope::CLUSTER:
      Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SE);
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be on either CU of the
      // WGP, so the scope must cover both L0s.
      if (!ST.isCuModeEnabled())
        Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SE);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break; // No cache to bypass.
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }
  return Changed;
}
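// For reference (as implemented above): SYSTEM maps to CPol::SCOPE_SYS,
// AGENT to SCOPE_DEV, CLUSTER to SCOPE_SE, and WORKGROUP to SCOPE_SE only in
// WGP mode, where the two CUs of a WGP have separate L0 caches.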
bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}
bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    const AtomicOrdering Order = MOI.getOrdering();
    if (Order == AtomicOrdering::Monotonic ||
        Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace());
    }

    if (MOI.isCooperative())
      Changed |= CC->handleCooperativeAtomic(*MI);

    if (Order == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE, Order);

    if (Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->insertWait(
          MI, MOI.getScope(), MOI.getInstrAddrSpace(), SIMemOp::LOAD,
          MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER, Order);
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::AFTER);
    }

    return Changed;
  }

  // Atomic instructions already bypass caches to the scope specified by the
  // SyncScope operand; only non-atomic volatile, nontemporal, and last-use
  // accesses need additional treatment.
  Changed |= CC->enableVolatileAndOrNonTemporal(
      MI, MOI.getInstrAddrSpace(), SIMemOp::LOAD, MOI.isVolatile(),
      MOI.isNonTemporal(), MOI.isLastUse());

  return Changed;
}
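// Worked example (illustrative; exact sequences vary by subtarget): on a
// GFX10-class target, an agent-scope acquire load such as
//
//   %v = load atomic i32, ptr addrspace(1) %p syncscope("agent") acquire
//
// is legalized roughly to a GLC|DLC load followed by "s_waitcnt vmcnt(0)"
// (insertWait above) and "buffer_gl1_inv; buffer_gl0_inv" (insertAcquire).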
bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  MachineInstr &StoreMI = *MI;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
        MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableStoreCacheBypass(MI, MOI.getScope(),
                                            MOI.getOrderingAddrSpace());
    }

    if (MOI.isCooperative())
      Changed |= CC->handleCooperativeAtomic(*MI);

    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    Changed |= CC->finalizeStore(StoreMI, /*Atomic=*/true);
    return Changed;
  }

  // Atomic instructions already bypass caches to the scope specified by the
  // SyncScope operand.
  Changed |= CC->enableVolatileAndOrNonTemporal(
      MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
      MOI.isNonTemporal());

  Changed |= CC->finalizeStore(StoreMI, /*Atomic=*/false);
  return Changed;
}
bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  AtomicPseudoMIs.push_back(MI);
  bool Changed = false;

  const SIAtomicAddrSpace OrderingAddrSpace = MOI.getOrderingAddrSpace();

  if (MOI.isAtomic()) {
    const AtomicOrdering Order = MOI.getOrdering();
    if (Order == AtomicOrdering::Acquire) {
      Changed |= CC->insertWait(
          MI, MOI.getScope(), OrderingAddrSpace,
          SIMemOp::LOAD | SIMemOp::STORE,
          MOI.getIsCrossAddressSpaceOrdering(), Position::BEFORE, Order);
    }

    if (Order == AtomicOrdering::Release ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(), OrderingAddrSpace,
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    if (Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertAcquire(MI, MOI.getScope(), OrderingAddrSpace,
                                   Position::BEFORE);

    return Changed;
  }

  return Changed;
}
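// Worked example (illustrative): a fence syncscope("agent") acq_rel with no
// MMRA refinement orders all atomic address spaces, so both insertRelease
// (waits) and insertAcquire (cache invalidation) are emitted before the
// ATOMIC_FENCE pseudo, which is itself later deleted by
// removeAtomicPseudoMIs().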
bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(
    const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  MachineInstr &RMWMI = *MI;

  if (MOI.isAtomic()) {
    const AtomicOrdering Order = MOI.getOrdering();
    if (Order == AtomicOrdering::Monotonic ||
        Order == AtomicOrdering::Acquire || Order == AtomicOrdering::Release ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableRMWCacheBypass(MI, MOI.getScope(),
                                          MOI.getInstrAddrSpace());
    }

    if (Order == AtomicOrdering::Release ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    if (Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->insertWait(
          MI, MOI.getScope(), MOI.getInstrAddrSpace(),
          isAtomicRet(*MI) ? SIMemOp::LOAD : SIMemOp::STORE,
          MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER, Order);
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::AFTER);
    }

    Changed |= CC->finalizeStore(RMWMI, /*Atomic=*/true);
    return Changed;
  }

  return Changed;
}
bool SIMemoryLegalizerLegacy::runOnMachineFunction(MachineFunction &MF) {
  const MachineModuleInfo &MMI =
      getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
  return SIMemoryLegalizer(MMI).run(MF);
}

PreservedAnalyses
SIMemoryLegalizerPass::run(MachineFunction &MF,
                           MachineFunctionAnalysisManager &MFAM) {
  auto *MMI = MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(MF)
                  .getCachedResult<MachineModuleAnalysis>(
                      *MF.getFunction().getParent());
  assert(MMI && "MachineModuleAnalysis must be available");
  if (!SIMemoryLegalizer(MMI->getMMI()).run(MF))
    return PreservedAnalyses::all();
  return getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>();
}
bool SIMemoryLegalizer::run(MachineFunction &MF) {
  bool Changed = false;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  SIMemOpAccess MOA(MMI.getObjFileInfo<AMDGPUMachineModuleInfo>(), ST);
  CC = SICacheControl::create(ST);

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      // Unbundle memory-access bundles so each instruction can be legalized.
      if (MI->isBundle() && MI->mayLoadOrStore()) {
        MachineBasicBlock::instr_iterator II(MI->getIterator());
        for (MachineBasicBlock::instr_iterator I = ++II, E = MBB.instr_end();
             I != E && I->isBundledWithPred(); ++I) {
          I->unbundleFromPred();
          for (MachineOperand &MO : I->operands())
            if (MO.isReg())
              MO.setIsInternalRead(false);
        }
        MI->eraseFromParent();
        MI = II->getIterator();
      }

      if (ST.getInstrInfo()->isBarrierStart(MI->getOpcode())) {
        Changed |= CC->insertBarrierStart(MI);
        continue;
      }

      if (const auto &MOI = MOA.getLoadInfo(MI))
        Changed |= expandLoad(*MOI, MI);
      else if (const auto &MOI = MOA.getStoreInfo(MI)) {
        Changed |= expandStore(*MOI, MI);
      } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(*MOI, MI);
      else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
        Changed |= expandAtomicCmpxchgOrRmw(*MOI, MI);
    }
  }

  Changed |= removeAtomicPseudoMIs();
  return Changed;
}
char SIMemoryLegalizerLegacy::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizerLegacy::ID;

INITIALIZE_PASS(SIMemoryLegalizerLegacy, DEBUG_TYPE, PASS_NAME, false, false)

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizerLegacy();
}