LLVM 22.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
25
26#define GET_SUBTARGETINFO_HEADER
27#include "AMDGPUGenSubtargetInfo.inc"
28
29namespace llvm {
30
31class GCNTargetMachine;
32
34 public AMDGPUSubtarget {
35public:
37
38 // Following 2 enums are documented at:
39 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
/// Trap handler ABI selector; see the trap-handler-abi section of
/// AMDGPUUsage for the documented meaning of each value.
enum class TrapHandlerAbi {
  NONE = 0,
  AMDHSA = 1,
};
44
45 enum class TrapID {
46 LLVMAMDHSATrap = 0x02,
48 };
49
50private:
51 /// SelectionDAGISel related APIs.
52 std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;
53
54 /// GlobalISel related APIs.
55 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
56 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
57 std::unique_ptr<InstructionSelector> InstSelector;
58 std::unique_ptr<LegalizerInfo> Legalizer;
59 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
60
61protected:
62 // Basic subtarget description.
65 unsigned Gen = INVALID;
67 int LDSBankCount = 0;
69
70 // Possibly statically set by tablegen, but may want to be overridden.
71 bool FastDenormalF32 = false;
72 bool HalfRate64Ops = false;
73 bool FullRate64Ops = false;
74
75 // Dynamically set bits that enable features.
76 bool FlatForGlobal = false;
78 bool BackOffBarrier = false;
80 bool UnalignedAccessMode = false;
82 bool HasApertureRegs = false;
83 bool SupportsXNACK = false;
84 bool KernargPreload = false;
85
86 // This should not be used directly. 'TargetID' tracks the dynamic settings
87 // for XNACK.
88 bool EnableXNACK = false;
89
90 bool EnableTgSplit = false;
91 bool EnableCuMode = false;
92 bool TrapHandler = false;
93 bool EnablePreciseMemory = false;
94
95 // Used as options.
96 bool EnableLoadStoreOpt = false;
98 bool EnableSIScheduler = false;
99 bool EnableDS128 = false;
101 bool DumpCode = false;
102
103 // Subtarget properties statically set by tablegen.
104 bool FP64 = false;
105 bool FMA = false;
106 bool MIMG_R128 = false;
107 bool CIInsts = false;
108 bool GFX8Insts = false;
109 bool GFX9Insts = false;
110 bool GFX90AInsts = false;
111 bool GFX940Insts = false;
112 bool GFX950Insts = false;
113 bool GFX10Insts = false;
114 bool GFX11Insts = false;
115 bool GFX12Insts = false;
116 bool GFX1250Insts = false;
117 bool GFX10_3Insts = false;
118 bool GFX7GFX8GFX9Insts = false;
119 bool SGPRInitBug = false;
120 bool UserSGPRInit16Bug = false;
123 bool HasSMemRealTime = false;
124 bool HasIntClamp = false;
125 bool HasFmaMixInsts = false;
126 bool HasFmaMixBF16Insts = false;
127 bool HasMovrel = false;
128 bool HasVGPRIndexMode = false;
130 bool HasScalarStores = false;
131 bool HasScalarAtomics = false;
132 bool HasSDWAOmod = false;
133 bool HasSDWAScalar = false;
134 bool HasSDWASdst = false;
135 bool HasSDWAMac = false;
136 bool HasSDWAOutModsVOPC = false;
137 bool HasDPP = false;
138 bool HasDPP8 = false;
139 bool HasDPALU_DPP = false;
140 bool HasDPPSrc1SGPR = false;
141 bool HasPackedFP32Ops = false;
142 bool HasImageInsts = false;
144 bool HasR128A16 = false;
145 bool HasA16 = false;
146 bool HasG16 = false;
147 bool HasNSAEncoding = false;
149 bool GFX10_AEncoding = false;
150 bool GFX10_BEncoding = false;
151 bool HasDLInsts = false;
152 bool HasFmacF64Inst = false;
153 bool HasDot1Insts = false;
154 bool HasDot2Insts = false;
155 bool HasDot3Insts = false;
156 bool HasDot4Insts = false;
157 bool HasDot5Insts = false;
158 bool HasDot6Insts = false;
159 bool HasDot7Insts = false;
160 bool HasDot8Insts = false;
161 bool HasDot9Insts = false;
162 bool HasDot10Insts = false;
163 bool HasDot11Insts = false;
164 bool HasDot12Insts = false;
165 bool HasDot13Insts = false;
166 bool HasMAIInsts = false;
167 bool HasFP8Insts = false;
169 bool HasFP8E5M3Insts = false;
170 bool HasCvtFP8Vop1Bug = false;
171 bool HasPkFmacF16Inst = false;
192 bool HasXF32Insts = false;
193 /// The maximum number of instructions that may be placed within an S_CLAUSE,
194 /// which is one greater than the maximum argument to S_CLAUSE. A value of 0
195 /// indicates a lack of S_CLAUSE support.
197 bool SupportsSRAMECC = false;
198 bool DynamicVGPR = false;
200 bool HasVMemToLDSLoad = false;
201 bool RequiresAlignVGPR = false;
202
203 // This should not be used directly. 'TargetID' tracks the dynamic settings
204 // for SRAMECC.
205 bool EnableSRAMECC = false;
206
207 bool HasNoSdstCMPX = false;
208 bool HasVscnt = false;
209 bool HasWaitXcnt = false;
210 bool HasGetWaveIdInst = false;
211 bool HasSMemTimeInst = false;
214 bool HasVOP3Literal = false;
215 bool HasNoDataDepHazard = false;
216 bool FlatAddressSpace = false;
217 bool FlatInstOffsets = false;
218 bool FlatGlobalInsts = false;
219 bool FlatScratchInsts = false;
220 bool FlatGVSMode = false;
223 bool EnableFlatScratch = false;
225 bool HasGDS = false;
226 bool HasGWS = false;
227 bool AddNoCarryInsts = false;
228 bool HasUnpackedD16VMem = false;
229 bool LDSMisalignedBug = false;
232 bool UnalignedDSAccess = false;
233 bool HasPackedTID = false;
234 bool ScalarizeGlobal = false;
235 bool HasSALUFloatInsts = false;
238 bool Has64BitLiterals = false;
239 bool HasBitOp3Insts = false;
240 bool HasTanhInsts = false;
243 bool HasPrngInst = false;
245 bool HasPermlane16Swap = false;
246 bool HasPermlane32Swap = false;
251 bool HasVmemPrefInsts = false;
253 bool HasSafeCUPrefetch = false;
254 bool HasCUStores = false;
257 bool HasNSAtoVMEMBug = false;
258 bool HasNSAClauseBug = false;
259 bool HasOffset3fBug = false;
265 bool Has1_5xVGPRs = false;
266 bool HasMADIntraFwdBug = false;
267 bool HasVOPDInsts = false;
271 bool HasAshrPkInsts = false;
275 bool HasMin3Max3PKF16 = false;
277 bool HasLshlAddU64Inst = false;
278 bool HasAddSubU64Insts = false;
279 bool HasMadU32Inst = false;
283
284 bool RequiresCOV6 = false;
287
288 // Dummy feature to use for assembler in tablegen.
289 bool FeatureDisable = false;
290
291private:
292 SIInstrInfo InstrInfo;
293 SITargetLowering TLInfo;
294 SIFrameLowering FrameLowering;
295
296public:
297 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
298 const GCNTargetMachine &TM);
299 ~GCNSubtarget() override;
300
302 StringRef GPU, StringRef FS);
303
304 /// Diagnose inconsistent subtarget features before attempting to codegen
305 /// function \p F.
306 void checkSubtargetFeatures(const Function &F) const;
307
308 const SIInstrInfo *getInstrInfo() const override {
309 return &InstrInfo;
310 }
311
312 const SIFrameLowering *getFrameLowering() const override {
313 return &FrameLowering;
314 }
315
316 const SITargetLowering *getTargetLowering() const override {
317 return &TLInfo;
318 }
319
320 const SIRegisterInfo *getRegisterInfo() const override {
321 return &InstrInfo.getRegisterInfo();
322 }
323
324 const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;
325
326 const CallLowering *getCallLowering() const override {
327 return CallLoweringInfo.get();
328 }
329
330 const InlineAsmLowering *getInlineAsmLowering() const override {
331 return InlineAsmLoweringInfo.get();
332 }
333
335 return InstSelector.get();
336 }
337
338 const LegalizerInfo *getLegalizerInfo() const override {
339 return Legalizer.get();
340 }
341
342 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
343 return RegBankInfo.get();
344 }
345
347 return TargetID;
348 }
349
351 return &InstrItins;
352 }
353
355
357 return (Generation)Gen;
358 }
359
360 unsigned getMaxWaveScratchSize() const {
361 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
362 if (getGeneration() >= GFX12) {
363 // 18-bit field in units of 64-dword.
364 return (64 * 4) * ((1 << 18) - 1);
365 }
366 if (getGeneration() == GFX11) {
367 // 15-bit field in units of 64-dword.
368 return (64 * 4) * ((1 << 15) - 1);
369 }
370 // 13-bit field in units of 256-dword.
371 return (256 * 4) * ((1 << 13) - 1);
372 }
373
374 /// Return the number of high bits known to be zero for a frame index.
377 }
378
379 int getLDSBankCount() const {
380 return LDSBankCount;
381 }
382
383 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
384 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
385 }
386
387 unsigned getConstantBusLimit(unsigned Opcode) const;
388
389 /// Returns if the result of this instruction with a 16-bit result returned in
390 /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
391 /// the original value.
392 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
393
394 bool supportsWGP() const {
395 if (GFX1250Insts)
396 return false;
397 return getGeneration() >= GFX10;
398 }
399
400 bool hasIntClamp() const {
401 return HasIntClamp;
402 }
403
404 bool hasFP64() const {
405 return FP64;
406 }
407
408 bool hasMIMG_R128() const {
409 return MIMG_R128;
410 }
411
412 bool hasHWFP64() const {
413 return FP64;
414 }
415
416 bool hasHalfRate64Ops() const {
417 return HalfRate64Ops;
418 }
419
420 bool hasFullRate64Ops() const {
421 return FullRate64Ops;
422 }
423
424 bool hasAddr64() const {
426 }
427
428 bool hasFlat() const {
430 }
431
432 // Return true if the target only has the reverse operand versions of VALU
433 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
434 bool hasOnlyRevVALUShifts() const {
436 }
437
438 bool hasFractBug() const {
440 }
441
442 bool hasBFE() const {
443 return true;
444 }
445
446 bool hasBFI() const {
447 return true;
448 }
449
450 bool hasBFM() const {
451 return hasBFE();
452 }
453
454 bool hasBCNT(unsigned Size) const {
455 return true;
456 }
457
458 bool hasFFBL() const {
459 return true;
460 }
461
462 bool hasFFBH() const {
463 return true;
464 }
465
466 bool hasMed3_16() const {
468 }
469
470 bool hasMin3Max3_16() const {
472 }
473
474 bool hasFmaMixInsts() const {
475 return HasFmaMixInsts;
476 }
477
478 bool hasFmaMixBF16Insts() const { return HasFmaMixBF16Insts; }
479
480 bool hasCARRY() const {
481 return true;
482 }
483
484 bool hasFMA() const {
485 return FMA;
486 }
487
488 bool hasSwap() const {
489 return GFX9Insts;
490 }
491
492 bool hasScalarPackInsts() const {
493 return GFX9Insts;
494 }
495
496 bool hasScalarMulHiInsts() const {
497 return GFX9Insts;
498 }
499
500 bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }
501
504 }
505
507 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
508 return getGeneration() >= GFX9;
509 }
510
511 /// True if the offset field of DS instructions works as expected. On SI, the
512 /// offset uses a 16-bit adder and does not always wrap properly.
513 bool hasUsableDSOffset() const {
514 return getGeneration() >= SEA_ISLANDS;
515 }
516
519 }
520
521 /// Condition output from div_scale is usable.
524 }
525
526 /// Extra wait hazard is needed in some cases before
527 /// s_cbranch_vccnz/s_cbranch_vccz.
528 bool hasReadVCCZBug() const {
529 return getGeneration() <= SEA_ISLANDS;
530 }
531
532 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
534 return getGeneration() >= GFX10;
535 }
536
537 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
538 /// was written by a VALU instruction.
541 }
542
543 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
544 /// SGPR was written by a VALU Instruction.
547 }
548
549 bool hasRFEHazards() const {
551 }
552
553 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
554 unsigned getSetRegWaitStates() const {
555 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
556 }
557
558 bool dumpCode() const {
559 return DumpCode;
560 }
561
562 /// Return the amount of LDS that can be used that will not restrict the
563 /// occupancy lower than WaveCount.
564 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
565 const Function &) const;
566
569 }
570
571 /// \returns If target supports S_DENORM_MODE.
572 bool hasDenormModeInst() const {
574 }
575
576 bool useFlatForGlobal() const {
577 return FlatForGlobal;
578 }
579
580 /// \returns If target supports ds_read/write_b128 and user enables generation
581 /// of ds_read/write_b128.
582 bool useDS128() const {
583 return CIInsts && EnableDS128;
584 }
585
586 /// \return If target supports ds_read/write_b96/128.
587 bool hasDS96AndDS128() const {
588 return CIInsts;
589 }
590
591 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
592 bool haveRoundOpsF64() const {
593 return CIInsts;
594 }
595
596 /// \returns If MUBUF instructions always perform range checking, even for
597 /// buffer resources used for private memory access.
600 }
601
602 /// \returns If target requires PRT Struct NULL support (zero result registers
603 /// for sparse texture support).
604 bool usePRTStrictNull() const {
605 return EnablePRTStrictNull;
606 }
607
610 }
611
612 /// \returns true if the target supports backing off of s_barrier instructions
613 /// when an exception is raised.
615 return BackOffBarrier;
616 }
617
620 }
621
624 }
625
626 bool hasUnalignedDSAccess() const {
627 return UnalignedDSAccess;
628 }
629
632 }
633
636 }
637
640 }
641
643 return UnalignedAccessMode;
644 }
645
647
648 bool hasApertureRegs() const {
649 return HasApertureRegs;
650 }
651
652 bool isTrapHandlerEnabled() const {
653 return TrapHandler;
654 }
655
656 bool isXNACKEnabled() const {
657 return TargetID.isXnackOnOrAny();
658 }
659
660 bool isTgSplitEnabled() const {
661 return EnableTgSplit;
662 }
663
664 bool isCuModeEnabled() const {
665 return EnableCuMode;
666 }
667
669
670 bool hasFlatAddressSpace() const {
671 return FlatAddressSpace;
672 }
673
674 bool hasFlatScrRegister() const {
675 return hasFlatAddressSpace();
676 }
677
678 bool hasFlatInstOffsets() const {
679 return FlatInstOffsets;
680 }
681
682 bool hasFlatGlobalInsts() const {
683 return FlatGlobalInsts;
684 }
685
686 bool hasFlatScratchInsts() const {
687 return FlatScratchInsts;
688 }
689
690 // Check if target supports ST addressing mode with FLAT scratch instructions.
691 // The ST addressing mode means no registers are used, either VGPR or SGPR,
692 // but only immediate offset is swizzled and added to the FLAT scratch base.
693 bool hasFlatScratchSTMode() const {
695 }
696
697 bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
698
701 }
702
703 bool enableFlatScratch() const {
704 return flatScratchIsArchitected() ||
706 }
707
708 bool hasGlobalAddTidInsts() const {
709 return GFX10_BEncoding;
710 }
711
712 bool hasAtomicCSub() const {
713 return GFX10_BEncoding;
714 }
715
716 bool hasMTBUFInsts() const { return !hasGFX1250Insts(); }
717
718 bool hasFormattedMUBUFInsts() const { return !hasGFX1250Insts(); }
719
720 bool hasExportInsts() const {
721 return !hasGFX940Insts() && !hasGFX1250Insts();
722 }
723
724 bool hasVINTERPEncoding() const { return GFX11Insts && !hasGFX1250Insts(); }
725
726 // DS_ADD_F64/DS_ADD_RTN_F64
727 bool hasLdsAtomicAddF64() const {
728 return hasGFX90AInsts() || hasGFX1250Insts();
729 }
730
732 return getGeneration() >= GFX9;
733 }
734
737 }
738
740 return getGeneration() > GFX9;
741 }
742
743 bool hasD16LoadStore() const {
744 return getGeneration() >= GFX9;
745 }
746
749 }
750
751 bool hasD16Images() const {
753 }
754
755 /// Return if most LDS instructions have an m0 use that require m0 to be
756 /// initialized.
757 bool ldsRequiresM0Init() const {
758 return getGeneration() < GFX9;
759 }
760
761 // True if the hardware rewinds and replays GWS operations if a wave is
762 // preempted.
763 //
764 // If this is false, a GWS operation requires testing if a nack set the
765 // MEM_VIOL bit, and repeating if so.
766 bool hasGWSAutoReplay() const {
767 return getGeneration() >= GFX9;
768 }
769
770 /// \returns if target has ds_gws_sema_release_all instruction.
771 bool hasGWSSemaReleaseAll() const {
772 return CIInsts;
773 }
774
775 /// \returns true if the target has integer add/sub instructions that do not
776 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
777 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
778 /// for saturation.
779 bool hasAddNoCarry() const {
780 return AddNoCarryInsts;
781 }
782
783 bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
784
785 bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }
786
787 bool hasUnpackedD16VMem() const {
788 return HasUnpackedD16VMem;
789 }
790
791 // Covers VS/PS/CS graphics shaders
792 bool isMesaGfxShader(const Function &F) const {
793 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
794 }
795
796 bool hasMad64_32() const {
797 return getGeneration() >= SEA_ISLANDS;
798 }
799
800 bool hasSDWAOmod() const {
801 return HasSDWAOmod;
802 }
803
804 bool hasSDWAScalar() const {
805 return HasSDWAScalar;
806 }
807
808 bool hasSDWASdst() const {
809 return HasSDWASdst;
810 }
811
812 bool hasSDWAMac() const {
813 return HasSDWAMac;
814 }
815
816 bool hasSDWAOutModsVOPC() const {
817 return HasSDWAOutModsVOPC;
818 }
819
820 bool hasDLInsts() const {
821 return HasDLInsts;
822 }
823
824 bool hasFmacF64Inst() const { return HasFmacF64Inst; }
825
826 bool hasDot1Insts() const {
827 return HasDot1Insts;
828 }
829
830 bool hasDot2Insts() const {
831 return HasDot2Insts;
832 }
833
834 bool hasDot3Insts() const {
835 return HasDot3Insts;
836 }
837
838 bool hasDot4Insts() const {
839 return HasDot4Insts;
840 }
841
842 bool hasDot5Insts() const {
843 return HasDot5Insts;
844 }
845
846 bool hasDot6Insts() const {
847 return HasDot6Insts;
848 }
849
850 bool hasDot7Insts() const {
851 return HasDot7Insts;
852 }
853
854 bool hasDot8Insts() const {
855 return HasDot8Insts;
856 }
857
858 bool hasDot9Insts() const {
859 return HasDot9Insts;
860 }
861
862 bool hasDot10Insts() const {
863 return HasDot10Insts;
864 }
865
866 bool hasDot11Insts() const {
867 return HasDot11Insts;
868 }
869
870 bool hasDot12Insts() const {
871 return HasDot12Insts;
872 }
873
874 bool hasDot13Insts() const {
875 return HasDot13Insts;
876 }
877
878 bool hasMAIInsts() const {
879 return HasMAIInsts;
880 }
881
882 bool hasFP8Insts() const {
883 return HasFP8Insts;
884 }
885
887
888 bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; }
889
890 bool hasPkFmacF16Inst() const {
891 return HasPkFmacF16Inst;
892 }
893
896 }
897
900 }
901
904 }
905
908 }
909
911
913
914 bool hasAtomicFaddInsts() const {
916 }
917
919
921
924 }
925
928 }
929
932 }
933
936 }
937
939
940 /// \return true if the target has flat, global, and buffer atomic fadd for
941 /// double.
944 }
945
946 /// \return true if the target's flat, global, and buffer atomic fadd for
947 /// float supports denormal handling.
950 }
951
952 /// \return true if atomic operations targeting fine-grained memory work
953 /// correctly at device scope, in allocations in host or peer PCIe device
954 /// memory.
957 }
958
959 /// \return true if HW emulates system scope atomics unsupported by PCI-e
960 /// via a CAS loop.
963 }
964
966
969 }
970
971 bool hasNoSdstCMPX() const {
972 return HasNoSdstCMPX;
973 }
974
975 bool hasVscnt() const {
976 return HasVscnt;
977 }
978
979 bool hasGetWaveIdInst() const {
980 return HasGetWaveIdInst;
981 }
982
983 bool hasSMemTimeInst() const {
984 return HasSMemTimeInst;
985 }
986
989 }
990
993 }
994
995 bool hasVOP3Literal() const {
996 return HasVOP3Literal;
997 }
998
999 bool hasNoDataDepHazard() const {
1000 return HasNoDataDepHazard;
1001 }
1002
1004 return getGeneration() < SEA_ISLANDS;
1005 }
1006
1007 bool hasInstPrefetch() const {
1008 return getGeneration() == GFX10 || getGeneration() == GFX11;
1009 }
1010
1011 bool hasPrefetch() const { return GFX12Insts; }
1012
1013 bool hasVmemPrefInsts() const { return HasVmemPrefInsts; }
1014
1016
1017 bool hasSafeCUPrefetch() const { return HasSafeCUPrefetch; }
1018
1019 bool hasCUStores() const { return HasCUStores; }
1020
1021 // Has s_cmpk_* instructions.
1022 bool hasSCmpK() const { return getGeneration() < GFX12; }
1023
1024 // Scratch is allocated in 256 dword per wave blocks for the entire
1025 // wavefront. When viewed from the perspective of an arbitrary workitem, this
1026 // is 4-byte aligned.
1027 //
1028 // Only 4-byte alignment is really needed to access anything. Transformations
1029 // on the pointer value itself may rely on the alignment / known low bits of
1030 // the pointer. Set this to something above the minimum to avoid needing
1031 // dynamic realignment in common cases.
1032 Align getStackAlignment() const { return Align(16); }
1033
1034 bool enableMachineScheduler() const override {
1035 return true;
1036 }
1037
1038 bool useAA() const override;
1039
1040 bool enableSubRegLiveness() const override {
1041 return true;
1042 }
1043
1046
1047 // static wrappers
1048 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
1049
1050 // XXX - Why is this here if it isn't in the default pass set?
1051 bool enableEarlyIfConversion() const override {
1052 return true;
1053 }
1054
1056 const SchedRegion &Region) const override;
1057
1059 const SchedRegion &Region) const override;
1060
1061 void mirFileLoaded(MachineFunction &MF) const override;
1062
1063 unsigned getMaxNumUserSGPRs() const {
1064 return AMDGPU::getMaxNumUserSGPRs(*this);
1065 }
1066
1067 bool hasSMemRealTime() const {
1068 return HasSMemRealTime;
1069 }
1070
1071 bool hasMovrel() const {
1072 return HasMovrel;
1073 }
1074
1075 bool hasVGPRIndexMode() const {
1076 return HasVGPRIndexMode;
1077 }
1078
1079 bool useVGPRIndexMode() const;
1080
1082 return getGeneration() >= VOLCANIC_ISLANDS;
1083 }
1084
1086
1087 bool hasScalarStores() const {
1088 return HasScalarStores;
1089 }
1090
1091 bool hasScalarAtomics() const {
1092 return HasScalarAtomics;
1093 }
1094
1095 bool hasLDSFPAtomicAddF32() const { return GFX8Insts; }
1097
1098 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
1099 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
1100
1101 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
1102 bool hasPermLane64() const { return getGeneration() >= GFX11; }
1103
1104 bool hasDPP() const {
1105 return HasDPP;
1106 }
1107
1108 bool hasDPPBroadcasts() const {
1109 return HasDPP && getGeneration() < GFX10;
1110 }
1111
1113 return HasDPP && getGeneration() < GFX10;
1114 }
1115
1116 bool hasDPP8() const {
1117 return HasDPP8;
1118 }
1119
1120 bool hasDPALU_DPP() const {
1121 return HasDPALU_DPP;
1122 }
1123
1124 bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }
1125
1126 bool hasPackedFP32Ops() const {
1127 return HasPackedFP32Ops;
1128 }
1129
1130 // Has V_PK_MOV_B32 opcode
1131 bool hasPkMovB32() const {
1132 return GFX90AInsts;
1133 }
1134
1136 return getGeneration() >= GFX10 || hasGFX940Insts();
1137 }
1138
1139 bool hasFmaakFmamkF64Insts() const { return hasGFX1250Insts(); }
1140
1141 bool hasImageInsts() const {
1142 return HasImageInsts;
1143 }
1144
1146 return HasExtendedImageInsts;
1147 }
1148
1149 bool hasR128A16() const {
1150 return HasR128A16;
1151 }
1152
1153 bool hasA16() const { return HasA16; }
1154
1155 bool hasG16() const { return HasG16; }
1156
1157 bool hasOffset3fBug() const {
1158 return HasOffset3fBug;
1159 }
1160
1162
1164
1165 bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
1166
1168
1170
1171 bool hasNSAEncoding() const { return HasNSAEncoding; }
1172
1173 bool hasNonNSAEncoding() const { return getGeneration() < GFX12; }
1174
1176
1177 unsigned getNSAMaxSize(bool HasSampler = false) const {
1178 return AMDGPU::getNSAMaxSize(*this, HasSampler);
1179 }
1180
1181 bool hasGFX10_AEncoding() const {
1182 return GFX10_AEncoding;
1183 }
1184
1185 bool hasGFX10_BEncoding() const {
1186 return GFX10_BEncoding;
1187 }
1188
1189 bool hasGFX10_3Insts() const {
1190 return GFX10_3Insts;
1191 }
1192
1193 bool hasMadF16() const;
1194
1195 bool hasMovB64() const { return GFX940Insts || GFX1250Insts; }
1196
1197 bool hasLshlAddU64Inst() const { return HasLshlAddU64Inst; }
1198
1199 // Scalar and global loads support scale_offset bit.
1200 bool hasScaleOffset() const { return GFX1250Insts; }
1201
1202 bool hasFlatGVSMode() const { return FlatGVSMode; }
1203
1204 // FLAT GLOBAL VOffset is signed
1205 bool hasSignedGVSOffset() const { return GFX1250Insts; }
1206
1207 bool enableSIScheduler() const {
1208 return EnableSIScheduler;
1209 }
1210
1211 bool loadStoreOptEnabled() const {
1212 return EnableLoadStoreOpt;
1213 }
1214
1215 bool hasSGPRInitBug() const {
1216 return SGPRInitBug;
1217 }
1218
1220 return UserSGPRInit16Bug && isWave32();
1221 }
1222
1224
1227 }
1228
1231 }
1232
1235 }
1236
1237 // \returns true if the subtarget supports DWORDX3 load/store instructions.
1239 return CIInsts;
1240 }
1241
1244 }
1245
1249 }
1250
1253 }
1254
1257 }
1258
1261 }
1262
1265 }
1266
1269 }
1270
1271 bool hasLDSMisalignedBug() const {
1272 return LDSMisalignedBug && !EnableCuMode;
1273 }
1274
1276 return HasInstFwdPrefetchBug;
1277 }
1278
1280 return HasVcmpxExecWARHazard;
1281 }
1282
1285 }
1286
1287 // The shift amount of a 64-bit shift cannot be the highest allocated
1288 // register if it is also at the end of the allocation block.
1290 return GFX90AInsts && !GFX940Insts;
1291 }
1292
1293 // Has one cycle hazard on transcendental instruction feeding a
1294 // non transcendental VALU.
1295 bool hasTransForwardingHazard() const { return GFX940Insts; }
1296
1297 // Has one cycle hazard on a VALU instruction partially writing dst with
1298 // a shift of result bits feeding another VALU instruction.
1300
1301 // Cannot use op_sel with v_dot instructions.
1302 bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; }
1303
1304 // Does not have HW interlocs for VALU writing and then reading SGPRs.
1305 bool hasVDecCoExecHazard() const {
1306 return GFX940Insts;
1307 }
1308
1309 bool hasNSAtoVMEMBug() const {
1310 return HasNSAtoVMEMBug;
1311 }
1312
1313 bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1314
1315 bool hasHardClauses() const { return MaxHardClauseLength > 0; }
1316
1317 bool hasGFX90AInsts() const { return GFX90AInsts; }
1318
1320 return getGeneration() == GFX10;
1321 }
1322
1323 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1324
1325 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1326
1327 bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
1328
1330 return getGeneration() == GFX11;
1331 }
1332
1334
1336
1337 bool requiresCodeObjectV6() const { return RequiresCOV6; }
1338
1340
1343 }
1344
1345 bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
1346
1347 bool hasVALUReadSGPRHazard() const { return GFX12Insts && !GFX1250Insts; }
1348
1350 return GFX1250Insts && getGeneration() == GFX12;
1351 }
1352
1353 /// Return if operations acting on VGPR tuples require even alignment.
1354 bool needsAlignedVGPRs() const { return RequiresAlignVGPR; }
1355
1356 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1357 bool hasSPackHL() const { return GFX11Insts; }
1358
1359 /// Return true if the target's EXP instruction has the COMPR flag, which
1360 /// affects the meaning of the EN (enable) bits.
1361 bool hasCompressedExport() const { return !GFX11Insts; }
1362
1363 /// Return true if the target's EXP instruction supports the NULL export
1364 /// target.
1365 bool hasNullExportTarget() const { return !GFX11Insts; }
1366
1367 bool has1_5xVGPRs() const { return Has1_5xVGPRs; }
1368
1369 bool hasVOPDInsts() const { return HasVOPDInsts; }
1370
1372
1373 /// Return true if the target has the S_DELAY_ALU instruction.
1374 bool hasDelayAlu() const { return GFX11Insts; }
1375
1376 bool hasPackedTID() const { return HasPackedTID; }
1377
1378 // GFX94* is a derivation to GFX90A. hasGFX940Insts() being true implies that
1379 // hasGFX90AInsts is also true.
1380 bool hasGFX940Insts() const { return GFX940Insts; }
1381
1382 // GFX950 is a derivation to GFX94*. hasGFX950Insts() implies that
1383 // hasGFX940Insts and hasGFX90AInsts are also true.
1384 bool hasGFX950Insts() const { return GFX950Insts; }
1385
1386 /// Returns true if the target supports
1387 /// global_load_lds_dwordx3/global_load_lds_dwordx4 or
1388 /// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.
1389 bool hasLDSLoadB96_B128() const {
1390 return hasGFX950Insts();
1391 }
1392
1393 bool hasVMemToLDSLoad() const { return HasVMemToLDSLoad; }
1394
1395 bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
1396
1398
1400
1402
1404
1405 /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
1406 /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
1407 bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
1408
1409 /// \returns true if inline constants are not supported for F16 pseudo
1410 /// scalar transcendentals.
1412 return getGeneration() == GFX12;
1413 }
1414
1415 /// \returns true if the target has instructions with xf32 format support.
1416 bool hasXF32Insts() const { return HasXF32Insts; }
1417
1418 bool hasBitOp3Insts() const { return HasBitOp3Insts; }
1419
1420 bool hasPermlane16Swap() const { return HasPermlane16Swap; }
1421 bool hasPermlane32Swap() const { return HasPermlane32Swap; }
1422 bool hasAshrPkInsts() const { return HasAshrPkInsts; }
1423
1426 }
1427
1430 }
1431
1432 bool hasMin3Max3PKF16() const { return HasMin3Max3PKF16; }
1433
1434 bool hasTanhInsts() const { return HasTanhInsts; }
1435
1437
1438 bool hasAddPC64Inst() const { return GFX1250Insts; }
1439
1442 }
1443
1445
1446 /// \returns true if the target has s_wait_xcnt insertion. Supported for
1447 /// GFX1250.
1448 bool hasWaitXCnt() const { return HasWaitXcnt; }
1449
1450 // A single DWORD instructions can use a 64-bit literal.
1451 bool has64BitLiterals() const { return Has64BitLiterals; }
1452
1454
1456
1457 /// \returns The maximum number of instructions that can be enclosed in an
1458 /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
1459 /// instruction.
1460 unsigned maxHardClauseLength() const { return MaxHardClauseLength; }
1461
1462 bool hasPrngInst() const { return HasPrngInst; }
1463
1465
1466 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1467 /// SGPRs
1468 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1469
1470 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1471 /// VGPRs
1472 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs,
1473 unsigned DynamicVGPRBlockSize) const;
1474
1475 /// Subtarget's minimum/maximum occupancy, in number of waves per EU, that can
1476 /// be achieved when the only function running on a CU is \p F, each workgroup
1477 /// uses \p LDSSize bytes of LDS, and each wave uses \p NumSGPRs SGPRs and \p
1478 /// NumVGPRs VGPRs. The flat workgroup sizes associated to the function are a
1479 /// range, so this returns a range as well.
1480 ///
1481 /// Note that occupancy can be affected by the scratch allocation as well, but
1482 /// we do not have enough information to compute it.
1483 std::pair<unsigned, unsigned> computeOccupancy(const Function &F,
1484 unsigned LDSSize = 0,
1485 unsigned NumSGPRs = 0,
1486 unsigned NumVGPRs = 0) const;
1487
1488 /// \returns true if the flat_scratch register should be initialized with the
1489 /// pointer to the wave's scratch memory rather than a size and offset.
1492 }
1493
1494 /// \returns true if the flat_scratch register is initialized by the HW.
1495 /// In this case it is readonly.
1497
1498 /// \returns true if the architected SGPRs are enabled.
1500
1501 /// \returns true if Global Data Share is supported.
1502 bool hasGDS() const { return HasGDS; }
1503
1504 /// \returns true if Global Wave Sync is supported.
1505 bool hasGWS() const { return HasGWS; }
1506
1507 /// \returns true if the machine has merged shaders in which s0-s7 are
1508 /// reserved by the hardware and user SGPRs start at s8
1509 bool hasMergedShaders() const {
1510 return getGeneration() >= GFX9;
1511 }
1512
1513 /// \returns true if the target supports the pre-NGG legacy geometry path.
1514 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1515
1516 /// \returns true if preloading kernel arguments is supported.
1517 bool hasKernargPreload() const { return KernargPreload; }
1518
1519 /// \returns true if the target has the split barriers feature.
1520 bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
1521
1522 /// \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
1523 bool hasCvtFP8VOP1Bug() const { return HasCvtFP8Vop1Bug; }
1524
1525 // \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a
1526 // no-return form.
1528
1529 /// \returns true if the target has the DX10_CLAMP kernel descriptor mode bit.
1530 bool hasDX10ClampMode() const { return getGeneration() < GFX12; }
1531
1532 /// \returns true if the target has the IEEE kernel descriptor mode bit.
1533 bool hasIEEEMode() const { return getGeneration() < GFX12; }
1534
1535 // \returns true if the target has IEEE fminimum/fmaximum instructions
1537
1538 /// \returns true if the target has the WG_RR_MODE kernel descriptor mode bit.
1539 bool hasRrWGMode() const { return getGeneration() >= GFX12; }
1540
1541 /// \returns true if VADDR and SADDR fields in VSCRATCH can use negative
1542 /// values.
1543 bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }
1544
1545 bool hasGFX1250Insts() const { return GFX1250Insts; }
1546
1547 bool hasVOPD3() const { return GFX1250Insts; }
1548
1549 /// \returns true if the target has V_ADD_U64/V_SUB_U64 instructions.
1550 bool hasAddSubU64Insts() const { return HasAddSubU64Insts; }
1551
1552 /// \returns true if the target has the V_MAD_U32 instruction.
1553 bool hasMadU32Inst() const { return HasMadU32Inst; }
1554
1555 /// \returns true if the target has V_MUL_U64/V_MUL_I64 instructions.
1556 bool hasVectorMulU64() const { return GFX1250Insts; }
1557
1558 /// \returns true if the target has V_MAD_NC_U64_U32/V_MAD_NC_I64_I32
1559 /// instructions.
1560 bool hasMadU64U32NoCarry() const { return GFX1250Insts; }
1561
1562 /// \returns true if the target has V_{MIN|MAX}_{I|U}64 instructions.
1563 bool hasIntMinMax64() const { return GFX1250Insts; }
1564
1565 /// \returns true if the target has V_ADD_{MIN|MAX}_{I|U}32 instructions.
1566 bool hasAddMinMaxInsts() const { return GFX1250Insts; }
1567
1568 /// \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions.
1569 bool hasPkAddMinMaxInsts() const { return GFX1250Insts; }
1570
1571 /// \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions.
1572 bool hasPkMinMax3Insts() const { return GFX1250Insts; }
1573
1574 /// \returns true if the target has the S_GET_SHADER_CYCLES_U64 instruction.
1575 bool hasSGetShaderCyclesInst() const { return GFX1250Insts; }
1576
1577 // \returns true if target has S_SETPRIO_INC_WG instruction.
1579
1580 /// \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
1581 /// of sign-extending. Note that GFX1250 has not only fixed the bug but also
1582 /// extended VA to 57 bits.
1583 bool hasGetPCZeroExtension() const { return GFX12Insts && !GFX1250Insts; }
1584
1585 // \returns true if the target needs to create a prolog for backward
1586 // compatibility when preloading kernel arguments.
1588 return hasKernargPreload() && !GFX1250Insts;
1589 }
1590
1591 /// \returns SGPR allocation granularity supported by the subtarget.
1592 unsigned getSGPRAllocGranule() const {
1594 }
1595
1596 /// \returns SGPR encoding granularity supported by the subtarget.
1597 unsigned getSGPREncodingGranule() const {
1599 }
1600
1601 /// \returns Total number of SGPRs supported by the subtarget.
1602 unsigned getTotalNumSGPRs() const {
1604 }
1605
1606 /// \returns Addressable number of SGPRs supported by the subtarget.
1607 unsigned getAddressableNumSGPRs() const {
1609 }
1610
1611 /// \returns Minimum number of SGPRs that meets the given number of waves per
1612 /// execution unit requirement supported by the subtarget.
1613 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1614 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1615 }
1616
1617 /// \returns Maximum number of SGPRs that meets the given number of waves per
1618 /// execution unit requirement supported by the subtarget.
1619 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1620 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1621 }
1622
1623 /// \returns Reserved number of SGPRs. This is the common
1624 /// utility function called by MachineFunction and
1625 /// Function variants of getReservedNumSGPRs.
1626 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1627 /// \returns Reserved number of SGPRs for given machine function \p MF.
1628 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1629
1630 /// \returns Reserved number of SGPRs for given function \p F.
1631 unsigned getReservedNumSGPRs(const Function &F) const;
1632
1633 /// \returns Maximum number of preloaded SGPRs for the subtarget.
1634 unsigned getMaxNumPreloadedSGPRs() const;
1635
1636 /// \returns max num SGPRs. This is the common utility
1637 /// function called by MachineFunction and Function
1638 /// variants of getMaxNumSGPRs.
1639 unsigned getBaseMaxNumSGPRs(const Function &F,
1640 std::pair<unsigned, unsigned> WavesPerEU,
1641 unsigned PreloadedSGPRs,
1642 unsigned ReservedNumSGPRs) const;
1643
1644 /// \returns Maximum number of SGPRs that meets number of waves per execution
1645 /// unit requirement for function \p MF, or number of SGPRs explicitly
1646 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1647 ///
1648 /// \returns Value that meets number of waves per execution unit requirement
1649 /// if explicitly requested value cannot be converted to integer, violates
1650 /// subtarget's specifications, or does not meet number of waves per execution
1651 /// unit requirement.
1652 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1653
1654 /// \returns Maximum number of SGPRs that meets number of waves per execution
1655 /// unit requirement for function \p F, or number of SGPRs explicitly
1656 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1657 ///
1658 /// \returns Value that meets number of waves per execution unit requirement
1659 /// if explicitly requested value cannot be converted to integer, violates
1660 /// subtarget's specifications, or does not meet number of waves per execution
1661 /// unit requirement.
1662 unsigned getMaxNumSGPRs(const Function &F) const;
1663
1664 /// \returns VGPR allocation granularity supported by the subtarget.
1665 unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const {
1666 return AMDGPU::IsaInfo::getVGPRAllocGranule(this, DynamicVGPRBlockSize);
1667 }
1668
1669 /// \returns VGPR encoding granularity supported by the subtarget.
1670 unsigned getVGPREncodingGranule() const {
1672 }
1673
1674 /// \returns Total number of VGPRs supported by the subtarget.
1675 unsigned getTotalNumVGPRs() const {
1677 }
1678
1679 /// \returns Addressable number of architectural VGPRs supported by the
1680 /// subtarget.
1683 }
1684
1685 /// \returns Addressable number of VGPRs supported by the subtarget.
1686 unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const {
1687 return AMDGPU::IsaInfo::getAddressableNumVGPRs(this, DynamicVGPRBlockSize);
1688 }
1689
1690 /// \returns the minimum number of VGPRs that will prevent achieving more than
1691 /// the specified number of waves \p WavesPerEU.
1692 unsigned getMinNumVGPRs(unsigned WavesPerEU,
1693 unsigned DynamicVGPRBlockSize) const {
1694 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU,
1695 DynamicVGPRBlockSize);
1696 }
1697
1698 /// \returns the maximum number of VGPRs that can be used and still achieve
1699 /// at least the specified number of waves \p WavesPerEU.
1700 unsigned getMaxNumVGPRs(unsigned WavesPerEU,
1701 unsigned DynamicVGPRBlockSize) const {
1702 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU,
1703 DynamicVGPRBlockSize);
1704 }
1705
1706 /// \returns max num VGPRs. This is the common utility function
1707 /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1708 unsigned
1710 std::pair<unsigned, unsigned> NumVGPRBounds) const;
1711
1712 /// \returns Maximum number of VGPRs that meets number of waves per execution
1713 /// unit requirement for function \p F, or number of VGPRs explicitly
1714 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1715 ///
1716 /// \returns Value that meets number of waves per execution unit requirement
1717 /// if explicitly requested value cannot be converted to integer, violates
1718 /// subtarget's specifications, or does not meet number of waves per execution
1719 /// unit requirement.
1720 unsigned getMaxNumVGPRs(const Function &F) const;
1721
/// \returns Maximum number of AGPRs for function \p F. This forwards to
/// getMaxNumVGPRs(F), so the reported AGPR limit equals the VGPR limit.
1722 unsigned getMaxNumAGPRs(const Function &F) const {
1723 return getMaxNumVGPRs(F);
1724 }
1725
1726 /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
1727 /// of waves per execution unit required for the function \p F.
1728 std::pair<unsigned, unsigned> getMaxNumVectorRegs(const Function &F) const;
1729
1730 /// \returns Maximum number of VGPRs that meets number of waves per execution
1731 /// unit requirement for function \p MF, or number of VGPRs explicitly
1732 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1733 ///
1734 /// \returns Value that meets number of waves per execution unit requirement
1735 /// if explicitly requested value cannot be converted to integer, violates
1736 /// subtarget's specifications, or does not meet number of waves per execution
1737 /// unit requirement.
1738 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1739
/// \returns true if the subtarget generation is GFX10 or newer, which is the
/// condition used here for wave32 support.
1740 bool supportsWave32() const { return getGeneration() >= GFX10; }
1741
/// \returns true unless the subtarget has GFX1250 instructions, for which
/// wave64 is reported as unsupported.
1742 bool supportsWave64() const { return !hasGFX1250Insts(); }
1743
/// \returns true if the subtarget's wavefront size is 32.
1744 bool isWave32() const {
1745 return getWavefrontSize() == 32;
1746 }
1747
/// \returns true if the subtarget's wavefront size is 64.
1748 bool isWave64() const {
1749 return getWavefrontSize() == 64;
1750 }
1751
1752 /// \returns true if the wave size of this subtarget is reliably known. This
1753 /// is false only for a default target-cpu that does not have an explicit
1754 /// +wavefrontsize target feature.
1755 bool isWaveSizeKnown() const {
1756 return hasFeature(AMDGPU::FeatureWavefrontSize32) ||
1757 hasFeature(AMDGPU::FeatureWavefrontSize64);
1758 }
1759
1761 return getRegisterInfo()->getBoolRC();
1762 }
1763
1764 /// \returns Maximum number of work groups per compute unit supported by the
1765 /// subtarget and limited by given \p FlatWorkGroupSize.
1766 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1767 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1768 }
1769
1770 /// \returns Minimum flat work group size supported by the subtarget.
1771 unsigned getMinFlatWorkGroupSize() const override {
1773 }
1774
1775 /// \returns Maximum flat work group size supported by the subtarget.
1776 unsigned getMaxFlatWorkGroupSize() const override {
1778 }
1779
1780 /// \returns Number of waves per execution unit required to support the given
1781 /// \p FlatWorkGroupSize.
1782 unsigned
1783 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1784 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1785 }
1786
1787 /// \returns Minimum number of waves per execution unit supported by the
1788 /// subtarget.
1789 unsigned getMinWavesPerEU() const override {
1791 }
1792
1793 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1794 SDep &Dep,
1795 const TargetSchedModel *SchedModel) const override;
1796
1797 /// \returns true if it's beneficial on this subtarget for the scheduler to
1798 /// cluster stores as well as loads.
1799 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1800
1801 /// \returns the number of address arguments from which to enable MIMG NSA
1802 /// on supported architectures.
1803 unsigned getNSAThreshold(const MachineFunction &MF) const;
1804
1805 // \returns true if the subtarget has a hazard requiring an "s_nop 0"
1806 // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
1808
1809 // \returns true if the subtarget needs S_WAIT_ALU 0 before S_GETREG_B32 on
1810 // STATUS, STATE_PRIV, EXCP_FLAG_PRIV, or EXCP_FLAG_USER.
1812
/// \returns true if the DynamicVGPR flag is set for this subtarget.
1813 bool isDynamicVGPREnabled() const { return DynamicVGPR; }
/// \returns the dynamic VGPR block size: 32 if DynamicVGPRBlockSize32 is set,
/// otherwise 16. NOTE(review): the unit is presumably VGPRs -- confirm.
1814 unsigned getDynamicVGPRBlockSize() const {
1815 return DynamicVGPRBlockSize32 ? 32 : 16;
1816 }
1817
1819 // AMDGPU doesn't care if early-clobber and undef operands are allocated
1820 // to the same register.
1821 return false;
1822 }
1823
1824 // DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused with anything
1825 // and must be surrounded by S_WAIT_ALU(0xFFE3).
1827 return getGeneration() == GFX12;
1828 }
1829
1830 // Requires s_wait_alu(0) after s102/s103 write and src_flat_scratch_base
1831 // read.
1833 return GFX1250Insts && getGeneration() == GFX12;
1834 }
1835};
1836
1838public:
1839 bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
1840
1841 bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
1842
1843 bool hasDispatchPtr() const { return DispatchPtr; }
1844
1845 bool hasQueuePtr() const { return QueuePtr; }
1846
1847 bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
1848
1849 bool hasDispatchID() const { return DispatchID; }
1850
1851 bool hasFlatScratchInit() const { return FlatScratchInit; }
1852
1853 bool hasPrivateSegmentSize() const { return PrivateSegmentSize; }
1854
1855 unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
1856
1857 unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
1858
1859 unsigned getNumFreeUserSGPRs();
1860
1861 void allocKernargPreloadSGPRs(unsigned NumSGPRs);
1862
1863 enum UserSGPRID : unsigned {
1873
1874 // Returns the size in number of SGPRs for preload user SGPR field.
1876 switch (ID) {
1878 return 2;
1880 return 4;
1881 case DispatchPtrID:
1882 return 2;
1883 case QueuePtrID:
1884 return 2;
1886 return 2;
1887 case DispatchIdID:
1888 return 2;
1889 case FlatScratchInitID:
1890 return 2;
1892 return 1;
1893 }
1894 llvm_unreachable("Unknown UserSGPRID.");
1895 }
1896
1897 GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
1898
1899private:
1900 const GCNSubtarget &ST;
1901
1902 // Private memory buffer
1903 // Compute directly in sgpr[0:1]
1904 // Other shaders indirect 64-bits at sgpr[0:1]
1905 bool ImplicitBufferPtr = false;
1906
1907 bool PrivateSegmentBuffer = false;
1908
1909 bool DispatchPtr = false;
1910
1911 bool QueuePtr = false;
1912
1913 bool KernargSegmentPtr = false;
1914
1915 bool DispatchID = false;
1916
1917 bool FlatScratchInit = false;
1918
1919 bool PrivateSegmentSize = false;
1920
1921 unsigned NumKernargPreloadSGPRs = 0;
1922
1923 unsigned NumUsedUserSGPRs = 0;
1924};
1925
1926} // end namespace llvm
1927
1928#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool hasPrefetch() const
bool hasMemoryAtomicFaddF32DenormalSupport() const
Definition: GCNSubtarget.h:948
bool hasFlat() const
Definition: GCNSubtarget.h:428
bool hasD16Images() const
Definition: GCNSubtarget.h:751
bool hasMinimum3Maximum3F32() const
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:66
bool useVGPRIndexMode() const
bool HasIEEEMinimumMaximumInsts
Definition: GCNSubtarget.h:272
bool hasAtomicDsPkAdd16Insts() const
Definition: GCNSubtarget.h:910
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:800
bool hasFlatGVSMode() const
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:256
bool hasPermlane32Swap() const
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:533
bool hasSwap() const
Definition: GCNSubtarget.h:488
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:890
bool HasAtomicFMinFMaxF64FlatInsts
Definition: GCNSubtarget.h:175
bool hasPkMinMax3Insts() const
bool hasDot2Insts() const
Definition: GCNSubtarget.h:830
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:743
bool hasMergedShaders() const
bool hasA16() const
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:804
bool hasRrWGMode() const
bool supportsBackOffBarrier() const
Definition: GCNSubtarget.h:614
bool hasScalarCompareEq64() const
bool has1_5xVGPRs() const
int getLDSBankCount() const
Definition: GCNSubtarget.h:379
bool hasSafeCUPrefetch() const
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:434
bool hasImageStoreD16Bug() const
bool hasNonNSAEncoding() const
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:522
void mirFileLoaded(MachineFunction &MF) const override
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:513
bool loadStoreOptEnabled() const
bool enableSubRegLiveness() const override
bool hasDPPWavefrontShifts() const
unsigned getSGPRAllocGranule() const
bool hasAtomicFMinFMaxF64FlatInsts() const
Definition: GCNSubtarget.h:906
bool hasLdsAtomicAddF64() const
Definition: GCNSubtarget.h:727
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:739
bool flatScratchIsPointer() const
bool hasSDWAMac() const
Definition: GCNSubtarget.h:812
bool hasFP8ConversionInsts() const
Definition: GCNSubtarget.h:886
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
Definition: GCNSubtarget.h:850
bool hasApertureRegs() const
Definition: GCNSubtarget.h:648
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:68
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:517
bool hasBitOp3Insts() const
bool hasFPAtomicToDenormModeHazard() const
unsigned getAddressableNumArchVGPRs() const
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:678
bool vmemWriteNeedsExpWaitcnt() const
bool hasAtomicFMinFMaxF32FlatInsts() const
Definition: GCNSubtarget.h:902
bool shouldClusterStores() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
bool hasIEEEMinimumMaximumInsts() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasDefaultComponentZero() const
Definition: GCNSubtarget.h:965
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:979
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
bool hasRelaxedBufferOOBMode() const
Definition: GCNSubtarget.h:646
bool hasPkAddMinMaxInsts() const
bool hasDLInsts() const
Definition: GCNSubtarget.h:820
bool hasExtendedImageInsts() const
bool hasVmemWriteVgprInOrder() const
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:454
bool HasTransposeLoadF4F6Insts
Definition: GCNSubtarget.h:242
bool hasMAIInsts() const
Definition: GCNSubtarget.h:878
bool hasLDSLoadB96_B128() const
Returns true if the target supports global_load_lds_dwordx3/global_load_lds_dwordx4 or buffer_load_dw...
bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const
Definition: GCNSubtarget.h:955
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:686
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:731
bool hasArchitectedSGPRs() const
bool hasFmaakFmamkF64Insts() const
bool hasTanhInsts() const
bool hasHWFP64() const
Definition: GCNSubtarget.h:412
bool hasScaleOffset() const
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:572
bool hasPrivEnabledTrap2NopBug() const
bool hasMFMAInlineLiteralBug() const
bool hasCvtScaleForwardingHazard() const
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:983
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:630
bool hasTensorCvtLutInsts() const
bool hasNegativeScratchOffsetBug() const
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:308
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:77
bool hasDot1Insts() const
Definition: GCNSubtarget.h:826
bool hasDot3Insts() const
Definition: GCNSubtarget.h:834
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool hasVALUMaskWriteHazard() const
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:330
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:608
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
Definition: GCNSubtarget.h:918
unsigned getTotalNumSGPRs() const
bool hasGFX1250Insts() const
bool HasLdsBarrierArriveAtomic
Definition: GCNSubtarget.h:281
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:350
bool hasSafeSmemPrefetch() const
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
void overridePostRASchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
bool HasShaderCyclesHiLoRegisters
Definition: GCNSubtarget.h:213
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool hasPkMovB32() const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Align getStackAlignment() const
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:598
bool hasScalarSubwordLoads() const
Definition: GCNSubtarget.h:500
bool hasDot11Insts() const
Definition: GCNSubtarget.h:866
bool enableFlatScratch() const
Definition: GCNSubtarget.h:703
bool hasMadF16() const
bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const
bool hasMin3Max3PKF16() const
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:618
bool hasR128A16() const
bool hasOffset3fBug() const
bool hasDwordx3LoadStores() const
bool hasPrngInst() const
bool hasSignedScratchOffsets() const
bool HasPrivEnabledTrap2NopBug
Definition: GCNSubtarget.h:264
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:708
bool hasSGPRInitBug() const
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:674
bool hasFmaMixBF16Insts() const
Definition: GCNSubtarget.h:478
bool hasGetPCZeroExtension() const
bool hasPermLane64() const
bool requiresNopBeforeDeallocVGPRs() const
unsigned getMinNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
bool hasVMemToLDSLoad() const
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:506
bool supportsWave32() const
bool hasCUStores() const
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:660
bool hasFlatAtomicFaddF32Inst() const
Definition: GCNSubtarget.h:938
bool hasKernargPreload() const
bool hasFP8Insts() const
Definition: GCNSubtarget.h:882
unsigned getMaxNumAGPRs(const Function &F) const
bool hasReadM0MovRelInterpHazard() const
bool isDynamicVGPREnabled() const
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:320
bool hasRequiredExportPriority() const
bool hasDOTOpSelHazard() const
bool hasLdsWaitVMSRC() const
bool hasMSAALoadDstSelBug() const
const TargetRegisterClass * getBoolRC() const
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > NumVGPRBounds) const
bool hasFmaakFmamkF32Insts() const
bool hasVscnt() const
Definition: GCNSubtarget.h:975
bool hasMad64_32() const
Definition: GCNSubtarget.h:796
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:334
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
Definition: GCNSubtarget.h:122
bool hasHardClauses() const
bool useDS128() const
Definition: GCNSubtarget.h:582
bool hasExtendedWaitCounts() const
bool hasBVHDualAndBVH8Insts() const
bool hasMinimum3Maximum3PKF16() const
bool hasLshlAddU64Inst() const
bool hasLDSMisalignedBug() const
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:747
bool hasFmacF64Inst() const
Definition: GCNSubtarget.h:824
bool hasXF32Insts() const
bool hasInstPrefetch() const
bool hasAddPC64Inst() const
unsigned maxHardClauseLength() const
bool hasAshrPkInsts() const
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:792
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
bool hasExportInsts() const
Definition: GCNSubtarget.h:720
bool hasDPP() const
bool hasVINTERPEncoding() const
Definition: GCNSubtarget.h:724
bool hasGloballyAddressableScratch() const
const AMDGPURegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:342
bool hasAddSubU64Insts() const
bool hasLegacyGeometry() const
bool has64BitLiterals() const
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:502
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:664
bool hasScalarAtomics() const
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:312
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:634
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
bool hasMinimum3Maximum3F16() const
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:816
bool hasAtomicFMinFMaxF32GlobalInsts() const
Definition: GCNSubtarget.h:894
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
bool hasLdsBarrierArriveAtomic() const
bool hasGFX950Insts() const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:346
unsigned getMaxNumPreloadedSGPRs() const
bool hasAtomicCSubNoRtnInsts() const
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:699
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
bool hasVALUPartialForwardingHazard() const
bool dumpCode() const
Definition: GCNSubtarget.h:558
bool hasNoDataDepHazard() const
Definition: GCNSubtarget.h:999
void overrideSchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
bool useVGPRBlockOpsForCSR() const
std::pair< unsigned, unsigned > computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:626
bool hasAddMinMaxInsts() const
bool needsKernArgPreloadProlog() const
bool hasRestrictedSOffset() const
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:470
bool hasIntClamp() const
Definition: GCNSubtarget.h:400
bool hasGFX10_AEncoding() const
bool hasFP8E5M3Insts() const
Definition: GCNSubtarget.h:888
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:735
unsigned getMaxNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const
bool hasEmulatedSystemScopeAtomics() const
Definition: GCNSubtarget.h:961
bool hasMadU64U32NoCarry() const
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:554
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:316
bool hasPackedFP32Ops() const
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
Definition: GCNSubtarget.h:846
bool hasGFX940Insts() const
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:420
bool hasScalarStores() const
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:652
bool enableMachineScheduler() const override
bool hasLDSFPAtomicAddF64() const
bool HasAtomicFlatPkAdd16Insts
Definition: GCNSubtarget.h:177
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:682
bool HasGloballyAddressableScratch
Definition: GCNSubtarget.h:286
bool hasDX10ClampMode() const
unsigned getNSAThreshold(const MachineFunction &MF) const
bool HasAtomicFMinFMaxF32GlobalInsts
Definition: GCNSubtarget.h:172
bool getScalarizeGlobalBehavior() const
bool HasAtomicFMinFMaxF32FlatInsts
Definition: GCNSubtarget.h:174
bool hasReadM0LdsDmaHazard() const
bool hasScalarSMulU64() const
Definition: GCNSubtarget.h:785
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:375
bool hasScratchBaseForwardingHazard() const
bool hasIntMinMax64() const
bool hasShaderCyclesHiLoRegisters() const
Definition: GCNSubtarget.h:991
bool hasSDWASdst() const
Definition: GCNSubtarget.h:808
bool HasDefaultComponentBroadcast
Definition: GCNSubtarget.h:191
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:492
bool hasFFBL() const
Definition: GCNSubtarget.h:458
bool hasNSAEncoding() const
bool requiresDisjointEarlyClobberAndUndef() const override
bool hasVALUReadSGPRHazard() const
bool hasSMemRealTime() const
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:670
bool hasDPPBroadcasts() const
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:604
bool hasMovB64() const
bool hasVmemPrefInsts() const
unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const
bool hasInstFwdPrefetchBug() const
bool hasAtomicFMinFMaxF64GlobalInsts() const
Definition: GCNSubtarget.h:898
bool hasMed3_16() const
Definition: GCNSubtarget.h:466
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasUnalignedScratchAccessEnabled() const
Definition: GCNSubtarget.h:638
bool hasMovrel() const
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool hasAtomicFlatPkAdd16Insts() const
Definition: GCNSubtarget.h:912
bool hasBFI() const
Definition: GCNSubtarget.h:446
bool hasDot13Insts() const
Definition: GCNSubtarget.h:874
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
Definition: GCNSubtarget.h:757
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:249
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool isWave32() const
bool hasVGPRIndexMode() const
bool HasAtomicBufferGlobalPkAddF16Insts
Definition: GCNSubtarget.h:182
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs, unsigned DynamicVGPRBlockSize) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:622
bool isWaveSizeKnown() const
Returns if the wavesize of this subtarget is known reliable.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:383
unsigned getMinFlatWorkGroupSize() const override
bool hasImageInsts() const
bool hasImageGather4D16Bug() const
bool HasRequiredExportPriority
Definition: GCNSubtarget.h:269
bool hasFMA() const
Definition: GCNSubtarget.h:484
bool hasDot10Insts() const
Definition: GCNSubtarget.h:862
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool hasCvtFP8VOP1Bug() const
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:567
bool supportsWave64() const
bool HasAtomicBufferPkAddBF16Inst
Definition: GCNSubtarget.h:185
bool hasNegativeUnalignedScratchOffsetBug() const
bool hasFFBH() const
Definition: GCNSubtarget.h:462
bool hasFormattedMUBUFInsts() const
Definition: GCNSubtarget.h:718
bool hasFlatScratchSVSMode() const
Definition: GCNSubtarget.h:697
bool supportsWGP() const
Definition: GCNSubtarget.h:394
bool hasG16() const
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:416
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:914
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
Definition: GCNSubtarget.h:181
bool hasPermlane16Swap() const
bool hasNSAtoVMEMBug() const
bool HasArchitectedFlatScratch
Definition: GCNSubtarget.h:222
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
Definition: GCNSubtarget.h:922
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:408
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using `SGPRs` SGPRs.
bool hasVOP3DPP() const
bool hasAtomicBufferPkAddBF16Inst() const
Definition: GCNSubtarget.h:934
bool HasAgentScopeFineGrainedRemoteMemoryAtomics
Definition: GCNSubtarget.h:189
unsigned getMaxFlatWorkGroupSize() const override
bool hasDPP8() const
bool hasDot5Insts() const
Definition: GCNSubtarget.h:842
unsigned getMaxNumUserSGPRs() const
bool hasTransposeLoadF4F6Insts() const
bool hasMadU32Inst() const
bool hasAtomicFaddNoRtnInsts() const
Definition: GCNSubtarget.h:920
unsigned MaxHardClauseLength
The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than t...
Definition: GCNSubtarget.h:196
bool hasPermLaneX16() const
bool hasFlatScratchSVSSwizzleBug() const
bool hasFlatBufferGlobalAtomicFaddF64Inst() const
Definition: GCNSubtarget.h:942
bool HasEmulatedSystemScopeAtomics
Definition: GCNSubtarget.h:190
bool hasNoF16PseudoScalarTransInlineConstants() const
bool hasIEEEMode() const
bool hasScalarDwordx3Loads() const
bool hasVDecCoExecHazard() const
bool hasSignedGVSOffset() const
bool hasLDSFPAtomicAddF32() const
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool hasBFM() const
Definition: GCNSubtarget.h:450
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition: GCNSubtarget.h:592
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
bool hasDot8Insts() const
Definition: GCNSubtarget.h:854
bool hasVectorMulU64() const
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:496
bool hasSCmpK() const
bool hasPseudoScalarTrans() const
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:338
bool requiresWaitIdleBeforeGetReg() const
bool hasPointSampleAccel() const
bool hasDot12Insts() const
Definition: GCNSubtarget.h:870
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:587
bool hasGWS() const
bool HasAtomicFMinFMaxF64GlobalInsts
Definition: GCNSubtarget.h:173
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:576
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
Generation getGeneration() const
Definition: GCNSubtarget.h:356
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:995
bool hasAtomicBufferGlobalPkAddF16Insts() const
Definition: GCNSubtarget.h:926
std::pair< unsigned, unsigned > getMaxNumVectorRegs(const Function &F) const
Return a pair of maximum numbers of VGPRs and AGPRs that meet the number of waves per execution unit ...
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:971
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:656
bool hasScalarAddSub64() const
Definition: GCNSubtarget.h:783
bool hasSplitBarriers() const
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:787
bool enableEarlyIfConversion() const override
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition: GCNSubtarget.h:539
bool hasSGetShaderCyclesInst() const
bool hasRFEHazards() const
Definition: GCNSubtarget.h:549
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
Definition: GCNSubtarget.h:545
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:693
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:771
bool hasDPALU_DPP() const
bool enableSIScheduler() const
bool hasAtomicGlobalPkAddBF16Inst() const
Definition: GCNSubtarget.h:930
bool hasAddr64() const
Definition: GCNSubtarget.h:424
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:248
bool HasAtomicGlobalPkAddBF16Inst
Definition: GCNSubtarget.h:184
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:642
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:528
bool isWave64() const
unsigned getDynamicVGPRBlockSize() const
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:474
bool hasCARRY() const
Definition: GCNSubtarget.h:480
bool hasPackedTID() const
bool setRegModeNeedsVNOPs() const
bool hasFP64() const
Definition: GCNSubtarget.h:404
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:779
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:987
bool hasSALUFloatInsts() const
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:97
bool hasFractBug() const
Definition: GCNSubtarget.h:438
bool isPreciseMemoryEnabled() const
Definition: GCNSubtarget.h:668
bool hasDPPSrc1SGPR() const
bool hasGDS() const
unsigned getMaxWaveScratchSize() const
Definition: GCNSubtarget.h:360
bool HasMemoryAtomicFaddF32DenormalSupport
Definition: GCNSubtarget.h:180
bool hasMTBUFInsts() const
Definition: GCNSubtarget.h:716
bool hasDot4Insts() const
Definition: GCNSubtarget.h:838
bool flatScratchIsArchitected() const
bool hasPartialNSAEncoding() const
bool hasWaitXCnt() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
bool hasSetPrioIncWgInst() const
~GCNSubtarget() override
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool hasDot9Insts() const
Definition: GCNSubtarget.h:858
bool hasVOPD3() const
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:712
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:64
bool hasDefaultComponentBroadcast() const
Definition: GCNSubtarget.h:967
bool requiresCodeObjectV6() const
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:326
bool hasBFE() const
Definition: GCNSubtarget.h:442
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:766
bool HasFlatBufferGlobalAtomicFaddF64Inst
Definition: GCNSubtarget.h:187
static unsigned getNumUserSGPRForField(UserSGPRID ID)
bool hasKernargSegmentPtr() const
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
bool hasPrivateSegmentBuffer() const
bool hasImplicitBufferPtr() const
unsigned getNumKernargPreloadSGPRs() const
bool hasPrivateSegmentSize() const
unsigned getNumUsedUserSGPRs() const
Itinerary data supplied by a subtarget to be used by a target.
Scheduling dependency.
Definition: ScheduleDAG.h:51
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:235
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:249
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:47
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:203
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
A region of an MBB for scheduling.