LLVM 22.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8//
9/// \file
10/// AMD GCN specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16
17#include "AMDGPUCallLowering.h"
19#include "AMDGPUSubtarget.h"
20#include "SIFrameLowering.h"
21#include "SIISelLowering.h"
22#include "SIInstrInfo.h"
25
26#define GET_SUBTARGETINFO_HEADER
27#include "AMDGPUGenSubtargetInfo.inc"
28
29namespace llvm {
30
31class GCNTargetMachine;
32
34 public AMDGPUSubtarget {
35public:
37
38 // Following 2 enums are documented at:
39 // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
40 enum class TrapHandlerAbi {
41 NONE = 0x00,
42 AMDHSA = 0x01,
43 };
44
45 enum class TrapID {
46 LLVMAMDHSATrap = 0x02,
48 };
49
50private:
51 /// SelectionDAGISel related APIs.
52 std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;
53
54 /// GlobalISel related APIs.
55 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
56 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
57 std::unique_ptr<InstructionSelector> InstSelector;
58 std::unique_ptr<LegalizerInfo> Legalizer;
59 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
60
61protected:
62 // Basic subtarget description.
65 unsigned Gen = INVALID;
67 int LDSBankCount = 0;
69
70 // Possibly statically set by tablegen, but may want to be overridden.
71 bool FastDenormalF32 = false;
72 bool HalfRate64Ops = false;
73 bool FullRate64Ops = false;
74
75 // Dynamically set bits that enable features.
76 bool FlatForGlobal = false;
78 bool BackOffBarrier = false;
80 bool UnalignedAccessMode = false;
82 bool HasApertureRegs = false;
83 bool SupportsXNACK = false;
84 bool KernargPreload = false;
85
86 // This should not be used directly. 'TargetID' tracks the dynamic settings
87 // for XNACK.
88 bool EnableXNACK = false;
89
90 bool EnableTgSplit = false;
91 bool EnableCuMode = false;
92 bool TrapHandler = false;
93 bool EnablePreciseMemory = false;
94
95 // Used as options.
96 bool EnableLoadStoreOpt = false;
98 bool EnableSIScheduler = false;
99 bool EnableDS128 = false;
101 bool DumpCode = false;
102
// Subtarget static properties set by tablegen.
104 bool FP64 = false;
105 bool FMA = false;
106 bool MIMG_R128 = false;
107 bool CIInsts = false;
108 bool GFX8Insts = false;
109 bool GFX9Insts = false;
110 bool GFX90AInsts = false;
111 bool GFX940Insts = false;
112 bool GFX950Insts = false;
113 bool GFX10Insts = false;
114 bool GFX11Insts = false;
115 bool GFX12Insts = false;
116 bool GFX1250Insts = false;
117 bool GFX10_3Insts = false;
118 bool GFX7GFX8GFX9Insts = false;
119 bool SGPRInitBug = false;
120 bool UserSGPRInit16Bug = false;
123 bool HasSMemRealTime = false;
124 bool HasIntClamp = false;
125 bool HasFmaMixInsts = false;
126 bool HasFmaMixBF16Insts = false;
127 bool HasMovrel = false;
128 bool HasVGPRIndexMode = false;
130 bool HasScalarStores = false;
131 bool HasScalarAtomics = false;
132 bool HasSDWAOmod = false;
133 bool HasSDWAScalar = false;
134 bool HasSDWASdst = false;
135 bool HasSDWAMac = false;
136 bool HasSDWAOutModsVOPC = false;
137 bool HasDPP = false;
138 bool HasDPP8 = false;
139 bool HasDPALU_DPP = false;
140 bool HasDPPSrc1SGPR = false;
141 bool HasPackedFP32Ops = false;
142 bool HasImageInsts = false;
144 bool HasR128A16 = false;
145 bool HasA16 = false;
146 bool HasG16 = false;
147 bool HasNSAEncoding = false;
149 bool GFX10_AEncoding = false;
150 bool GFX10_BEncoding = false;
151 bool HasDLInsts = false;
152 bool HasFmacF64Inst = false;
153 bool HasDot1Insts = false;
154 bool HasDot2Insts = false;
155 bool HasDot3Insts = false;
156 bool HasDot4Insts = false;
157 bool HasDot5Insts = false;
158 bool HasDot6Insts = false;
159 bool HasDot7Insts = false;
160 bool HasDot8Insts = false;
161 bool HasDot9Insts = false;
162 bool HasDot10Insts = false;
163 bool HasDot11Insts = false;
164 bool HasDot12Insts = false;
165 bool HasDot13Insts = false;
166 bool HasMAIInsts = false;
167 bool HasFP8Insts = false;
169 bool HasFP8E5M3Insts = false;
170 bool HasCvtFP8Vop1Bug = false;
171 bool HasPkFmacF16Inst = false;
192 bool HasXF32Insts = false;
193 /// The maximum number of instructions that may be placed within an S_CLAUSE,
194 /// which is one greater than the maximum argument to S_CLAUSE. A value of 0
195 /// indicates a lack of S_CLAUSE support.
197 bool SupportsSRAMECC = false;
198 bool DynamicVGPR = false;
200 bool HasVMemToLDSLoad = false;
201 bool RequiresAlignVGPR = false;
202
203 // This should not be used directly. 'TargetID' tracks the dynamic settings
204 // for SRAMECC.
205 bool EnableSRAMECC = false;
206
207 bool HasNoSdstCMPX = false;
208 bool HasVscnt = false;
209 bool HasWaitXcnt = false;
210 bool HasGetWaveIdInst = false;
211 bool HasSMemTimeInst = false;
214 bool HasVOP3Literal = false;
215 bool HasNoDataDepHazard = false;
216 bool FlatAddressSpace = false;
217 bool FlatInstOffsets = false;
218 bool FlatGlobalInsts = false;
219 bool FlatScratchInsts = false;
220 bool FlatGVSMode = false;
223 bool EnableFlatScratch = false;
225 bool HasGDS = false;
226 bool HasGWS = false;
227 bool AddNoCarryInsts = false;
228 bool HasUnpackedD16VMem = false;
229 bool LDSMisalignedBug = false;
232 bool UnalignedDSAccess = false;
233 bool HasPackedTID = false;
234 bool ScalarizeGlobal = false;
235 bool HasSALUFloatInsts = false;
238 bool Has64BitLiterals = false;
240 bool HasBitOp3Insts = false;
241 bool HasTanhInsts = false;
244 bool HasPrngInst = false;
246 bool HasPermlane16Swap = false;
247 bool HasPermlane32Swap = false;
252 bool HasVmemPrefInsts = false;
254 bool HasSafeCUPrefetch = false;
255 bool HasCUStores = false;
258 bool HasNSAtoVMEMBug = false;
259 bool HasNSAClauseBug = false;
260 bool HasOffset3fBug = false;
266 bool Has1_5xVGPRs = false;
267 bool HasMADIntraFwdBug = false;
268 bool HasVOPDInsts = false;
272 bool HasAshrPkInsts = false;
276 bool HasMin3Max3PKF16 = false;
278 bool HasLshlAddU64Inst = false;
279 bool HasAddSubU64Insts = false;
280 bool HasMadU32Inst = false;
284
285 bool RequiresCOV6 = false;
288
289 // Dummy feature to use for assembler in tablegen.
290 bool FeatureDisable = false;
291
292private:
293 SIInstrInfo InstrInfo;
294 SITargetLowering TLInfo;
295 SIFrameLowering FrameLowering;
296
297public:
298 GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
299 const GCNTargetMachine &TM);
300 ~GCNSubtarget() override;
301
303 StringRef GPU, StringRef FS);
304
305 /// Diagnose inconsistent subtarget features before attempting to codegen
306 /// function \p F.
307 void checkSubtargetFeatures(const Function &F) const;
308
309 const SIInstrInfo *getInstrInfo() const override {
310 return &InstrInfo;
311 }
312
313 const SIFrameLowering *getFrameLowering() const override {
314 return &FrameLowering;
315 }
316
317 const SITargetLowering *getTargetLowering() const override {
318 return &TLInfo;
319 }
320
321 const SIRegisterInfo *getRegisterInfo() const override {
322 return &InstrInfo.getRegisterInfo();
323 }
324
325 const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;
326
327 const CallLowering *getCallLowering() const override {
328 return CallLoweringInfo.get();
329 }
330
331 const InlineAsmLowering *getInlineAsmLowering() const override {
332 return InlineAsmLoweringInfo.get();
333 }
334
336 return InstSelector.get();
337 }
338
339 const LegalizerInfo *getLegalizerInfo() const override {
340 return Legalizer.get();
341 }
342
343 const AMDGPURegisterBankInfo *getRegBankInfo() const override {
344 return RegBankInfo.get();
345 }
346
348 return TargetID;
349 }
350
352 return &InstrItins;
353 }
354
356
358 return (Generation)Gen;
359 }
360
361 unsigned getMaxWaveScratchSize() const {
362 // See COMPUTE_TMPRING_SIZE.WAVESIZE.
363 if (getGeneration() >= GFX12) {
364 // 18-bit field in units of 64-dword.
365 return (64 * 4) * ((1 << 18) - 1);
366 }
367 if (getGeneration() == GFX11) {
368 // 15-bit field in units of 64-dword.
369 return (64 * 4) * ((1 << 15) - 1);
370 }
371 // 13-bit field in units of 256-dword.
372 return (256 * 4) * ((1 << 13) - 1);
373 }
374
375 /// Return the number of high bits known to be zero for a frame index.
378 }
379
380 int getLDSBankCount() const {
381 return LDSBankCount;
382 }
383
384 unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
385 return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
386 }
387
388 unsigned getConstantBusLimit(unsigned Opcode) const;
389
390 /// Returns if the result of this instruction with a 16-bit result returned in
391 /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
392 /// the original value.
393 bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
394
395 bool supportsWGP() const {
396 if (GFX1250Insts)
397 return false;
398 return getGeneration() >= GFX10;
399 }
400
401 bool hasIntClamp() const {
402 return HasIntClamp;
403 }
404
405 bool hasFP64() const {
406 return FP64;
407 }
408
409 bool hasMIMG_R128() const {
410 return MIMG_R128;
411 }
412
413 bool hasHWFP64() const {
414 return FP64;
415 }
416
417 bool hasHalfRate64Ops() const {
418 return HalfRate64Ops;
419 }
420
421 bool hasFullRate64Ops() const {
422 return FullRate64Ops;
423 }
424
425 bool hasAddr64() const {
427 }
428
429 bool hasFlat() const {
431 }
432
433 // Return true if the target only has the reverse operand versions of VALU
434 // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
435 bool hasOnlyRevVALUShifts() const {
437 }
438
439 bool hasFractBug() const {
441 }
442
443 bool hasBFE() const {
444 return true;
445 }
446
447 bool hasBFI() const {
448 return true;
449 }
450
451 bool hasBFM() const {
452 return hasBFE();
453 }
454
455 bool hasBCNT(unsigned Size) const {
456 return true;
457 }
458
459 bool hasFFBL() const {
460 return true;
461 }
462
463 bool hasFFBH() const {
464 return true;
465 }
466
467 bool hasMed3_16() const {
469 }
470
471 bool hasMin3Max3_16() const {
473 }
474
475 bool hasFmaMixInsts() const {
476 return HasFmaMixInsts;
477 }
478
479 bool hasFmaMixBF16Insts() const { return HasFmaMixBF16Insts; }
480
481 bool hasCARRY() const {
482 return true;
483 }
484
485 bool hasFMA() const {
486 return FMA;
487 }
488
489 bool hasSwap() const {
490 return GFX9Insts;
491 }
492
493 bool hasScalarPackInsts() const {
494 return GFX9Insts;
495 }
496
497 bool hasScalarMulHiInsts() const {
498 return GFX9Insts;
499 }
500
501 bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }
502
505 }
506
508 // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
509 return getGeneration() >= GFX9;
510 }
511
512 /// True if the offset field of DS instructions works as expected. On SI, the
513 /// offset uses a 16-bit adder and does not always wrap properly.
514 bool hasUsableDSOffset() const {
515 return getGeneration() >= SEA_ISLANDS;
516 }
517
520 }
521
522 /// Condition output from div_scale is usable.
525 }
526
527 /// Extra wait hazard is needed in some cases before
528 /// s_cbranch_vccnz/s_cbranch_vccz.
529 bool hasReadVCCZBug() const {
530 return getGeneration() <= SEA_ISLANDS;
531 }
532
533 /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
535 return getGeneration() >= GFX10;
536 }
537
538 /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
539 /// was written by a VALU instruction.
542 }
543
544 /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
545 /// SGPR was written by a VALU Instruction.
548 }
549
550 bool hasRFEHazards() const {
552 }
553
554 /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
555 unsigned getSetRegWaitStates() const {
556 return getGeneration() <= SEA_ISLANDS ? 1 : 2;
557 }
558
559 bool dumpCode() const {
560 return DumpCode;
561 }
562
563 /// Return the amount of LDS that can be used that will not restrict the
564 /// occupancy lower than WaveCount.
565 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
566 const Function &) const;
567
570 }
571
572 /// \returns If target supports S_DENORM_MODE.
573 bool hasDenormModeInst() const {
575 }
576
577 bool useFlatForGlobal() const {
578 return FlatForGlobal;
579 }
580
581 /// \returns If target supports ds_read/write_b128 and user enables generation
582 /// of ds_read/write_b128.
583 bool useDS128() const {
584 return CIInsts && EnableDS128;
585 }
586
587 /// \return If target supports ds_read/write_b96/128.
588 bool hasDS96AndDS128() const {
589 return CIInsts;
590 }
591
592 /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
593 bool haveRoundOpsF64() const {
594 return CIInsts;
595 }
596
597 /// \returns If MUBUF instructions always perform range checking, even for
598 /// buffer resources used for private memory access.
601 }
602
/// \returns If target requires PRT Strict NULL support (zero result registers
/// for sparse texture support).
605 bool usePRTStrictNull() const {
606 return EnablePRTStrictNull;
607 }
608
611 }
612
613 /// \returns true if the target supports backing off of s_barrier instructions
614 /// when an exception is raised.
616 return BackOffBarrier;
617 }
618
621 }
622
625 }
626
627 bool hasUnalignedDSAccess() const {
628 return UnalignedDSAccess;
629 }
630
633 }
634
637 }
638
641 }
642
644 return UnalignedAccessMode;
645 }
646
648
649 bool hasApertureRegs() const {
650 return HasApertureRegs;
651 }
652
653 bool isTrapHandlerEnabled() const {
654 return TrapHandler;
655 }
656
657 bool isXNACKEnabled() const {
658 return TargetID.isXnackOnOrAny();
659 }
660
661 bool isTgSplitEnabled() const {
662 return EnableTgSplit;
663 }
664
665 bool isCuModeEnabled() const {
666 return EnableCuMode;
667 }
668
670
671 bool hasFlatAddressSpace() const {
672 return FlatAddressSpace;
673 }
674
675 bool hasFlatScrRegister() const {
676 return hasFlatAddressSpace();
677 }
678
679 bool hasFlatInstOffsets() const {
680 return FlatInstOffsets;
681 }
682
683 bool hasFlatGlobalInsts() const {
684 return FlatGlobalInsts;
685 }
686
687 bool hasFlatScratchInsts() const {
688 return FlatScratchInsts;
689 }
690
691 // Check if target supports ST addressing mode with FLAT scratch instructions.
692 // The ST addressing mode means no registers are used, either VGPR or SGPR,
693 // but only immediate offset is swizzled and added to the FLAT scratch base.
694 bool hasFlatScratchSTMode() const {
696 }
697
698 bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
699
702 }
703
704 bool enableFlatScratch() const {
705 return flatScratchIsArchitected() ||
707 }
708
709 bool hasGlobalAddTidInsts() const {
710 return GFX10_BEncoding;
711 }
712
713 bool hasAtomicCSub() const {
714 return GFX10_BEncoding;
715 }
716
717 bool hasMTBUFInsts() const { return !hasGFX1250Insts(); }
718
719 bool hasFormattedMUBUFInsts() const { return !hasGFX1250Insts(); }
720
721 bool hasExportInsts() const {
722 return !hasGFX940Insts() && !hasGFX1250Insts();
723 }
724
725 bool hasVINTERPEncoding() const { return GFX11Insts && !hasGFX1250Insts(); }
726
727 // DS_ADD_F64/DS_ADD_RTN_F64
728 bool hasLdsAtomicAddF64() const {
729 return hasGFX90AInsts() || hasGFX1250Insts();
730 }
731
733 return getGeneration() >= GFX9;
734 }
735
738 }
739
741 return getGeneration() > GFX9;
742 }
743
744 bool hasD16LoadStore() const {
745 return getGeneration() >= GFX9;
746 }
747
750 }
751
752 bool hasD16Images() const {
754 }
755
756 /// Return if most LDS instructions have an m0 use that require m0 to be
757 /// initialized.
758 bool ldsRequiresM0Init() const {
759 return getGeneration() < GFX9;
760 }
761
762 // True if the hardware rewinds and replays GWS operations if a wave is
763 // preempted.
764 //
765 // If this is false, a GWS operation requires testing if a nack set the
766 // MEM_VIOL bit, and repeating if so.
767 bool hasGWSAutoReplay() const {
768 return getGeneration() >= GFX9;
769 }
770
771 /// \returns if target has ds_gws_sema_release_all instruction.
772 bool hasGWSSemaReleaseAll() const {
773 return CIInsts;
774 }
775
776 /// \returns true if the target has integer add/sub instructions that do not
777 /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
778 /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
779 /// for saturation.
780 bool hasAddNoCarry() const {
781 return AddNoCarryInsts;
782 }
783
784 bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
785
786 bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }
787
788 bool hasUnpackedD16VMem() const {
789 return HasUnpackedD16VMem;
790 }
791
792 // Covers VS/PS/CS graphics shaders
793 bool isMesaGfxShader(const Function &F) const {
794 return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
795 }
796
797 bool hasMad64_32() const {
798 return getGeneration() >= SEA_ISLANDS;
799 }
800
801 bool hasSDWAOmod() const {
802 return HasSDWAOmod;
803 }
804
805 bool hasSDWAScalar() const {
806 return HasSDWAScalar;
807 }
808
809 bool hasSDWASdst() const {
810 return HasSDWASdst;
811 }
812
813 bool hasSDWAMac() const {
814 return HasSDWAMac;
815 }
816
817 bool hasSDWAOutModsVOPC() const {
818 return HasSDWAOutModsVOPC;
819 }
820
821 bool hasDLInsts() const {
822 return HasDLInsts;
823 }
824
825 bool hasFmacF64Inst() const { return HasFmacF64Inst; }
826
827 bool hasDot1Insts() const {
828 return HasDot1Insts;
829 }
830
831 bool hasDot2Insts() const {
832 return HasDot2Insts;
833 }
834
835 bool hasDot3Insts() const {
836 return HasDot3Insts;
837 }
838
839 bool hasDot4Insts() const {
840 return HasDot4Insts;
841 }
842
843 bool hasDot5Insts() const {
844 return HasDot5Insts;
845 }
846
847 bool hasDot6Insts() const {
848 return HasDot6Insts;
849 }
850
851 bool hasDot7Insts() const {
852 return HasDot7Insts;
853 }
854
855 bool hasDot8Insts() const {
856 return HasDot8Insts;
857 }
858
859 bool hasDot9Insts() const {
860 return HasDot9Insts;
861 }
862
863 bool hasDot10Insts() const {
864 return HasDot10Insts;
865 }
866
867 bool hasDot11Insts() const {
868 return HasDot11Insts;
869 }
870
871 bool hasDot12Insts() const {
872 return HasDot12Insts;
873 }
874
875 bool hasDot13Insts() const {
876 return HasDot13Insts;
877 }
878
879 bool hasMAIInsts() const {
880 return HasMAIInsts;
881 }
882
883 bool hasFP8Insts() const {
884 return HasFP8Insts;
885 }
886
888
889 bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; }
890
891 bool hasPkFmacF16Inst() const {
892 return HasPkFmacF16Inst;
893 }
894
897 }
898
901 }
902
905 }
906
909 }
910
912
914
915 bool hasAtomicFaddInsts() const {
917 }
918
920
922
925 }
926
929 }
930
933 }
934
937 }
938
940
941 /// \return true if the target has flat, global, and buffer atomic fadd for
942 /// double.
945 }
946
947 /// \return true if the target's flat, global, and buffer atomic fadd for
948 /// float supports denormal handling.
951 }
952
953 /// \return true if atomic operations targeting fine-grained memory work
954 /// correctly at device scope, in allocations in host or peer PCIe device
955 /// memory.
958 }
959
/// \return true if HW emulates system scope atomics unsupported by PCI-e
/// via a CAS loop.
964 }
965
967
970 }
971
972 bool hasNoSdstCMPX() const {
973 return HasNoSdstCMPX;
974 }
975
976 bool hasVscnt() const {
977 return HasVscnt;
978 }
979
980 bool hasGetWaveIdInst() const {
981 return HasGetWaveIdInst;
982 }
983
984 bool hasSMemTimeInst() const {
985 return HasSMemTimeInst;
986 }
987
990 }
991
994 }
995
996 bool hasVOP3Literal() const {
997 return HasVOP3Literal;
998 }
999
1000 bool hasNoDataDepHazard() const {
1001 return HasNoDataDepHazard;
1002 }
1003
1005 return getGeneration() < SEA_ISLANDS;
1006 }
1007
1008 bool hasInstPrefetch() const {
1009 return getGeneration() == GFX10 || getGeneration() == GFX11;
1010 }
1011
1012 bool hasPrefetch() const { return GFX12Insts; }
1013
1014 bool hasVmemPrefInsts() const { return HasVmemPrefInsts; }
1015
1017
1018 bool hasSafeCUPrefetch() const { return HasSafeCUPrefetch; }
1019
1020 bool hasCUStores() const { return HasCUStores; }
1021
1022 // Has s_cmpk_* instructions.
1023 bool hasSCmpK() const { return getGeneration() < GFX12; }
1024
1025 // Scratch is allocated in 256 dword per wave blocks for the entire
1026 // wavefront. When viewed from the perspective of an arbitrary workitem, this
1027 // is 4-byte aligned.
1028 //
1029 // Only 4-byte alignment is really needed to access anything. Transformations
1030 // on the pointer value itself may rely on the alignment / known low bits of
1031 // the pointer. Set this to something above the minimum to avoid needing
1032 // dynamic realignment in common cases.
1033 Align getStackAlignment() const { return Align(16); }
1034
1035 bool enableMachineScheduler() const override {
1036 return true;
1037 }
1038
1039 bool useAA() const override;
1040
1041 bool enableSubRegLiveness() const override {
1042 return true;
1043 }
1044
1047
1048 // static wrappers
1049 static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
1050
1051 // XXX - Why is this here if it isn't in the default pass set?
1052 bool enableEarlyIfConversion() const override {
1053 return true;
1054 }
1055
1057 const SchedRegion &Region) const override;
1058
1060 const SchedRegion &Region) const override;
1061
1062 void mirFileLoaded(MachineFunction &MF) const override;
1063
1064 unsigned getMaxNumUserSGPRs() const {
1065 return AMDGPU::getMaxNumUserSGPRs(*this);
1066 }
1067
1068 bool hasSMemRealTime() const {
1069 return HasSMemRealTime;
1070 }
1071
1072 bool hasMovrel() const {
1073 return HasMovrel;
1074 }
1075
1076 bool hasVGPRIndexMode() const {
1077 return HasVGPRIndexMode;
1078 }
1079
1080 bool useVGPRIndexMode() const;
1081
1083 return getGeneration() >= VOLCANIC_ISLANDS;
1084 }
1085
1087
1088 bool hasScalarStores() const {
1089 return HasScalarStores;
1090 }
1091
1092 bool hasScalarAtomics() const {
1093 return HasScalarAtomics;
1094 }
1095
1096 bool hasLDSFPAtomicAddF32() const { return GFX8Insts; }
1098
1099 /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
1100 bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
1101
1102 /// \returns true if the subtarget has the v_permlane64_b32 instruction.
1103 bool hasPermLane64() const { return getGeneration() >= GFX11; }
1104
1105 bool hasDPP() const {
1106 return HasDPP;
1107 }
1108
1109 bool hasDPPBroadcasts() const {
1110 return HasDPP && getGeneration() < GFX10;
1111 }
1112
1114 return HasDPP && getGeneration() < GFX10;
1115 }
1116
1117 bool hasDPP8() const {
1118 return HasDPP8;
1119 }
1120
1121 bool hasDPALU_DPP() const {
1122 return HasDPALU_DPP;
1123 }
1124
1125 bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }
1126
1127 bool hasPackedFP32Ops() const {
1128 return HasPackedFP32Ops;
1129 }
1130
1131 // Has V_PK_MOV_B32 opcode
1132 bool hasPkMovB32() const {
1133 return GFX90AInsts;
1134 }
1135
1137 return getGeneration() >= GFX10 || hasGFX940Insts();
1138 }
1139
1140 bool hasFmaakFmamkF64Insts() const { return hasGFX1250Insts(); }
1141
1142 bool hasImageInsts() const {
1143 return HasImageInsts;
1144 }
1145
1147 return HasExtendedImageInsts;
1148 }
1149
1150 bool hasR128A16() const {
1151 return HasR128A16;
1152 }
1153
1154 bool hasA16() const { return HasA16; }
1155
1156 bool hasG16() const { return HasG16; }
1157
1158 bool hasOffset3fBug() const {
1159 return HasOffset3fBug;
1160 }
1161
1163
1165
1166 bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
1167
1169
1171
1172 bool hasNSAEncoding() const { return HasNSAEncoding; }
1173
1174 bool hasNonNSAEncoding() const { return getGeneration() < GFX12; }
1175
1177
1178 unsigned getNSAMaxSize(bool HasSampler = false) const {
1179 return AMDGPU::getNSAMaxSize(*this, HasSampler);
1180 }
1181
1182 bool hasGFX10_AEncoding() const {
1183 return GFX10_AEncoding;
1184 }
1185
1186 bool hasGFX10_BEncoding() const {
1187 return GFX10_BEncoding;
1188 }
1189
1190 bool hasGFX10_3Insts() const {
1191 return GFX10_3Insts;
1192 }
1193
1194 bool hasMadF16() const;
1195
1196 bool hasMovB64() const { return GFX940Insts || GFX1250Insts; }
1197
1198 bool hasLshlAddU64Inst() const { return HasLshlAddU64Inst; }
1199
1200 // Scalar and global loads support scale_offset bit.
1201 bool hasScaleOffset() const { return GFX1250Insts; }
1202
1203 bool hasFlatGVSMode() const { return FlatGVSMode; }
1204
1205 // FLAT GLOBAL VOffset is signed
1206 bool hasSignedGVSOffset() const { return GFX1250Insts; }
1207
1208 bool enableSIScheduler() const {
1209 return EnableSIScheduler;
1210 }
1211
1212 bool loadStoreOptEnabled() const {
1213 return EnableLoadStoreOpt;
1214 }
1215
1216 bool hasSGPRInitBug() const {
1217 return SGPRInitBug;
1218 }
1219
1221 return UserSGPRInit16Bug && isWave32();
1222 }
1223
1225
1228 }
1229
1232 }
1233
1236 }
1237
1238 // \returns true if the subtarget supports DWORDX3 load/store instructions.
1240 return CIInsts;
1241 }
1242
1245 }
1246
1250 }
1251
1254 }
1255
1258 }
1259
1262 }
1263
1266 }
1267
1270 }
1271
1272 bool hasLDSMisalignedBug() const {
1273 return LDSMisalignedBug && !EnableCuMode;
1274 }
1275
1277 return HasInstFwdPrefetchBug;
1278 }
1279
1281 return HasVcmpxExecWARHazard;
1282 }
1283
1286 }
1287
1288 // Shift amount of a 64 bit shift cannot be a highest allocated register
1289 // if also at the end of the allocation block.
1291 return GFX90AInsts && !GFX940Insts;
1292 }
1293
1294 // Has one cycle hazard on transcendental instruction feeding a
1295 // non transcendental VALU.
1296 bool hasTransForwardingHazard() const { return GFX940Insts; }
1297
1298 // Has one cycle hazard on a VALU instruction partially writing dst with
1299 // a shift of result bits feeding another VALU instruction.
1301
1302 // Cannot use op_sel with v_dot instructions.
1303 bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; }
1304
// Does not have HW interlocks for VALU writing and then reading SGPRs.
1306 bool hasVDecCoExecHazard() const {
1307 return GFX940Insts;
1308 }
1309
1310 bool hasNSAtoVMEMBug() const {
1311 return HasNSAtoVMEMBug;
1312 }
1313
1314 bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1315
1316 bool hasHardClauses() const { return MaxHardClauseLength > 0; }
1317
1318 bool hasGFX90AInsts() const { return GFX90AInsts; }
1319
1321 return getGeneration() == GFX10;
1322 }
1323
1324 bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1325
1326 bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1327
1328 bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
1329
1331 return getGeneration() == GFX11;
1332 }
1333
1335
1337
1338 bool requiresCodeObjectV6() const { return RequiresCOV6; }
1339
1341
1344 }
1345
1346 bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
1347
1348 bool hasVALUReadSGPRHazard() const { return GFX12Insts && !GFX1250Insts; }
1349
1351 return GFX1250Insts && getGeneration() == GFX12;
1352 }
1353
1354 /// Return if operations acting on VGPR tuples require even alignment.
1355 bool needsAlignedVGPRs() const { return RequiresAlignVGPR; }
1356
1357 /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1358 bool hasSPackHL() const { return GFX11Insts; }
1359
1360 /// Return true if the target's EXP instruction has the COMPR flag, which
1361 /// affects the meaning of the EN (enable) bits.
1362 bool hasCompressedExport() const { return !GFX11Insts; }
1363
1364 /// Return true if the target's EXP instruction supports the NULL export
1365 /// target.
1366 bool hasNullExportTarget() const { return !GFX11Insts; }
1367
1368 bool has1_5xVGPRs() const { return Has1_5xVGPRs; }
1369
1370 bool hasVOPDInsts() const { return HasVOPDInsts; }
1371
1373
1374 /// Return true if the target has the S_DELAY_ALU instruction.
1375 bool hasDelayAlu() const { return GFX11Insts; }
1376
1377 bool hasPackedTID() const { return HasPackedTID; }
1378
// GFX94* is a derivative of GFX90A. hasGFX940Insts() being true implies that
// hasGFX90AInsts() is also true.
1381 bool hasGFX940Insts() const { return GFX940Insts; }
1382
// GFX950 is a derivative of GFX94*. hasGFX950Insts() being true implies that
// hasGFX940Insts() and hasGFX90AInsts() are also true.
1385 bool hasGFX950Insts() const { return GFX950Insts; }
1386
1387 /// Returns true if the target supports
1388 /// global_load_lds_dwordx3/global_load_lds_dwordx4 or
1389 /// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.
1390 bool hasLDSLoadB96_B128() const {
1391 return hasGFX950Insts();
1392 }
1393
1394 bool hasVMemToLDSLoad() const { return HasVMemToLDSLoad; }
1395
1396 bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
1397
1399
1401
1403
1405
1406 /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
1407 /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
1408 bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
1409
1410 /// \returns true if inline constants are not supported for F16 pseudo
1411 /// scalar transcendentals.
1413 return getGeneration() == GFX12;
1414 }
1415
1416 /// \returns true if the target has instructions with xf32 format support.
1417 bool hasXF32Insts() const { return HasXF32Insts; }
1418
1419 bool hasBitOp3Insts() const { return HasBitOp3Insts; }
1420
1421 bool hasPermlane16Swap() const { return HasPermlane16Swap; }
1422 bool hasPermlane32Swap() const { return HasPermlane32Swap; }
1423 bool hasAshrPkInsts() const { return HasAshrPkInsts; }
1424
1427 }
1428
1431 }
1432
1433 bool hasMin3Max3PKF16() const { return HasMin3Max3PKF16; }
1434
1435 bool hasTanhInsts() const { return HasTanhInsts; }
1436
1438
1439 bool hasAddPC64Inst() const { return GFX1250Insts; }
1440
1442
1445 }
1446
1448
1449 /// \returns true if the target has s_wait_xcnt insertion. Supported for
1450 /// GFX1250.
1451 bool hasWaitXCnt() const { return HasWaitXcnt; }
1452
// A single-DWORD instruction can use a 64-bit literal.
1454 bool has64BitLiterals() const { return Has64BitLiterals; }
1455
1457
1459
1460 /// \returns The maximum number of instructions that can be enclosed in an
1461 /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
1462 /// instruction.
1463 unsigned maxHardClauseLength() const { return MaxHardClauseLength; }
1464
1465 bool hasPrngInst() const { return HasPrngInst; }
1466
1468
1469 /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1470 /// SGPRs
1471 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1472
1473 /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1474 /// VGPRs
1475 unsigned getOccupancyWithNumVGPRs(unsigned VGPRs,
1476 unsigned DynamicVGPRBlockSize) const;
1477
1478 /// Subtarget's minimum/maximum occupancy, in number of waves per EU, that can
1479 /// be achieved when the only function running on a CU is \p F, each workgroup
1480 /// uses \p LDSSize bytes of LDS, and each wave uses \p NumSGPRs SGPRs and \p
1481 /// NumVGPRs VGPRs. The flat workgroup sizes associated to the function are a
1482 /// range, so this returns a range as well.
1483 ///
1484 /// Note that occupancy can be affected by the scratch allocation as well, but
1485 /// we do not have enough information to compute it.
1486 std::pair<unsigned, unsigned> computeOccupancy(const Function &F,
1487 unsigned LDSSize = 0,
1488 unsigned NumSGPRs = 0,
1489 unsigned NumVGPRs = 0) const;
1490
1491 /// \returns true if the flat_scratch register should be initialized with the
1492 /// pointer to the wave's scratch memory rather than a size and offset.
1495 }
1496
1497 /// \returns true if the flat_scratch register is initialized by the HW.
1498 /// In this case it is readonly.
1500
1501 /// \returns true if the architected SGPRs are enabled.
1503
1504 /// \returns true if Global Data Share is supported.
1505 bool hasGDS() const { return HasGDS; }
1506
1507 /// \returns true if Global Wave Sync is supported.
1508 bool hasGWS() const { return HasGWS; }
1509
1510 /// \returns true if the machine has merged shaders in which s0-s7 are
1511 /// reserved by the hardware and user SGPRs start at s8
1512 bool hasMergedShaders() const {
1513 return getGeneration() >= GFX9;
1514 }
1515
1516 // \returns true if the target supports the pre-NGG legacy geometry path.
1517 bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1518
1519 // \returns true if preloading kernel arguments is supported.
1520 bool hasKernargPreload() const { return KernargPreload; }
1521
1522 // \returns true if the target has split barriers feature
1523 bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
1524
1525 // \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
1526 bool hasCvtFP8VOP1Bug() const { return HasCvtFP8Vop1Bug; }
1527
1528 // \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a
1529 // no-return form.
1531
1532 // \returns true if the target has DX10_CLAMP kernel descriptor mode bit
1533 bool hasDX10ClampMode() const { return getGeneration() < GFX12; }
1534
1535 // \returns true if the target has IEEE kernel descriptor mode bit
1536 bool hasIEEEMode() const { return getGeneration() < GFX12; }
1537
1538 // \returns true if the target has IEEE fminimum/fmaximum instructions
1540
1541 // \returns true if the target has WG_RR_MODE kernel descriptor mode bit
1542 bool hasRrWGMode() const { return getGeneration() >= GFX12; }
1543
1544 /// \returns true if VADDR and SADDR fields in VSCRATCH can use negative
1545 /// values.
1546 bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }
1547
1548 bool hasGFX1250Insts() const { return GFX1250Insts; }
1549
1550 bool hasVOPD3() const { return GFX1250Insts; }
1551
1552 // \returns true if the target has V_ADD_U64/V_SUB_U64 instructions.
1553 bool hasAddSubU64Insts() const { return HasAddSubU64Insts; }
1554
1555 // \returns true if the target has V_MAD_U32 instruction.
1556 bool hasMadU32Inst() const { return HasMadU32Inst; }
1557
1558 // \returns true if the target has V_MUL_U64/V_MUL_I64 instructions.
1559 bool hasVectorMulU64() const { return GFX1250Insts; }
1560
1561 // \returns true if the target has V_MAD_NC_U64_U32/V_MAD_NC_I64_I32
1562 // instructions.
1563 bool hasMadU64U32NoCarry() const { return GFX1250Insts; }
1564
1565 // \returns true if the target has V_{MIN|MAX}_{I|U}64 instructions.
1566 bool hasIntMinMax64() const { return GFX1250Insts; }
1567
1568 // \returns true if the target has V_ADD_{MIN|MAX}_{I|U}32 instructions.
1569 bool hasAddMinMaxInsts() const { return GFX1250Insts; }
1570
1571 // \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions.
1572 bool hasPkAddMinMaxInsts() const { return GFX1250Insts; }
1573
1574 // \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions.
1575 bool hasPkMinMax3Insts() const { return GFX1250Insts; }
1576
1577 // \returns true if the target has the S_GET_SHADER_CYCLES_U64 instruction.
1578 bool hasSGetShaderCyclesInst() const { return GFX1250Insts; }
1579
1580 // \returns true if target has S_SETPRIO_INC_WG instruction.
1582
1583 // \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
1584 // of sign-extending. Note that GFX1250 has not only fixed the bug but also
1585 // extended VA to 57 bits.
1586 bool hasGetPCZeroExtension() const { return GFX12Insts && !GFX1250Insts; }
1587
1588 // \returns true if the target needs to create a prolog for backward
1589 // compatibility when preloading kernel arguments.
1591 return hasKernargPreload() && !GFX1250Insts;
1592 }
1593
1594 /// \returns SGPR allocation granularity supported by the subtarget.
1595 unsigned getSGPRAllocGranule() const {
1597 }
1598
1599 /// \returns SGPR encoding granularity supported by the subtarget.
1600 unsigned getSGPREncodingGranule() const {
1602 }
1603
1604 /// \returns Total number of SGPRs supported by the subtarget.
1605 unsigned getTotalNumSGPRs() const {
1607 }
1608
1609 /// \returns Addressable number of SGPRs supported by the subtarget.
1610 unsigned getAddressableNumSGPRs() const {
1612 }
1613
1614 /// \returns Minimum number of SGPRs that meets the given number of waves per
1615 /// execution unit requirement supported by the subtarget.
1616 unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1617 return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1618 }
1619
1620 /// \returns Maximum number of SGPRs that meets the given number of waves per
1621 /// execution unit requirement supported by the subtarget.
1622 unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1623 return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1624 }
1625
1626 /// \returns Reserved number of SGPRs. This is common
1627 /// utility function called by MachineFunction and
1628 /// Function variants of getReservedNumSGPRs.
1629 unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1630 /// \returns Reserved number of SGPRs for given machine function \p MF.
1631 unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1632
1633 /// \returns Reserved number of SGPRs for given function \p F.
1634 unsigned getReservedNumSGPRs(const Function &F) const;
1635
1636 /// \returns Maximum number of preloaded SGPRs for the subtarget.
1637 unsigned getMaxNumPreloadedSGPRs() const;
1638
1639 /// \returns max num SGPRs. This is the common utility
1640 /// function called by MachineFunction and Function
1641 /// variants of getMaxNumSGPRs.
1642 unsigned getBaseMaxNumSGPRs(const Function &F,
1643 std::pair<unsigned, unsigned> WavesPerEU,
1644 unsigned PreloadedSGPRs,
1645 unsigned ReservedNumSGPRs) const;
1646
1647 /// \returns Maximum number of SGPRs that meets number of waves per execution
1648 /// unit requirement for function \p MF, or number of SGPRs explicitly
1649 /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1650 ///
1651 /// \returns Value that meets number of waves per execution unit requirement
1652 /// if explicitly requested value cannot be converted to integer, violates
1653 /// subtarget's specifications, or does not meet number of waves per execution
1654 /// unit requirement.
1655 unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1656
1657 /// \returns Maximum number of SGPRs that meets number of waves per execution
1658 /// unit requirement for function \p F, or number of SGPRs explicitly
1659 /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1660 ///
1661 /// \returns Value that meets number of waves per execution unit requirement
1662 /// if explicitly requested value cannot be converted to integer, violates
1663 /// subtarget's specifications, or does not meet number of waves per execution
1664 /// unit requirement.
1665 unsigned getMaxNumSGPRs(const Function &F) const;
1666
1667 /// \returns VGPR allocation granularity supported by the subtarget.
1668 unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const {
1669 return AMDGPU::IsaInfo::getVGPRAllocGranule(this, DynamicVGPRBlockSize);
1670 }
1671
1672 /// \returns VGPR encoding granularity supported by the subtarget.
1673 unsigned getVGPREncodingGranule() const {
1675 }
1676
1677 /// \returns Total number of VGPRs supported by the subtarget.
1678 unsigned getTotalNumVGPRs() const {
1680 }
1681
1682 /// \returns Addressable number of architectural VGPRs supported by the
1683 /// subtarget.
1686 }
1687
1688 /// \returns Addressable number of VGPRs supported by the subtarget.
1689 unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const {
1690 return AMDGPU::IsaInfo::getAddressableNumVGPRs(this, DynamicVGPRBlockSize);
1691 }
1692
1693 /// \returns the minimum number of VGPRs that will prevent achieving more than
1694 /// the specified number of waves \p WavesPerEU.
1695 unsigned getMinNumVGPRs(unsigned WavesPerEU,
1696 unsigned DynamicVGPRBlockSize) const {
1697 return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU,
1698 DynamicVGPRBlockSize);
1699 }
1700
1701 /// \returns the maximum number of VGPRs that can be used and still achieve
1702 /// at least the specified number of waves \p WavesPerEU.
1703 unsigned getMaxNumVGPRs(unsigned WavesPerEU,
1704 unsigned DynamicVGPRBlockSize) const {
1705 return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU,
1706 DynamicVGPRBlockSize);
1707 }
1708
1709 /// \returns max num VGPRs. This is the common utility function
1710 /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1711 unsigned
1713 std::pair<unsigned, unsigned> NumVGPRBounds) const;
1714
1715 /// \returns Maximum number of VGPRs that meets number of waves per execution
1716 /// unit requirement for function \p F, or number of VGPRs explicitly
1717 /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1718 ///
1719 /// \returns Value that meets number of waves per execution unit requirement
1720 /// if explicitly requested value cannot be converted to integer, violates
1721 /// subtarget's specifications, or does not meet number of waves per execution
1722 /// unit requirement.
1723 unsigned getMaxNumVGPRs(const Function &F) const;
1724
1725 unsigned getMaxNumAGPRs(const Function &F) const {
1726 return getMaxNumVGPRs(F);
1727 }
1728
1729 /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
1730 /// of waves per execution unit required for the function \p F.
1731 std::pair<unsigned, unsigned> getMaxNumVectorRegs(const Function &F) const;
1732
1733 /// \returns Maximum number of VGPRs that meets number of waves per execution
1734 /// unit requirement for function \p MF, or number of VGPRs explicitly
1735 /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1736 ///
1737 /// \returns Value that meets number of waves per execution unit requirement
1738 /// if explicitly requested value cannot be converted to integer, violates
1739 /// subtarget's specifications, or does not meet number of waves per execution
1740 /// unit requirement.
1741 unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1742
1743 bool supportsWave32() const { return getGeneration() >= GFX10; }
1744
1745 bool supportsWave64() const { return !hasGFX1250Insts(); }
1746
1747 bool isWave32() const {
1748 return getWavefrontSize() == 32;
1749 }
1750
1751 bool isWave64() const {
1752 return getWavefrontSize() == 64;
1753 }
1754
1755 /// Returns if the wavesize of this subtarget is known reliable. This is false
1756 /// only for a default target-cpu that does not have an explicit
1757 /// +wavefrontsize target feature.
1758 bool isWaveSizeKnown() const {
1759 return hasFeature(AMDGPU::FeatureWavefrontSize32) ||
1760 hasFeature(AMDGPU::FeatureWavefrontSize64);
1761 }
1762
1764 return getRegisterInfo()->getBoolRC();
1765 }
1766
1767 /// \returns Maximum number of work groups per compute unit supported by the
1768 /// subtarget and limited by given \p FlatWorkGroupSize.
1769 unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1770 return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1771 }
1772
1773 /// \returns Minimum flat work group size supported by the subtarget.
1774 unsigned getMinFlatWorkGroupSize() const override {
1776 }
1777
1778 /// \returns Maximum flat work group size supported by the subtarget.
1779 unsigned getMaxFlatWorkGroupSize() const override {
1781 }
1782
1783 /// \returns Number of waves per execution unit required to support the given
1784 /// \p FlatWorkGroupSize.
1785 unsigned
1786 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1787 return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1788 }
1789
1790 /// \returns Minimum number of waves per execution unit supported by the
1791 /// subtarget.
1792 unsigned getMinWavesPerEU() const override {
1794 }
1795
1796 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1797 SDep &Dep,
1798 const TargetSchedModel *SchedModel) const override;
1799
1800 // \returns true if it's beneficial on this subtarget for the scheduler to
1801 // cluster stores as well as loads.
1802 bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1803
1804 // \returns the number of address arguments from which to enable MIMG NSA
1805 // on supported architectures.
1806 unsigned getNSAThreshold(const MachineFunction &MF) const;
1807
1808 // \returns true if the subtarget has a hazard requiring an "s_nop 0"
1809 // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
1811
1812 // \returns true if the subtarget needs S_WAIT_ALU 0 before S_GETREG_B32 on
1813 // STATUS, STATE_PRIV, EXCP_FLAG_PRIV, or EXCP_FLAG_USER.
1815
1816 bool isDynamicVGPREnabled() const { return DynamicVGPR; }
1817 unsigned getDynamicVGPRBlockSize() const {
1818 return DynamicVGPRBlockSize32 ? 32 : 16;
1819 }
1820
1822 // AMDGPU doesn't care if early-clobber and undef operands are allocated
1823 // to the same register.
1824 return false;
1825 }
1826
1827 // DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused with anything
1828 // and surrounded by S_WAIT_ALU(0xFFE3).
1830 return getGeneration() == GFX12;
1831 }
1832
1833 // Requires s_wait_alu(0) after s102/s103 write and src_flat_scratch_base
1834 // read.
1836 return GFX1250Insts && getGeneration() == GFX12;
1837 }
1838};
1839
1841public:
1842 bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
1843
1844 bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
1845
1846 bool hasDispatchPtr() const { return DispatchPtr; }
1847
1848 bool hasQueuePtr() const { return QueuePtr; }
1849
1850 bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
1851
1852 bool hasDispatchID() const { return DispatchID; }
1853
1854 bool hasFlatScratchInit() const { return FlatScratchInit; }
1855
1856 bool hasPrivateSegmentSize() const { return PrivateSegmentSize; }
1857
1858 unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
1859
1860 unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
1861
1862 unsigned getNumFreeUserSGPRs();
1863
1864 void allocKernargPreloadSGPRs(unsigned NumSGPRs);
1865
1866 enum UserSGPRID : unsigned {
1876
1877 // Returns the size in number of SGPRs for preload user SGPR field.
1879 switch (ID) {
1881 return 2;
1883 return 4;
1884 case DispatchPtrID:
1885 return 2;
1886 case QueuePtrID:
1887 return 2;
1889 return 2;
1890 case DispatchIdID:
1891 return 2;
1892 case FlatScratchInitID:
1893 return 2;
1895 return 1;
1896 }
1897 llvm_unreachable("Unknown UserSGPRID.");
1898 }
1899
1900 GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
1901
1902private:
1903 const GCNSubtarget &ST;
1904
1905 // Private memory buffer
1906 // Compute directly in sgpr[0:1]
1907 // Other shaders indirect 64-bits at sgpr[0:1]
1908 bool ImplicitBufferPtr = false;
1909
1910 bool PrivateSegmentBuffer = false;
1911
1912 bool DispatchPtr = false;
1913
1914 bool QueuePtr = false;
1915
1916 bool KernargSegmentPtr = false;
1917
1918 bool DispatchID = false;
1919
1920 bool FlatScratchInit = false;
1921
1922 bool PrivateSegmentSize = false;
1923
1924 unsigned NumKernargPreloadSGPRs = 0;
1925
1926 unsigned NumUsedUserSGPRs = 0;
1927};
1928
1929} // end namespace llvm
1930
1931#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool hasPrefetch() const
bool hasMemoryAtomicFaddF32DenormalSupport() const
Definition: GCNSubtarget.h:949
bool hasFlat() const
Definition: GCNSubtarget.h:429
bool hasD16Images() const
Definition: GCNSubtarget.h:752
bool hasMinimum3Maximum3F32() const
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:66
bool useVGPRIndexMode() const
bool HasIEEEMinimumMaximumInsts
Definition: GCNSubtarget.h:273
bool hasAtomicDsPkAdd16Insts() const
Definition: GCNSubtarget.h:911
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:801
bool hasFlatGVSMode() const
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:257
bool hasPermlane32Swap() const
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:534
bool hasSwap() const
Definition: GCNSubtarget.h:489
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:891
bool HasAtomicFMinFMaxF64FlatInsts
Definition: GCNSubtarget.h:175
bool hasPkMinMax3Insts() const
bool hasDot2Insts() const
Definition: GCNSubtarget.h:831
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:744
bool hasMergedShaders() const
bool hasA16() const
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:805
bool hasRrWGMode() const
bool supportsBackOffBarrier() const
Definition: GCNSubtarget.h:615
bool hasScalarCompareEq64() const
bool has1_5xVGPRs() const
int getLDSBankCount() const
Definition: GCNSubtarget.h:380
bool hasSafeCUPrefetch() const
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:435
bool hasImageStoreD16Bug() const
bool hasNonNSAEncoding() const
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:523
void mirFileLoaded(MachineFunction &MF) const override
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:514
bool loadStoreOptEnabled() const
bool enableSubRegLiveness() const override
bool hasDPPWavefrontShifts() const
unsigned getSGPRAllocGranule() const
bool hasAtomicFMinFMaxF64FlatInsts() const
Definition: GCNSubtarget.h:907
bool hasLdsAtomicAddF64() const
Definition: GCNSubtarget.h:728
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:740
bool flatScratchIsPointer() const
bool hasSDWAMac() const
Definition: GCNSubtarget.h:813
bool hasFP8ConversionInsts() const
Definition: GCNSubtarget.h:887
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
Definition: GCNSubtarget.h:851
bool hasApertureRegs() const
Definition: GCNSubtarget.h:649
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:68
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:518
bool hasBitOp3Insts() const
bool hasFPAtomicToDenormModeHazard() const
unsigned getAddressableNumArchVGPRs() const
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:679
bool vmemWriteNeedsExpWaitcnt() const
bool hasAtomicFMinFMaxF32FlatInsts() const
Definition: GCNSubtarget.h:903
bool shouldClusterStores() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
bool hasIEEEMinimumMaximumInsts() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasDefaultComponentZero() const
Definition: GCNSubtarget.h:966
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:980
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
bool hasRelaxedBufferOOBMode() const
Definition: GCNSubtarget.h:647
bool hasPkAddMinMaxInsts() const
bool hasDLInsts() const
Definition: GCNSubtarget.h:821
bool hasExtendedImageInsts() const
bool hasVmemWriteVgprInOrder() const
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:455
bool HasTransposeLoadF4F6Insts
Definition: GCNSubtarget.h:243
bool hasMAIInsts() const
Definition: GCNSubtarget.h:879
bool hasLDSLoadB96_B128() const
Returns true if the target supports global_load_lds_dwordx3/global_load_lds_dwordx4 or buffer_load_dw...
bool has1024AddressableVGPRs() const
bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const
Definition: GCNSubtarget.h:956
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:687
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:732
bool hasArchitectedSGPRs() const
bool hasFmaakFmamkF64Insts() const
bool hasTanhInsts() const
bool hasHWFP64() const
Definition: GCNSubtarget.h:413
bool hasScaleOffset() const
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:573
bool hasPrivEnabledTrap2NopBug() const
bool hasMFMAInlineLiteralBug() const
bool hasCvtScaleForwardingHazard() const
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:984
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:631
bool hasTensorCvtLutInsts() const
bool hasNegativeScratchOffsetBug() const
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:309
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:77
bool hasDot1Insts() const
Definition: GCNSubtarget.h:827
bool hasDot3Insts() const
Definition: GCNSubtarget.h:835
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool hasVALUMaskWriteHazard() const
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:331
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:609
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
Definition: GCNSubtarget.h:919
unsigned getTotalNumSGPRs() const
bool hasGFX1250Insts() const
bool HasLdsBarrierArriveAtomic
Definition: GCNSubtarget.h:282
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:351
bool hasSafeSmemPrefetch() const
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
void overridePostRASchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
bool HasShaderCyclesHiLoRegisters
Definition: GCNSubtarget.h:213
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool hasPkMovB32() const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Align getStackAlignment() const
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:599
bool hasScalarSubwordLoads() const
Definition: GCNSubtarget.h:501
bool hasDot11Insts() const
Definition: GCNSubtarget.h:867
bool enableFlatScratch() const
Definition: GCNSubtarget.h:704
bool hasMadF16() const
bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const
bool hasMin3Max3PKF16() const
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:619
bool hasR128A16() const
bool hasOffset3fBug() const
bool hasDwordx3LoadStores() const
bool hasPrngInst() const
bool hasSignedScratchOffsets() const
bool HasPrivEnabledTrap2NopBug
Definition: GCNSubtarget.h:265
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:709
bool hasSGPRInitBug() const
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:675
bool hasFmaMixBF16Insts() const
Definition: GCNSubtarget.h:479
bool hasGetPCZeroExtension() const
bool hasPermLane64() const
bool requiresNopBeforeDeallocVGPRs() const
unsigned getMinNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
bool hasVMemToLDSLoad() const
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:507
bool supportsWave32() const
bool hasCUStores() const
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:661
bool hasFlatAtomicFaddF32Inst() const
Definition: GCNSubtarget.h:939
bool hasKernargPreload() const
bool hasFP8Insts() const
Definition: GCNSubtarget.h:883
unsigned getMaxNumAGPRs(const Function &F) const
bool hasReadM0MovRelInterpHazard() const
bool isDynamicVGPREnabled() const
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:321
bool hasRequiredExportPriority() const
bool hasDOTOpSelHazard() const
bool hasLdsWaitVMSRC() const
bool hasMSAALoadDstSelBug() const
const TargetRegisterClass * getBoolRC() const
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > NumVGPRBounds) const
bool hasFmaakFmamkF32Insts() const
bool hasVscnt() const
Definition: GCNSubtarget.h:976
bool hasMad64_32() const
Definition: GCNSubtarget.h:797
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:335
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
Definition: GCNSubtarget.h:122
bool hasHardClauses() const
bool useDS128() const
Definition: GCNSubtarget.h:583
bool hasExtendedWaitCounts() const
bool hasBVHDualAndBVH8Insts() const
bool hasMinimum3Maximum3PKF16() const
bool hasLshlAddU64Inst() const
bool hasLDSMisalignedBug() const
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:748
bool hasFmacF64Inst() const
Definition: GCNSubtarget.h:825
bool hasXF32Insts() const
bool hasInstPrefetch() const
bool hasAddPC64Inst() const
unsigned maxHardClauseLength() const
bool hasAshrPkInsts() const
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:793
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
bool hasExportInsts() const
Definition: GCNSubtarget.h:721
bool hasDPP() const
bool hasVINTERPEncoding() const
Definition: GCNSubtarget.h:725
bool hasGloballyAddressableScratch() const
const AMDGPURegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:343
bool hasAddSubU64Insts() const
bool hasLegacyGeometry() const
bool has64BitLiterals() const
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:503
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:665
bool hasScalarAtomics() const
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:313
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:635
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
bool hasMinimum3Maximum3F16() const
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:817
bool hasAtomicFMinFMaxF32GlobalInsts() const
Definition: GCNSubtarget.h:895
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
bool hasLdsBarrierArriveAtomic() const
bool hasGFX950Insts() const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:347
unsigned getMaxNumPreloadedSGPRs() const
bool hasAtomicCSubNoRtnInsts() const
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:700
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
bool hasVALUPartialForwardingHazard() const
bool dumpCode() const
Definition: GCNSubtarget.h:559
bool hasNoDataDepHazard() const
void overrideSchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
bool useVGPRBlockOpsForCSR() const
std::pair< unsigned, unsigned > computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:627
bool hasAddMinMaxInsts() const
bool needsKernArgPreloadProlog() const
bool hasRestrictedSOffset() const
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:471
bool hasIntClamp() const
Definition: GCNSubtarget.h:401
bool hasGFX10_AEncoding() const
bool hasFP8E5M3Insts() const
Definition: GCNSubtarget.h:889
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:736
unsigned getMaxNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const
bool hasEmulatedSystemScopeAtomics() const
Definition: GCNSubtarget.h:962
bool hasMadU64U32NoCarry() const
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:555
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:317
bool hasPackedFP32Ops() const
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
Definition: GCNSubtarget.h:847
bool hasGFX940Insts() const
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:421
bool hasScalarStores() const
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:653
bool enableMachineScheduler() const override
bool hasLDSFPAtomicAddF64() const
bool HasAtomicFlatPkAdd16Insts
Definition: GCNSubtarget.h:177
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:683
bool HasGloballyAddressableScratch
Definition: GCNSubtarget.h:287
bool hasDX10ClampMode() const
unsigned getNSAThreshold(const MachineFunction &MF) const
bool HasAtomicFMinFMaxF32GlobalInsts
Definition: GCNSubtarget.h:172
bool getScalarizeGlobalBehavior() const
bool HasAtomicFMinFMaxF32FlatInsts
Definition: GCNSubtarget.h:174
bool hasReadM0LdsDmaHazard() const
bool hasScalarSMulU64() const
Definition: GCNSubtarget.h:786
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:376
bool hasScratchBaseForwardingHazard() const
bool hasIntMinMax64() const
bool hasShaderCyclesHiLoRegisters() const
Definition: GCNSubtarget.h:992
bool hasSDWASdst() const
Definition: GCNSubtarget.h:809
bool HasDefaultComponentBroadcast
Definition: GCNSubtarget.h:191
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:493
bool hasFFBL() const
Definition: GCNSubtarget.h:459
bool hasNSAEncoding() const
bool requiresDisjointEarlyClobberAndUndef() const override
bool hasVALUReadSGPRHazard() const
bool hasSMemRealTime() const
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:671
bool hasDPPBroadcasts() const
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:605
bool hasMovB64() const
bool hasVmemPrefInsts() const
unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const
bool hasInstFwdPrefetchBug() const
bool hasAtomicFMinFMaxF64GlobalInsts() const
Definition: GCNSubtarget.h:899
bool hasMed3_16() const
Definition: GCNSubtarget.h:467
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasUnalignedScratchAccessEnabled() const
Definition: GCNSubtarget.h:639
bool hasMovrel() const
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool hasAtomicFlatPkAdd16Insts() const
Definition: GCNSubtarget.h:913
bool hasBFI() const
Definition: GCNSubtarget.h:447
bool hasDot13Insts() const
Definition: GCNSubtarget.h:875
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that require m0 to be initialized.
Definition: GCNSubtarget.h:758
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:250
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool isWave32() const
bool hasVGPRIndexMode() const
bool HasAtomicBufferGlobalPkAddF16Insts
Definition: GCNSubtarget.h:182
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs, unsigned DynamicVGPRBlockSize) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:623
bool isWaveSizeKnown() const
Returns if the wavesize of this subtarget is known reliable.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:384
unsigned getMinFlatWorkGroupSize() const override
bool hasImageInsts() const
bool hasImageGather4D16Bug() const
bool HasRequiredExportPriority
Definition: GCNSubtarget.h:270
bool hasFMA() const
Definition: GCNSubtarget.h:485
bool hasDot10Insts() const
Definition: GCNSubtarget.h:863
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool hasCvtFP8VOP1Bug() const
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:568
bool supportsWave64() const
bool HasAtomicBufferPkAddBF16Inst
Definition: GCNSubtarget.h:185
bool hasNegativeUnalignedScratchOffsetBug() const
bool hasFFBH() const
Definition: GCNSubtarget.h:463
bool hasFormattedMUBUFInsts() const
Definition: GCNSubtarget.h:719
bool hasFlatScratchSVSMode() const
Definition: GCNSubtarget.h:698
bool supportsWGP() const
Definition: GCNSubtarget.h:395
bool hasG16() const
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:417
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:915
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
Definition: GCNSubtarget.h:181
bool hasPermlane16Swap() const
bool hasNSAtoVMEMBug() const
bool HasArchitectedFlatScratch
Definition: GCNSubtarget.h:222
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
Definition: GCNSubtarget.h:923
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:409
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool hasVOP3DPP() const
bool hasAtomicBufferPkAddBF16Inst() const
Definition: GCNSubtarget.h:935
bool HasAgentScopeFineGrainedRemoteMemoryAtomics
Definition: GCNSubtarget.h:189
unsigned getMaxFlatWorkGroupSize() const override
bool hasDPP8() const
bool hasDot5Insts() const
Definition: GCNSubtarget.h:843
unsigned getMaxNumUserSGPRs() const
bool hasTransposeLoadF4F6Insts() const
bool hasMadU32Inst() const
bool hasAtomicFaddNoRtnInsts() const
Definition: GCNSubtarget.h:921
unsigned MaxHardClauseLength
The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than t...
Definition: GCNSubtarget.h:196
bool hasPermLaneX16() const
bool hasFlatScratchSVSSwizzleBug() const
bool hasFlatBufferGlobalAtomicFaddF64Inst() const
Definition: GCNSubtarget.h:943
bool HasEmulatedSystemScopeAtomics
Definition: GCNSubtarget.h:190
bool hasNoF16PseudoScalarTransInlineConstants() const
bool hasIEEEMode() const
bool hasScalarDwordx3Loads() const
bool hasVDecCoExecHazard() const
bool hasSignedGVSOffset() const
bool hasLDSFPAtomicAddF32() const
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool hasBFM() const
Definition: GCNSubtarget.h:451
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition: GCNSubtarget.h:593
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
bool hasDot8Insts() const
Definition: GCNSubtarget.h:855
bool hasVectorMulU64() const
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:497
bool hasSCmpK() const
bool hasPseudoScalarTrans() const
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:339
bool requiresWaitIdleBeforeGetReg() const
bool hasPointSampleAccel() const
bool hasDot12Insts() const
Definition: GCNSubtarget.h:871
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:588
bool hasGWS() const
bool HasAtomicFMinFMaxF64GlobalInsts
Definition: GCNSubtarget.h:173
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:577
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
Generation getGeneration() const
Definition: GCNSubtarget.h:357
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:996
bool hasAtomicBufferGlobalPkAddF16Insts() const
Definition: GCNSubtarget.h:927
std::pair< unsigned, unsigned > getMaxNumVectorRegs(const Function &F) const
Return a pair of maximum numbers of VGPRs and AGPRs that meet the number of waves per execution unit ...
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:972
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:657
bool hasScalarAddSub64() const
Definition: GCNSubtarget.h:784
bool hasSplitBarriers() const
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:788
bool enableEarlyIfConversion() const override
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition: GCNSubtarget.h:540
bool hasSGetShaderCyclesInst() const
bool hasRFEHazards() const
Definition: GCNSubtarget.h:550
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
Definition: GCNSubtarget.h:546
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:694
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:772
bool hasDPALU_DPP() const
bool enableSIScheduler() const
bool hasAtomicGlobalPkAddBF16Inst() const
Definition: GCNSubtarget.h:931
bool hasAddr64() const
Definition: GCNSubtarget.h:425
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:249
bool HasAtomicGlobalPkAddBF16Inst
Definition: GCNSubtarget.h:184
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:643
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:529
bool isWave64() const
unsigned getDynamicVGPRBlockSize() const
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:475
bool hasCARRY() const
Definition: GCNSubtarget.h:481
bool hasPackedTID() const
bool setRegModeNeedsVNOPs() const
bool hasFP64() const
Definition: GCNSubtarget.h:405
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:780
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:988
bool hasSALUFloatInsts() const
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:97
bool hasFractBug() const
Definition: GCNSubtarget.h:439
bool isPreciseMemoryEnabled() const
Definition: GCNSubtarget.h:669
bool hasDPPSrc1SGPR() const
bool hasGDS() const
unsigned getMaxWaveScratchSize() const
Definition: GCNSubtarget.h:361
bool HasMemoryAtomicFaddF32DenormalSupport
Definition: GCNSubtarget.h:180
bool hasMTBUFInsts() const
Definition: GCNSubtarget.h:717
bool hasDot4Insts() const
Definition: GCNSubtarget.h:839
bool flatScratchIsArchitected() const
bool hasPartialNSAEncoding() const
bool hasWaitXCnt() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
bool hasSetPrioIncWgInst() const
~GCNSubtarget() override
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool hasDot9Insts() const
Definition: GCNSubtarget.h:859
bool hasVOPD3() const
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:713
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:64
bool hasDefaultComponentBroadcast() const
Definition: GCNSubtarget.h:968
bool requiresCodeObjectV6() const
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:327
bool hasBFE() const
Definition: GCNSubtarget.h:443
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:767
bool HasFlatBufferGlobalAtomicFaddF64Inst
Definition: GCNSubtarget.h:187
static unsigned getNumUserSGPRForField(UserSGPRID ID)
bool hasKernargSegmentPtr() const
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
bool hasPrivateSegmentBuffer() const
bool hasImplicitBufferPtr() const
unsigned getNumKernargPreloadSGPRs() const
bool hasPrivateSegmentSize() const
unsigned getNumUsedUserSGPRs() const
Itinerary data supplied by a subtarget to be used by a target.
Scheduling dependency.
Definition: ScheduleDAG.h:51
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:235
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:249
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
StringRef - Represent a constant reference to a string, i.e., a character array and a length, which need not be null terminated.
Definition: StringRef.h:55
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:47
A Use represents the edge between a Value definition and its users.
Definition: Use.h:35
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least, stopping at the first 1.
Definition: bit.h:203
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
A region of an MBB for scheduling.