static cl::opt<unsigned> ForceGenericVersion(
    "amdgpu-force-generic-version",
    cl::desc("Force a specific generic_v<N> flag to be "
             "added. For testing purposes only."),
    cl::ReallyHidden, cl::init(0));
  if (!HSAMetadataDoc.fromYAML(HSAMetadataString))
    return false;
  OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n";

  OS << "\t.amdhsa_code_object_version " << COV << '\n';
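// Illustrative output for the two directives printed above (the target string
// and version are examples only; the real values come from getTargetID() and
// the requested code object version):
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"
//   .amdhsa_code_object_version 5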
  OS << "\t.amd_kernel_code_t\n";
  Header.EmitKernelCodeT(OS, getContext(), FoldAndPrint);
  OS << "\t.end_amd_kernel_code_t\n";
  OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n';
  OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "
     << Alignment.value() << '\n';
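// Illustrative only (symbol and values are hypothetical): the directive
// printed above has the shape
//   .amdgpu_lds my_lds_var, 4096, 16
// i.e. LDS symbol name, size in bytes, and alignment in bytes.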
#define PRINT_RES_INFO(ARG)                                                    \
  ARG->print(OS, getContext().getAsmInfo());                                   \
  getContext().getAsmInfo()->printExpr(OS, *ARG->getVariableValue());          \
  Streamer.addBlankLine();
#define PRINT_RES_INFO(ARG)                                                    \
  ARG->print(OS, getContext().getAsmInfo());                                   \
  getContext().getAsmInfo()->printExpr(OS, *ARG->getVariableValue());          \
  Streamer.addBlankLine();
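// Both expansions of PRINT_RES_INFO print the resource-info symbol, then the
// MCExpr recorded as its variable value, and finish with a blank line on the
// streamer; the macro is defined once for the per-kernel resource symbols
// (EmitMCResourceInfo) and once for the module-wide maximums
// (EmitMCResourceMaximums).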
  OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n";
  if (!Verifier.verify(HSAMetadataDoc.getRoot()))
    return false;

  std::string HSAMetadataString;
  raw_string_ostream StrOS(HSAMetadataString);
  HSAMetadataDoc.toYAML(StrOS);

  OS << StrOS.str() << '\n';
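// The verified msgpack document is re-serialized as YAML text and printed
// inline; in the assembly output this text is expected to sit between the HSA
// metadata begin/end assembler directives (AssemblerDirectiveBegin /
// AssemblerDirectiveEnd).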
  const uint32_t Encoded_s_code_end = 0xbf9f0000;
  const uint32_t Encoded_s_nop = 0xbf800000;
  uint32_t Encoded_pad = Encoded_s_code_end;

    Encoded_pad = Encoded_s_nop;

  OS << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n';
  OS << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n';
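// Worked example with illustrative numbers: for a 64-byte instruction cache
// line, Log2CacheLineSize == 6, and three cache lines of fill give
// FillSize == 192, so the directives take the form
//   .p2alignl 6, <Encoded_pad>
//   .fill 48, 4, <Encoded_pad>
// i.e. align to a cache-line boundary padded with the s_code_end encoding (or
// s_nop on targets taking the branch above), then emit FillSize / 4 further
// 32-bit words of the same value.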
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR,
    const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
    const MCExpr *ReserveFlatScr) {

  OS << "\t.amdhsa_kernel " << KernelName << '\n';
    const MCExpr *ShiftedAndMaskedExpr =
        MCKernelDescriptor::bits_get(Expr, Shift, Mask, getContext());
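// MCKernelDescriptor::bits_get builds an MCExpr that isolates one bit-field of
// a descriptor word, conceptually (Src & Mask) >> Shift, so each field of the
// packed COMPUTE_PGM_RSRC and kernel-code-properties words can be printed as
// its own .amdhsa_* directive below.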
  OS << "\t\t.amdhsa_group_segment_fixed_size ";
  OS << "\t\t.amdhsa_private_segment_fixed_size ";
  OS << "\t\t.amdhsa_kernarg_size ";

      amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT,
      ".amdhsa_user_sgpr_count");
      amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT,
      ".amdhsa_user_sgpr_count");
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
      ".amdhsa_user_sgpr_private_segment_buffer");
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
      ".amdhsa_user_sgpr_dispatch_ptr");
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
      ".amdhsa_user_sgpr_queue_ptr");
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
      ".amdhsa_user_sgpr_kernarg_segment_ptr");
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
      ".amdhsa_user_sgpr_dispatch_id");
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
      ".amdhsa_user_sgpr_flat_scratch_init");
      amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
      ".amdhsa_user_sgpr_kernarg_preload_length");
      amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
      ".amdhsa_user_sgpr_kernarg_preload_offset");
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
      ".amdhsa_user_sgpr_private_segment_size");
  if (IVersion.Major >= 10)
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
      ".amdhsa_wavefront_size32");
      amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
      amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
      ".amdhsa_uses_dynamic_stack");
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
          ? ".amdhsa_enable_private_segment"
          : ".amdhsa_system_sgpr_private_segment_wavefront_offset"));
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
      ".amdhsa_system_sgpr_workgroup_id_x");
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
      ".amdhsa_system_sgpr_workgroup_id_y");
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
      ".amdhsa_system_sgpr_workgroup_id_z");
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
      ".amdhsa_system_sgpr_workgroup_info");
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
      ".amdhsa_system_vgpr_workitem_id");
  OS << "\t\t.amdhsa_next_free_vgpr ";
  EmitMCExpr(NextVGPR);

  OS << "\t\t.amdhsa_next_free_sgpr ";
  EmitMCExpr(NextSGPR);
      amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
    OS << "\t\t.amdhsa_accum_offset ";

      amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
      ".amdhsa_named_barrier_count");
  OS << "\t\t.amdhsa_reserve_vcc ";
  EmitMCExpr(ReserveVCC);

  OS << "\t\t.amdhsa_reserve_flat_scratch ";
  EmitMCExpr(ReserveFlatScr);

  OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny()
     << '\n';
      amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
      ".amdhsa_float_round_mode_32");
      amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
      ".amdhsa_float_round_mode_16_64");
      amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
      ".amdhsa_float_denorm_mode_32");
      amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
      ".amdhsa_float_denorm_mode_16_64");
  if (IVersion.Major < 12) {
      amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
      ".amdhsa_dx10_clamp");
      amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
      ".amdhsa_ieee_mode");

  if (IVersion.Major >= 9) {
      amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
      ".amdhsa_fp16_overflow");

      amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split");
      amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
      ".amdhsa_workgroup_processor_mode");

  if (IVersion.Major >= 10) {
      amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
      ".amdhsa_memory_ordered");
      amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
      ".amdhsa_forward_progress");
  if (IVersion.Major >= 10 && IVersion.Major < 12) {
      amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
      ".amdhsa_shared_vgpr_count");

  if (IVersion.Major == 11) {
      amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE,
      ".amdhsa_inst_pref_size");

  if (IVersion.Major >= 12) {
      amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE,
      ".amdhsa_inst_pref_size");
      amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
      ".amdhsa_round_robin_scheduling");
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
      ".amdhsa_exception_fp_ieee_invalid_op");
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
      ".amdhsa_exception_fp_denorm_src");
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
      ".amdhsa_exception_fp_ieee_div_zero");
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
      ".amdhsa_exception_fp_ieee_overflow");
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
      ".amdhsa_exception_fp_ieee_underflow");
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
      ".amdhsa_exception_fp_ieee_inexact");
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
      ".amdhsa_exception_int_div_zero");
  OS << "\t.end_amdhsa_kernel\n";
  W.setELFHeaderEFlags(getEFlags());
  W.setOverrideABIVersion(
      getELFABIVersion(STI.getTargetTriple(), CodeObjectVersion));
void AMDGPUTargetELFStreamer::EmitNote(
    StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
    function_ref<void(MCELFStreamer &)> EmitDesc) {
  auto &S = getStreamer();
  auto &Context = S.getContext();

  auto NameSZ = Name.size() + 1;

  unsigned NoteFlags = 0;

  S.emitValue(DescSZ, 4);
  S.emitInt32(NoteType);
  S.emitValueToAlignment(Align(4), 0, 1, 0);
  S.emitValueToAlignment(Align(4), 0, 1, 0);
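// Standard ELF note layout: a 4-byte name size (NameSZ = Name.size() + 1 for
// the terminating NUL), a 4-byte descriptor size (DescSZ, which may be a
// symbolic MCExpr resolved at layout time), a 4-byte note type, then the name
// and the descriptor bytes, each padded to 4-byte alignment by the
// emitValueToAlignment calls above.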
unsigned AMDGPUTargetELFStreamer::getEFlags() {
    return getEFlagsR600();
    return getEFlagsAMDGCN();

unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {

unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
  assert(STI.getTargetTriple().isAMDGCN());

  switch (STI.getTargetTriple().getOS()) {
  case Triple::UnknownOS:
    return getEFlagsUnknownOS();
  case Triple::AMDHSA:
    return getEFlagsAMDHSA();
  case Triple::AMDPAL:
    return getEFlagsAMDPAL();
  case Triple::Mesa3D:
    return getEFlagsMesa3D();
unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
  return getEFlagsV3();

unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
  if (CodeObjectVersion >= 6)
    return getEFlagsV6();
  return getEFlagsV4();

unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
  return getEFlagsV3();

unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
  return getEFlagsV3();

unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
  unsigned EFlagsV3 = 0;

unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
  unsigned EFlagsV4 = 0;

unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
  unsigned Flags = getEFlagsV4();

                   " - no ELF flag can represent this version!");
  auto *SymbolELF = static_cast<MCSymbolELF *>(Symbol);

  if (!SymbolELF->isBindingSet())
    SymbolELF->setBinding(ELF::STB_GLOBAL);

  if (SymbolELF->declareCommon(Size, Alignment)) {
    report_fatal_error("Symbol: " + Symbol->getName() +
                       " redeclared as different type");
  }
  auto *DescBegin = Context.createTempSymbol();
  auto *DescEnd = Context.createTempSymbol();
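// DescBegin/DescEnd are temporary labels placed around the note descriptor so
// that its size can be expressed as the MCExpr (DescEnd - DescBegin) and
// handed to EmitNote before the descriptor bytes are laid out; the HSA
// metadata note below uses the same pattern.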
  if (!Verifier.verify(HSAMetadataDoc.getRoot()))
    return false;

  std::string HSAMetadataString;
  HSAMetadataDoc.writeToBlob(HSAMetadataString);

  auto *DescBegin = Context.createTempSymbol();
  auto *DescEnd = Context.createTempSymbol();
  const uint32_t Encoded_s_code_end = 0xbf9f0000;
  const uint32_t Encoded_s_nop = 0xbf800000;
  uint32_t Encoded_pad = Encoded_s_code_end;

    Encoded_pad = Encoded_s_nop;

  for (unsigned I = 0; I < FillSize; I += 4)
    getStreamer().emitInt32(Encoded_pad);
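// Unlike the assembler path, which prints .p2alignl/.fill directives, the ELF
// path materializes the padding directly: after aligning to the cache line it
// writes FillSize bytes of the chosen 32-bit encoding (s_code_end, or s_nop on
// the targets that take the branch above), one word per loop iteration.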
void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR,
    const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
    const MCExpr *ReserveFlatScr) {
  auto &Streamer = getStreamer();
  auto &Context = Streamer.getContext();

  auto *KernelCodeSymbol =
      static_cast<MCSymbolELF *>(Context.getOrCreateSymbol(Twine(KernelName)));
  auto *KernelDescriptorSymbol = static_cast<MCSymbolELF *>(
      Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));

  KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
  KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
  KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());

  KernelDescriptorSymbol->setSize(
      MCConstantExpr::create(sizeof(amdhsa::kernel_descriptor_t), Context));

  Streamer.emitLabel(KernelDescriptorSymbol);

  Streamer.emitInt8(0u);

  Streamer.emitInt8(0u);

  Streamer.emitInt8(0u);
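// The <kernel>.kd symbol mirrors the kernel code symbol's binding, visibility
// and st_other bits and is sized to the kernel descriptor; the descriptor is
// then emitted field by field (group/private segment sizes, kernarg size, the
// code-entry offset expression, the compute_pgm_rsrc words, and so on), with
// the emitInt8(0u) calls above filling the reserved bytes in between.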