LLVM: lib/Target/NVPTX/NVPTXSubtarget.h Source File

//=====-- NVPTXSubtarget.h - Define Subtarget for the NVPTX ---*- C++ -*--====//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This file declares the NVPTX specific subclass of TargetSubtarget.

//

//===----------------------------------------------------------------------===//


#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H

#define LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H


#include "NVPTX.h"

#include "NVPTXFrameLowering.h"

#include "NVPTXISelLowering.h"

#include "NVPTXInstrInfo.h"

#include "NVPTXRegisterInfo.h"

#include "llvm/CodeGen/TargetSubtargetInfo.h"

#include "llvm/IR/DataLayout.h"

#include <string>


#define GET_SUBTARGETINFO_HEADER

#include "NVPTXGenSubtargetInfo.inc"


namespace llvm {


class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
  // Declared here; the definition lives out of line.
  virtual void anchor();

  // Name of the selected target. May be empty; see getTargetName() and
  // hasTargetName().
  std::string TargetName;

  // PTX ISA version x.y, stored as 10*x + y (so 3.1 is stored as 31).
  unsigned PTXVersion;

  // Full SM version x.y, stored as 100*x + 10*y + feature. For example 3.1 is
  // stored as 310 and sm_90a is stored as 901.
  unsigned int FullSmVersion;

  // SM version x.y, stored as 10*x + y (so 3.1 is stored as 31). This value is
  // derived from FullSmVersion.
  unsigned int SmVersion;

  NVPTXInstrInfo InstrInfo;
  NVPTXTargetLowering TLInfo;
  std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;

  // NVPTX has no call stack frame. A NVPTX-specific FrameLowering class is
  // still needed because TargetFrameLowering is abstract.
  NVPTXFrameLowering FrameLowering;

  // Shared check for features that need both a minimum SM version and a
  // minimum PTX version. Both arguments use the 10*x + y encoding of
  // SmVersion and PTXVersion.
  bool meetsSmAndPTX(unsigned MinSm, unsigned MinPTX) const {
    return SmVersion >= MinSm && PTXVersion >= MinPTX;
  }

public:
  /// Constructs the subtarget, initializing its data members to match those
  /// of the specified module.
  NVPTXSubtarget(const Triple &TT, const std::string &CPU,
                 const std::string &FS, const NVPTXTargetMachine &TM);

  ~NVPTXSubtarget() override;

  // Accessors for the per-subtarget helper objects owned by this class.
  const TargetFrameLowering *getFrameLowering() const override {
    return &FrameLowering;
  }
  const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; }
  // The register info is owned by the instruction info object.
  const NVPTXRegisterInfo *getRegisterInfo() const override {
    return &InstrInfo.getRegisterInfo();
  }
  const NVPTXTargetLowering *getTargetLowering() const override {
    return &TLInfo;
  }

  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;

  // Feature predicates. Each one compares the configured SM and/or PTX
  // version against the minimum version the feature is gated on.
  bool hasAtomAddF64() const { return SmVersion >= 60; }
  bool hasAtomScope() const { return SmVersion >= 60; }
  bool hasAtomBitwise64() const { return SmVersion >= 32; }
  bool hasAtomMinMax64() const { return SmVersion >= 32; }
  bool hasAtomCas16() const { return meetsSmAndPTX(70, 63); }
  bool hasClusters() const { return meetsSmAndPTX(90, 78); }
  bool hasLDG() const { return SmVersion >= 32; }
  bool hasHWROT32() const { return SmVersion >= 32; }
  bool hasFP16Math() const { return SmVersion >= 53; }
  bool hasBF16Math() const { return SmVersion >= 80; }
  bool allowFP16Math() const;
  bool hasMaskOperator() const { return PTXVersion >= 71; }
  bool hasNoReturn() const { return meetsSmAndPTX(30, 64); }
  // Do SM & PTX support memory orderings (weak and atomic: relaxed, acquire,
  // release, acq_rel, sc)?
  bool hasMemoryOrdering() const { return meetsSmAndPTX(70, 60); }
  // Do SM & PTX support the .acquire and .release qualifiers for fence?
  bool hasSplitAcquireAndReleaseFences() const {
    return meetsSmAndPTX(90, 86);
  }
  // Do SM & PTX support atomic relaxed MMIO operations?
  bool hasRelaxedMMIO() const { return meetsSmAndPTX(70, 82); }
  bool hasDotInstructions() const { return meetsSmAndPTX(61, 50); }
  // Tcgen05 instructions in the Blackwell family. They are only reported for
  // the specific full SM versions listed below, and only with PTX 8.6 or
  // newer.
  bool hasTcgen05Instructions() const {
    if (PTXVersion < 86)
      return false;

    switch (FullSmVersion) {
    case 1001: // sm_100a
    case 1011: // sm_101a
      return true;
    default:
      return false;
    }
  }

  // Before CUDA 12.3, ptxas did not recognize that the trap instruction
  // terminates a basic block. It assumed instead that control flow continued
  // to the next instruction, which could belong to the block lexically below
  // it, so phantom CFG edges were created inside ptxas. The issue was fixed
  // in CUDA 12.3. Thus, when the PTX ISA version is 8.3 or newer, we can
  // confidently say that the bug will not be present.
  bool hasPTXASUnreachableBug() const { return PTXVersion < 83; }
  bool hasCvtaParam() const { return meetsSmAndPTX(70, 77); }
  unsigned int getFullSmVersion() const { return FullSmVersion; }
  // Drops the trailing feature digit of the full SM version.
  unsigned int getSmVersion() const { return FullSmVersion / 10; }
  // GPUs with an "a" suffix include architecture-accelerated features that
  // are supported on the specified architecture only, hence such targets do
  // not follow the onion layer model. hasAAFeatures() allows distinguishing
  // such GPU variants from the base GPU architecture. The feature digit of
  // the full SM version is interpreted as follows:
  // - 0 represents the base GPU model,
  // - a non-zero value identifies a particular architecture-accelerated
  //   variant.
  bool hasAAFeatures() const { return FullSmVersion % 10 != 0; }

  // Returns the target name, falling back to `sm_30` when the user did not
  // provide a target.
  std::string getTargetName() const {
    if (hasTargetName())
      return TargetName;
    return "sm_30";
  }
  bool hasTargetName() const { return !TargetName.empty(); }

  bool hasNativeBF16Support(int Opcode) const;

  // Returns the maximum required alignment among the supported data types.
  // From the PTX ISA doc, section 8.2.3:
  //  The memory consistency model relates operations executed on memory
  //  locations with scalar data-types, which have a maximum size and alignment
  //  of 64 bits. Memory operations with a vector data-type are modelled as a
  //  set of equivalent memory operations with a scalar data-type, executed in
  //  an unspecified order on the elements in the vector.
  // The returned 8 corresponds to those 64 bits expressed in bytes.
  unsigned getMaxRequiredAlignment() const { return 8; }
  // Emulated loops built on 32-bit/64-bit CAS generate better SASS than ones
  // built on 16-bit CAS, so 32 bits is the smallest cmpxchg width reported.
  unsigned getMinCmpXchgSizeInBits() const { return 32; }

  unsigned getPTXVersion() const { return PTXVersion; }

  NVPTXSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
  void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);

  void failIfClustersUnsupported(std::string const &FailureMessage) const;
};


} // End llvm namespace


#endif

DataLayout.h

NVPTXFrameLowering.h

NVPTXISelLowering.h

NVPTXInstrInfo.h

NVPTXRegisterInfo.h

NVPTX.h

SmVersion
unsigned SmVersion
Definition: NVVMReflect.cpp:81

TargetSubtargetInfo.h

NVPTXGenSubtargetInfo

llvm::NVPTXFrameLowering
Definition: NVPTXFrameLowering.h:21

llvm::NVPTXInstrInfo
Definition: NVPTXInstrInfo.h:25

llvm::NVPTXInstrInfo::getRegisterInfo
const NVPTXRegisterInfo & getRegisterInfo() const
Definition: NVPTXInstrInfo.h:31

llvm::NVPTXRegisterInfo
Definition: NVPTXRegisterInfo.h:24

llvm::NVPTXSubtarget
Definition: NVPTXSubtarget.h:30

llvm::NVPTXSubtarget::getInstrInfo
const NVPTXInstrInfo * getInstrInfo() const override
Definition: NVPTXSubtarget.h:65

llvm::NVPTXSubtarget::failIfClustersUnsupported
void failIfClustersUnsupported(std::string const &FailureMessage) const
Definition: NVPTXSubtarget.cpp:105

llvm::NVPTXSubtarget::getTargetName
std::string getTargetName() const
Definition: NVPTXSubtarget.h:135

llvm::NVPTXSubtarget::getMaxRequiredAlignment
unsigned getMaxRequiredAlignment() const
Definition: NVPTXSubtarget.h:149

llvm::NVPTXSubtarget::hasAtomMinMax64
bool hasAtomMinMax64() const
Definition: NVPTXSubtarget.h:78

llvm::NVPTXSubtarget::hasAtomAddF64
bool hasAtomAddF64() const
Definition: NVPTXSubtarget.h:75

llvm::NVPTXSubtarget::hasHWROT32
bool hasHWROT32() const
Definition: NVPTXSubtarget.h:82

llvm::NVPTXSubtarget::hasSplitAcquireAndReleaseFences
bool hasSplitAcquireAndReleaseFences() const
Definition: NVPTXSubtarget.h:92

llvm::NVPTXSubtarget::hasClusters
bool hasClusters() const
Definition: NVPTXSubtarget.h:80

llvm::NVPTXSubtarget::hasMaskOperator
bool hasMaskOperator() const
Definition: NVPTXSubtarget.h:86

llvm::NVPTXSubtarget::hasAAFeatures
bool hasAAFeatures() const
Definition: NVPTXSubtarget.h:132

llvm::NVPTXSubtarget::getTargetLowering
const NVPTXTargetLowering * getTargetLowering() const override
Definition: NVPTXSubtarget.h:69

llvm::NVPTXSubtarget::ParseSubtargetFeatures
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)

llvm::NVPTXSubtarget::getMinCmpXchgSizeInBits
unsigned getMinCmpXchgSizeInBits() const
Definition: NVPTXSubtarget.h:151

llvm::NVPTXSubtarget::getPTXVersion
unsigned getPTXVersion() const
Definition: NVPTXSubtarget.h:153

llvm::NVPTXSubtarget::hasCvtaParam
bool hasCvtaParam() const
Definition: NVPTXSubtarget.h:123

llvm::NVPTXSubtarget::~NVPTXSubtarget
~NVPTXSubtarget() override

llvm::NVPTXSubtarget::hasNativeBF16Support
bool hasNativeBF16Support(int Opcode) const
Definition: NVPTXSubtarget.cpp:73

llvm::NVPTXSubtarget::getRegisterInfo
const NVPTXRegisterInfo * getRegisterInfo() const override
Definition: NVPTXSubtarget.h:66

llvm::NVPTXSubtarget::getFullSmVersion
unsigned int getFullSmVersion() const
Definition: NVPTXSubtarget.h:124

llvm::NVPTXSubtarget::getSmVersion
unsigned int getSmVersion() const
Definition: NVPTXSubtarget.h:125

llvm::NVPTXSubtarget::hasDotInstructions
bool hasDotInstructions() const
Definition: NVPTXSubtarget.h:97

llvm::NVPTXSubtarget::hasTcgen05Instructions
bool hasTcgen05Instructions() const
Definition: NVPTXSubtarget.h:101

llvm::NVPTXSubtarget::hasAtomBitwise64
bool hasAtomBitwise64() const
Definition: NVPTXSubtarget.h:77

llvm::NVPTXSubtarget::hasRelaxedMMIO
bool hasRelaxedMMIO() const
Definition: NVPTXSubtarget.h:96

llvm::NVPTXSubtarget::hasTargetName
bool hasTargetName() const
Definition: NVPTXSubtarget.h:138

llvm::NVPTXSubtarget::hasBF16Math
bool hasBF16Math() const
Definition: NVPTXSubtarget.h:84

llvm::NVPTXSubtarget::hasLDG
bool hasLDG() const
Definition: NVPTXSubtarget.h:81

llvm::NVPTXSubtarget::allowFP16Math
bool allowFP16Math() const
Definition: NVPTXSubtarget.cpp:69

llvm::NVPTXSubtarget::getFrameLowering
const TargetFrameLowering * getFrameLowering() const override
Definition: NVPTXSubtarget.h:62

llvm::NVPTXSubtarget::hasAtomScope
bool hasAtomScope() const
Definition: NVPTXSubtarget.h:76

llvm::NVPTXSubtarget::hasAtomCas16
bool hasAtomCas16() const
Definition: NVPTXSubtarget.h:79

llvm::NVPTXSubtarget::hasMemoryOrdering
bool hasMemoryOrdering() const
Definition: NVPTXSubtarget.h:90

llvm::NVPTXSubtarget::initializeSubtargetDependencies
NVPTXSubtarget & initializeSubtargetDependencies(StringRef CPU, StringRef FS)
Definition: NVPTXSubtarget.cpp:35

llvm::NVPTXSubtarget::getSelectionDAGInfo
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition: NVPTXSubtarget.cpp:65

llvm::NVPTXSubtarget::hasPTXASUnreachableBug
bool hasPTXASUnreachableBug() const
Definition: NVPTXSubtarget.h:122

llvm::NVPTXSubtarget::hasFP16Math
bool hasFP16Math() const
Definition: NVPTXSubtarget.h:83

llvm::NVPTXSubtarget::hasNoReturn
bool hasNoReturn() const
Definition: NVPTXSubtarget.h:87

llvm::NVPTXTargetLowering
Definition: NVPTXISelLowering.h:99

llvm::NVPTXTargetMachine
NVPTXTargetMachine.
Definition: NVPTXTargetMachine.h:25

llvm::SelectionDAGTargetInfo
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
Definition: SelectionDAGTargetInfo.h:31

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51

llvm::TargetFrameLowering
Information about stack frame layout on the target.
Definition: TargetFrameLowering.h:45

llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18