97#define DEPOTNAME "__local_depot"
109 if (
const User *U = dyn_cast<User>(V))
110 for (
const auto &O : U->operands())
123 if (Visited.
count(GV))
127 if (!Visiting.
insert(GV).second)
132 for (
const auto &O : GV->
operands())
145 NVPTX_MC::verifyInstructionPredicates(
MI->getOpcode(),
149 lowerToMCInst(
MI, Inst);
156 if (
MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
163 for (
const auto MO :
MI->operands())
208unsigned NVPTXAsmPrinter::encodeVirtualRegister(
unsigned Reg) {
213 unsigned RegNum = RegMap[
Reg];
218 if (RC == &NVPTX::B1RegClass) {
220 }
else if (RC == &NVPTX::B16RegClass) {
222 }
else if (RC == &NVPTX::B32RegClass) {
224 }
else if (RC == &NVPTX::B64RegClass) {
226 }
else if (RC == &NVPTX::B128RegClass) {
233 Ret |= (RegNum & 0x0FFFFFFF);
238 return Reg & 0x0FFFFFFF;
253 Type *Ty =
F->getReturnType();
258 auto PrintScalarRetVal = [&](
unsigned Size) {
262 const unsigned TotalSize =
DL.getTypeAllocSize(Ty);
263 const Align RetAlignment = TLI->getFunctionArgumentAlignment(
265 O <<
".param .align " << RetAlignment.
value() <<
" .b8 func_retval0["
269 }
else if (
auto *ITy = dyn_cast<IntegerType>(Ty)) {
270 PrintScalarRetVal(ITy->getBitWidth());
271 }
else if (isa<PointerType>(Ty)) {
272 PrintScalarRetVal(TLI->getPointerTy(
DL).getSizeInBits());
281 printReturnValStr(&F, O);
286bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
288 MachineLoopInfo &LI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
301 if (
const BasicBlock *PBB = PMBB->getBasicBlock()) {
303 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
306 if (
MDNode *UnrollCountMD =
308 if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
320 if (isLoopHeaderOfNoUnroll(
MBB))
324void NVPTXAsmPrinter::emitFunctionEntryLabel() {
328 if (!GlobalsEmitted) {
330 GlobalsEmitted =
true;
336 emitLinkageDirective(F, O);
341 printReturnValStr(*
MF, O);
346 emitFunctionParamList(F, O);
350 emitKernelFunctionDirectives(*F, O);
360 setAndEmitFunctionVirtualRegisters(*
MF);
361 encodeDebugInfoRegisterNumbers(*
MF);
365 if (!
SP->getUnit()->isDebugDirectivesOnly())
381void NVPTXAsmPrinter::emitFunctionBodyStart() {
388void NVPTXAsmPrinter::emitFunctionBodyEnd() {
398void NVPTXAsmPrinter::emitImplicitDef(
const MachineInstr *
MI)
const {
411void NVPTXAsmPrinter::emitKernelFunctionDirectives(
const Function &
F,
417 if (!ReqNTID.empty())
418 O <<
formatv(
".reqntid {0:$[, ]}\n",
422 if (!MaxNTID.empty())
423 O <<
formatv(
".maxntid {0:$[, ]}\n",
427 O <<
".minnctapersm " << *Mincta <<
"\n";
430 O <<
".maxnreg " << *Maxnreg <<
"\n";
441 if (!ClusterDim.empty()) {
443 if (!BlocksAreClusters)
444 O <<
".explicitcluster\n";
446 if (ClusterDim[0] != 0) {
448 "cluster_dim_x != 0 implies cluster_dim_y and cluster_dim_z "
449 "should be non-zero as well");
451 O <<
formatv(
".reqnctapercluster {0:$[, ]}\n",
452 make_range(ClusterDim.begin(), ClusterDim.end()));
455 "cluster_dim_x == 0 implies cluster_dim_y and cluster_dim_z "
456 "should be 0 as well");
460 if (BlocksAreClusters) {
462 if (ReqNTID.empty() || ClusterDim.empty())
464 F,
"blocksareclusters requires reqntid and cluster_dim attributes",
468 F,
"blocksareclusters requires PTX version >= 9.0",
471 O <<
".blocksareclusters\n";
475 O <<
".maxclusterrank " << *Maxclusterrank <<
"\n";
486 assert(
I != VRegMapping.
end() &&
"Bad register class");
490 assert(VI != RegMap.
end() &&
"Bad virtual register");
491 unsigned MappedVR = VI->second;
498void NVPTXAsmPrinter::emitVirtualRegister(
unsigned int vr,
503void NVPTXAsmPrinter::emitAliasDeclaration(
const GlobalAlias *GA,
508 "NVPTX aliasee must be a non-kernel function definition");
514 emitDeclarationWithName(F,
getSymbol(GA), O);
518 emitDeclarationWithName(F,
getSymbol(F), O);
523 emitLinkageDirective(F, O);
528 printReturnValStr(F, O);
531 emitFunctionParamList(F, O);
543 return GV->getName() !=
"llvm.used";
545 for (
const User *U :
C->users())
546 if (
const Constant *
C = dyn_cast<Constant>(U))
555 if (OtherGV->getName() ==
"llvm.used")
559 if (
const Function *CurFunc =
I->getFunction()) {
560 if (OneFunc && (CurFunc != OneFunc))
568 for (
const User *UU : U->users())
601 for (
const User *U :
C->users()) {
602 if (
const Constant *cu = dyn_cast<Constant>(U)) {
605 }
else if (
const Instruction *
I = dyn_cast<Instruction>(U)) {
606 if (
const Function *Caller =
I->getFunction())
617 if (
F.getAttributes().hasFnAttr(
"nvptx-libcall-callee")) {
618 emitDeclaration(&F, O);
622 if (
F.isDeclaration()) {
625 if (
F.getIntrinsicID())
627 emitDeclaration(&F, O);
630 for (
const User *U :
F.users()) {
631 if (
const Constant *
C = dyn_cast<Constant>(U)) {
636 emitDeclaration(&F, O);
642 emitDeclaration(&F, O);
647 if (!isa<Instruction>(U))
657 emitDeclaration(&F, O);
664 emitAliasDeclaration(&GA, O);
667void NVPTXAsmPrinter::emitStartOfAsmFile(
Module &M) {
677 emitHeader(M, OS1, *STI);
691 GlobalsEmitted =
false;
696void NVPTXAsmPrinter::emitGlobals(
const Module &M) {
700 emitDeclarations(M, OS2);
715 assert(GVVisited.
size() == M.global_size() &&
"Missed a global variable");
716 assert(GVVisiting.
size() == 0 &&
"Did not fully process a global variable");
724 printModuleLevelGV(GV, OS2,
false, STI);
748 "// Generated by LLVM NVPTX Back-End\n"
751 <<
".version " << (PTXVersion / 10) <<
"." << (PTXVersion % 10) <<
"\n"
756 O <<
", texmode_independent";
758 bool HasFullDebugInfo =
false;
760 switch(
CU->getEmissionKind()) {
766 HasFullDebugInfo =
true;
769 if (HasFullDebugInfo)
772 if (HasFullDebugInfo)
776 <<
".address_size " << (NTM.
is64Bit() ?
"64" :
"32") <<
"\n"
783 if (!GlobalsEmitted) {
785 GlobalsEmitted =
true;
799 OutStreamer->emitRawText(
"\t.section\t.debug_macinfo\t{\t}");
803 TS->outputDwarfFileDirectives();
821void NVPTXAsmPrinter::emitLinkageDirective(
const GlobalValue *V,
824 if (V->hasExternalLinkage()) {
825 if (
const auto *GVar = dyn_cast<GlobalVariable>(V))
826 O << (GVar->hasInitializer() ?
".visible " :
".extern ");
827 else if (V->isDeclaration())
831 }
else if (V->hasAppendingLinkage()) {
833 "' has unsupported appending linkage type");
834 }
else if (!
V->hasInternalLinkage() && !
V->hasPrivateLinkage()) {
840void NVPTXAsmPrinter::printModuleLevelGV(
const GlobalVariable *GVar,
886 emitPTXGlobalVariable(GVar, O, STI);
894 const Constant *Initializer =
nullptr;
899 CI = dyn_cast<ConstantInt>(Initializer);
908 O <<
"addr_mode_" << i <<
" = ";
914 O <<
"clamp_to_border";
917 O <<
"clamp_to_edge";
928 O <<
"filter_mode = ";
943 O <<
", force_unnormalized_coords = 1";
963 const Function *DemotedFunc =
nullptr;
965 O <<
"// " << GVar->
getName() <<
" has been demoted\n";
966 localDecls[DemotedFunc].push_back(GVar);
976 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
977 O <<
" .attribute(.managed)";
981 << GVar->
getAlign().value_or(
DL.getPrefTypeAlign(ETy)).value();
990 O << getPTXFundamentalTypeStr(ETy,
false);
1001 if (!Initializer->
isNullValue() && !isa<UndefValue>(Initializer)) {
1003 printScalarConstant(Initializer, O);
1012 "' is not allowed in addrspace(" +
1028 const uint64_t ElementSize =
DL.getTypeStoreSize(ETy);
1035 if (!isa<UndefValue>(Initializer) && !Initializer->
isNullValue()) {
1036 AggBuffer aggBuffer(ElementSize, *
this);
1037 bufferAggregateConstant(Initializer, &aggBuffer);
1038 if (aggBuffer.numSymbols()) {
1040 if (ElementSize % ptrSize ||
1041 !aggBuffer.allSymbolsAligned(ptrSize)) {
1045 "initialized packed aggregate with pointers '" +
1047 "' requires at least PTX ISA version 7.1");
1050 O <<
"[" << ElementSize <<
"] = {";
1051 aggBuffer.printBytes(O);
1054 O <<
" .u" << ptrSize * 8 <<
" ";
1056 O <<
"[" << ElementSize / ptrSize <<
"] = {";
1057 aggBuffer.printWords(O);
1063 O <<
"[" << ElementSize <<
"] = {";
1064 aggBuffer.printBytes(O);
1071 O <<
"[" << ElementSize <<
"]";
1077 O <<
"[" << ElementSize <<
"]";
1088void NVPTXAsmPrinter::AggBuffer::printSymbol(
unsigned nSym,
raw_ostream &os) {
1089 const Value *
v = Symbols[nSym];
1090 const Value *v0 = SymbolsBeforeStripping[nSym];
1091 if (
const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1095 bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1096 if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
1098 Name->print(os, AP.MAI);
1101 Name->print(os, AP.MAI);
1103 }
else if (
const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
1104 const MCExpr *Expr = AP.lowerConstantForGV(CExpr,
false);
1105 AP.printMCExpr(*Expr, os);
1110void NVPTXAsmPrinter::AggBuffer::printBytes(
raw_ostream &os) {
1111 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1116 unsigned int InitializerCount =
size;
1119 if (numSymbols() == 0)
1120 while (InitializerCount >= 1 && !buffer[InitializerCount - 1])
1123 symbolPosInBuffer.push_back(InitializerCount);
1124 unsigned int nSym = 0;
1125 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1126 for (
unsigned int pos = 0; pos < InitializerCount;) {
1129 if (pos != nextSymbolPos) {
1130 os << (
unsigned int)buffer[pos];
1137 std::string symText;
1139 printSymbol(nSym, oss);
1140 for (
unsigned i = 0; i < ptrSize; ++i) {
1144 os <<
"(" << symText <<
")";
1147 nextSymbolPos = symbolPosInBuffer[++nSym];
1148 assert(nextSymbolPos >= pos);
1152void NVPTXAsmPrinter::AggBuffer::printWords(
raw_ostream &os) {
1153 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1154 symbolPosInBuffer.push_back(size);
1155 unsigned int nSym = 0;
1156 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1157 assert(nextSymbolPos % ptrSize == 0);
1158 for (
unsigned int pos = 0; pos <
size; pos += ptrSize) {
1161 if (pos == nextSymbolPos) {
1162 printSymbol(nSym, os);
1163 nextSymbolPos = symbolPosInBuffer[++nSym];
1164 assert(nextSymbolPos % ptrSize == 0);
1165 assert(nextSymbolPos >= pos + ptrSize);
1166 }
else if (ptrSize == 4)
1174 auto It = localDecls.find(F);
1175 if (It == localDecls.end())
1185 O <<
"\t// demoted variable\n\t";
1186 printModuleLevelGV(GV, O,
true, STI);
1190void NVPTXAsmPrinter::emitPTXAddressSpace(
unsigned int AddressSpace,
1213NVPTXAsmPrinter::getPTXFundamentalTypeStr(
Type *Ty,
bool useB4PTR)
const {
1216 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1219 if (NumBits <= 64) {
1220 std::string
name =
"u";
1221 return name + utostr(NumBits);
1237 assert((PtrSize == 64 || PtrSize == 32) &&
"Unexpected pointer size");
1255void NVPTXAsmPrinter::emitPTXGlobalVariable(
const GlobalVariable *GVar,
1268 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1270 O <<
" .attribute(.managed)";
1273 << GVar->
getAlign().value_or(
DL.getPrefTypeAlign(ETy)).value();
1284 O <<
" ." << getPTXFundamentalTypeStr(ETy) <<
" ";
1289 int64_t ElementSize = 0;
1299 ElementSize =
DL.getTypeStoreSize(ETy);
1320 bool IsFirst =
true;
1323 if (
F->arg_empty() && !
F->isVarArg()) {
1331 Type *Ty = Arg.getType();
1332 const std::string ParamSym = TLI->getParamName(F, Arg.getArgNo());
1343 const bool IsSurface = !IsSampler && !IsTexture &&
1345 if (IsSampler || IsTexture || IsSurface) {
1352 O <<
".samplerref ";
1362 auto GetOptimalAlignForParam = [TLI, &
DL,
F, &Arg](
Type *Ty) ->
Align {
1365 return StackAlign.
value();
1367 Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty,
DL);
1369 Arg.hasByValAttr() ? Arg.getParamAlign() :
MaybeAlign();
1370 return std::max(TypeAlign, ParamAlign.
valueOrOne());
1373 if (Arg.hasByValAttr()) {
1375 Type *ETy = Arg.getParamByValType();
1376 assert(ETy &&
"Param should have byval type");
1382 const Align OptimalAlign =
1383 IsKernelFunc ? GetOptimalAlignForParam(ETy)
1384 : TLI->getFunctionByValParamAlign(
1385 F, ETy, Arg.getParamAlign().valueOrOne(),
DL);
1387 O <<
"\t.param .align " << OptimalAlign.
value() <<
" .b8 " << ParamSym
1388 <<
"[" <<
DL.getTypeAllocSize(ETy) <<
"]";
1397 Align OptimalAlign = GetOptimalAlignForParam(Ty);
1399 O <<
"\t.param .align " << OptimalAlign.
value() <<
" .b8 " << ParamSym
1400 <<
"[" <<
DL.getTypeAllocSize(Ty) <<
"]";
1405 auto *PTy = dyn_cast<PointerType>(Ty);
1406 unsigned PTySizeInBits = 0;
1409 TLI->getPointerTy(
DL, PTy->getAddressSpace()).getSizeInBits();
1410 assert(PTySizeInBits &&
"Invalid pointer size");
1415 O <<
"\t.param .u" << PTySizeInBits <<
" .ptr";
1417 switch (PTy->getAddressSpace()) {
1434 O <<
" .align " << Arg.getParamAlign().valueOrOne().value() <<
" "
1445 O << getPTXFundamentalTypeStr(Ty);
1446 O <<
" " << ParamSym;
1452 if (
auto *ITy = dyn_cast<IntegerType>(Ty)) {
1455 assert(PTySizeInBits &&
"Invalid pointer size");
1456 Size = PTySizeInBits;
1459 O <<
"\t.param .b" <<
Size <<
" " << ParamSym;
1462 if (
F->isVarArg()) {
1466 << TLI->getParamName(F, -1) <<
"[]";
1472void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1488 O <<
"\t.reg .b64 \t%SP;\n"
1489 <<
"\t.reg .b64 \t%SPL;\n";
1491 O <<
"\t.reg .b32 \t%SP;\n"
1492 <<
"\t.reg .b32 \t%SPL;\n";
1504 auto &RCRegMap = VRegMapping[MRI->
getRegClass(VR)];
1505 RCRegMap[VR] = RCRegMap.
size() + 1;
1511 const unsigned N = VRegMapping[RC].
size();
1517 O <<
"\t.reg " << RCName <<
" \t" << RCStr <<
"<" << (
N + 1) <<
">;\n";
1526void NVPTXAsmPrinter::encodeDebugInfoRegisterNumbers(
1536 for (
auto &classMap : VRegMapping) {
1537 for (
auto &registerMapping : classMap.getSecond()) {
1538 auto reg = registerMapping.getFirst();
1544void NVPTXAsmPrinter::printFPConstant(
const ConstantFP *Fp,
1548 unsigned int numHex;
1567 if (
const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1571 if (
const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1572 printFPConstant(CFP, O);
1575 if (isa<ConstantPointerNull>(CPV)) {
1579 if (
const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1581 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1590 if (
const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1591 const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr),
false);
1598void NVPTXAsmPrinter::bufferLEByte(
const Constant *CPV,
int Bytes,
1599 AggBuffer *AggBuffer) {
1601 int AllocSize =
DL.getTypeAllocSize(CPV->
getType());
1605 AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1610 auto AddIntToBuffer = [AggBuffer, Bytes](
const APInt &Val) {
1611 size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1617 for (
unsigned I = 0;
I < NumBytes - 1; ++
I) {
1618 Buf[
I] = Val.extractBitsAsZExtValue(8,
I * 8);
1620 size_t LastBytePosition = (NumBytes - 1) * 8;
1621 size_t LastByteBits = Val.getBitWidth() - LastBytePosition;
1623 Val.extractBitsAsZExtValue(LastByteBits, LastBytePosition);
1624 AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1629 if (
const auto *CI = dyn_cast<ConstantInt>(CPV)) {
1633 if (
const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1634 if (
const auto *CI =
1639 if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1640 Value *
V = Cexpr->getOperand(0)->stripPointerCasts();
1641 AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1642 AggBuffer->addZeros(AllocSize);
1653 AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1657 if (
const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1658 AggBuffer->addSymbol(GVar, GVar);
1659 }
else if (
const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1660 const Value *
v = Cexpr->stripPointerCasts();
1661 AggBuffer->addSymbol(v, Cexpr);
1663 AggBuffer->addZeros(AllocSize);
1670 if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1671 bufferAggregateConstant(CPV, AggBuffer);
1672 if (Bytes > AllocSize)
1673 AggBuffer->addZeros(Bytes - AllocSize);
1674 }
else if (isa<ConstantAggregateZero>(CPV))
1675 AggBuffer->addZeros(Bytes);
1686void NVPTXAsmPrinter::bufferAggregateConstant(
const Constant *CPV,
1687 AggBuffer *aggBuffer) {
1690 auto ExtendBuffer = [](
APInt Val, AggBuffer *Buffer) {
1696 if (
const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1697 ExtendBuffer(CI->
getValue(), aggBuffer);
1702 if (
const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1703 if (CFP->getType()->isFP128Ty()) {
1704 ExtendBuffer(CFP->getValueAPF().bitcastToAPInt(), aggBuffer);
1710 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1712 bufferLEByte(cast<Constant>(
Op), 0, aggBuffer);
1716 if (
const auto *CDS = dyn_cast<ConstantDataSequential>(CPV)) {
1717 for (
unsigned I :
llvm::seq(CDS->getNumElements()))
1718 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(
I)), 0, aggBuffer);
1722 if (isa<ConstantStruct>(CPV)) {
1727 ?
DL.getStructLayout(ST)->getElementOffset(0) +
1728 DL.getTypeAllocSize(ST)
1729 :
DL.getStructLayout(ST)->getElementOffset(
I + 1);
1730 int Bytes = EndOffset -
DL.getStructLayout(ST)->getElementOffset(
I);
1731 bufferLEByte(cast<Constant>(CPV->
getOperand(
I)), Bytes, aggBuffer);
1744NVPTXAsmPrinter::lowerConstantForGV(
const Constant *CV,
1745 bool ProcessingGeneric)
const {
1751 if (
const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1754 if (
const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1756 if (ProcessingGeneric)
1766 switch (
CE->getOpcode()) {
1770 case Instruction::AddrSpaceCast: {
1773 if (DstTy->getAddressSpace() == 0)
1774 return lowerConstantForGV(cast<const Constant>(
CE->getOperand(0)),
true);
1779 case Instruction::GetElementPtr: {
1783 APInt OffsetAI(
DL.getPointerTypeSizeInBits(
CE->getType()), 0);
1784 cast<GEPOperator>(CE)->accumulateConstantOffset(
DL, OffsetAI);
1786 const MCExpr *
Base = lowerConstantForGV(
CE->getOperand(0),
1791 int64_t
Offset = OffsetAI.getSExtValue();
1796 case Instruction::Trunc:
1802 case Instruction::BitCast:
1803 return lowerConstantForGV(
CE->getOperand(0), ProcessingGeneric);
1805 case Instruction::IntToPtr: {
1814 return lowerConstantForGV(
Op, ProcessingGeneric);
1819 case Instruction::PtrToInt: {
1825 Type *Ty =
CE->getType();
1827 const MCExpr *OpExpr = lowerConstantForGV(
Op, ProcessingGeneric);
1831 if (
DL.getTypeAllocSize(Ty) ==
DL.getTypeAllocSize(
Op->getType()))
1837 unsigned InBits =
DL.getTypeAllocSizeInBits(
Op->getType());
1844 case Instruction::Add: {
1845 const MCExpr *
LHS = lowerConstantForGV(
CE->getOperand(0), ProcessingGeneric);
1846 const MCExpr *
RHS = lowerConstantForGV(
CE->getOperand(1), ProcessingGeneric);
1847 switch (
CE->getOpcode()) {
1859 return lowerConstantForGV(
C, ProcessingGeneric);
1864 OS <<
"Unsupported expression in static initializer: ";
1865 CE->printAsOperand(
OS,
false,
1876bool NVPTXAsmPrinter::PrintAsmOperand(
const MachineInstr *
MI,
unsigned OpNo,
1878 if (ExtraCode && ExtraCode[0]) {
1879 if (ExtraCode[1] != 0)
1882 switch (ExtraCode[0]) {
1891 printOperand(
MI, OpNo, O);
1896bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
const MachineInstr *
MI,
1898 const char *ExtraCode,
1900 if (ExtraCode && ExtraCode[0])
1904 printMemOperand(
MI, OpNo, O);
1910void NVPTXAsmPrinter::printOperand(
const MachineInstr *
MI,
unsigned OpNum,
1916 if (MO.
getReg() == NVPTX::VRDepot)
1921 emitVirtualRegister(MO.
getReg(), O);
1946void NVPTXAsmPrinter::printMemOperand(
const MachineInstr *
MI,
unsigned OpNum,
1948 printOperand(
MI, OpNum, O);
1950 if (Modifier && strcmp(Modifier,
"add") == 0) {
1952 printOperand(
MI, OpNum + 1, O);
1954 if (
MI->getOperand(OpNum + 1).isImm() &&
1955 MI->getOperand(OpNum + 1).getImm() == 0)
1958 printOperand(
MI, OpNum + 1, O);
1969LLVMInitializeNVPTXAsmPrinter() {
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_EXTERNAL_VISIBILITY
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Looks at all the uses of the given value. Returns the Liveness deduced from the uses of this value. Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses. If the result is Live, MaybeLiveUses might be modified but its content should be ignored (since it might not be complete). DeadArgumentEliminationPass
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Module.h This file contains the declarations for the Module class.
Register const TargetRegisterInfo * TRI
static void discoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
discoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static bool canDemoteGlobalVar(const GlobalVariable *GV, Function const *&f)
static bool useFuncSeen(const Constant *C, const SmallPtrSetImpl< const Function * > &SeenSet)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
static bool usedInGlobalVarDef(const Constant *C)
static bool usedInOneFunc(const User *U, Function const *&OneFunc)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
This file defines the SmallString class.
This file defines the SmallVector class.
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
APInt bitcastToAPInt() const
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
unsigned getBitWidth() const
Return the number of bits in the APInt.
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
MCSymbol * getSymbol(const GlobalValue *GV) const
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
TargetMachine & TM
Target machine description.
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
const MCAsmInfo * MAI
Target Asm Printer information.
MachineFunction * MF
The current machine function.
bool hasDebugInfo() const
Returns true if valid debug info is present.
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
unsigned getFunctionNumber() const
Return a unique ID for the current function.
MCSymbol * CurrentFnSym
The symbol for the current function.
MCContext & OutContext
This is the context for the output file that we are streaming.
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
const DataLayout & getDataLayout() const
Return information about data layout.
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
MCSymbol * GetExternalSymbolSymbol(const Twine &Sym) const
Return the MCSymbol for the specified ExternalSymbol.
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
LLVM Basic Block Representation.
A constant value that is initialized with an expression using other constant values.
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Subprogram description. Uses SubclassData1.
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
Implements a dense probed hash-table based set.
Diagnostic information for unsupported feature in backend.
DISubprogram * getSubprogram() const
Get the attached subprogram.
LLVM_ABI const GlobalObject * getAliaseeObject() const
StringRef getSection() const
Get the custom section of this global if it has one.
bool hasSection() const
Check if this global has a custom object file section.
bool hasLinkOnceLinkage() const
bool hasExternalLinkage() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
unsigned getAddressSpace() const
Module * getParent()
Get the module that this global value is contained inside of...
PointerType * getType() const
Global values are always pointers.
bool hasWeakLinkage() const
bool hasCommonLinkage() const
bool hasAvailableExternallyLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isLoopHeader(const BlockT *BB) const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void printExpr(raw_ostream &, const MCExpr &) const
unsigned getCodePointerSize() const
Get the code pointer size in bytes.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
const MCAsmInfo * getAsmInfo() const
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Base class for the full range of assembler expressions which are needed for parsing.
Instances of this class represent a single low-level machine instruction.
void addOperand(const MCOperand Op)
void setOpcode(unsigned Op)
Instances of this class represent operands of the MCInst class.
static MCOperand createExpr(const MCExpr *Val)
static MCOperand createReg(MCRegister Reg)
static MCOperand createImm(int64_t Val)
Represent a reference to a symbol from inside an expression.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
iterator_range< pred_iterator > predecessors()
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
MachineBasicBlock * getMBB() const
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_FPImmediate
Floating-point immediate operand.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
bool def_empty(Register RegNo) const
def_empty - Return true if there are no instructions defining the specified register (it may be live-...
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
A Module instance is used to store all the information related to an LLVM module.
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
std::string getVirtualRegisterName(unsigned) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not register based.
static const NVPTXFloatMCExpr * createConstantBFPHalf(const APFloat &Flt, MCContext &Ctx)
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
static const char * getRegisterName(MCRegister Reg)
bool checkImageHandleSymbol(StringRef Symbol) const
Check if the symbol has a mapping.
void clearDebugRegisterMap() const
const char * getName(unsigned RegNo) const
std::string getTargetName() const
unsigned getMaxRequiredAlignment() const
bool hasMaskOperator() const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getPTXVersion() const
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getSmVersion() const
NVPTX::DrvInterface getDrvInterface() const
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Implements NVPTX-specific streamer.
void closeLastSection()
Close last section.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Class to represent struct types.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
unsigned getPointerSizeInBits(unsigned AS) const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isPointerTy() const
True if this is an instance of PointerType.
@ HalfTyID
16-bit floating point type
@ VoidTyID
type with no size
@ FloatTyID
32-bit floating point type
@ IntegerTyID
Arbitrary bit width integers.
@ FixedVectorTyID
Fixed width SIMD vector type.
@ BFloatTyID
16-bit floating point type (7-bit significand)
@ DoubleTyID
64-bit floating point type
@ FP128TyID
128-bit floating point type (112-bit significand)
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeID getTypeID() const
Return the type id for the type.
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
A raw_ostream that writes to a SmallVector or SmallString.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
Reg
All possible values of the reg field in the ModR/M byte.
uint64_t read64le(const void *P)
uint32_t read32le(const void *P)
This is an optimization pass for GlobalISel generic memory operations.
bool isManaged(const Value &V)
StringRef getNVPTXRegClassStr(TargetRegisterClass const *RC)
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
MaybeAlign getAlign(const CallInst &I, unsigned Index)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
std::optional< unsigned > getMaxNReg(const Function &F)
StringRef getSamplerName(const Value &V)
bool isImageReadWrite(const Value &V)
bool isImageReadOnly(const Value &V)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::optional< unsigned > getMinCTASm(const Function &F)
SmallVector< unsigned, 3 > getReqNTID(const Function &F)
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
bool isSampler(const Value &V)
unsigned promoteScalarArgumentSize(unsigned size)
void clearAnnotationCache(const Module *Mod)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool shouldPassAsArray(Type *Ty)
StringRef getNVPTXRegClassName(TargetRegisterClass const *RC)
bool isSurface(const Value &V)
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
std::optional< unsigned > getMaxClusterRank(const Function &F)
StringRef getTextureName(const Value &V)
SmallVector< unsigned, 3 > getMaxNTID(const Function &F)
LLVM_ABI void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
StringRef getSurfaceName(const Value &V)
Target & getTheNVPTXTarget64()
bool isKernelFunction(const Function &F)
bool isTexture(const Value &V)
bool isImageWriteOnly(const Value &V)
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
bool hasBlocksAreClusters(const Function &F)
SmallVector< unsigned, 3 > getClusterDim(const Function &F)
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
LLVM_ABI MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Target & getTheNVPTXTarget32()
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
static constexpr roundingMode rmNearestTiesToEven
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...