LLVM 21.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a printer that converts from our internal representation
10// of machine-dependent LLVM code to NVPTX assembly language.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXAsmPrinter.h"
19#include "NVPTX.h"
20#include "NVPTXMCExpr.h"
22#include "NVPTXRegisterInfo.h"
23#include "NVPTXSubtarget.h"
24#include "NVPTXTargetMachine.h"
25#include "NVPTXUtilities.h"
27#include "cl_common_defines.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/DenseSet.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/ADT/Twine.h"
50#include "llvm/IR/Attributes.h"
51#include "llvm/IR/BasicBlock.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/Constants.h"
54#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/DebugInfo.h"
57#include "llvm/IR/DebugLoc.h"
59#include "llvm/IR/Function.h"
60#include "llvm/IR/GlobalAlias.h"
61#include "llvm/IR/GlobalValue.h"
63#include "llvm/IR/Instruction.h"
64#include "llvm/IR/LLVMContext.h"
65#include "llvm/IR/Module.h"
66#include "llvm/IR/Operator.h"
67#include "llvm/IR/Type.h"
68#include "llvm/IR/User.h"
69#include "llvm/MC/MCExpr.h"
70#include "llvm/MC/MCInst.h"
71#include "llvm/MC/MCInstrDesc.h"
72#include "llvm/MC/MCStreamer.h"
73#include "llvm/MC/MCSymbol.h"
78#include "llvm/Support/Endian.h"
85#include <cassert>
86#include <cstdint>
87#include <cstring>
88#include <string>
89#include <utility>
90#include <vector>
91
92using namespace llvm;
93
94#define DEPOTNAME "__local_depot"
95
96/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
97/// depends.
98static void
101 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
102 Globals.insert(GV);
103 else {
104 if (const User *U = dyn_cast<User>(V)) {
105 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
106 DiscoverDependentGlobals(U->getOperand(i), Globals);
107 }
108 }
109 }
110}
111
112/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
113/// instances to be emitted, but only after any dependents have been added
114/// first.
115static void
120 // Have we already visited this one?
121 if (Visited.count(GV))
122 return;
123
124 // Do we have a circular dependency?
125 if (!Visiting.insert(GV).second)
126 report_fatal_error("Circular dependency found in global variable set");
127
128 // Make sure we visit all dependents first
130 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
131 DiscoverDependentGlobals(GV->getOperand(i), Others);
132
133 for (const GlobalVariable *GV : Others)
134 VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
135
136 // Now we can visit ourself
137 Order.push_back(GV);
138 Visited.insert(GV);
139 Visiting.erase(GV);
140}
141
142void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
143 NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
144 getSubtargetInfo().getFeatureBits());
145
146 MCInst Inst;
147 lowerToMCInst(MI, Inst);
149}
150
151// Handle symbol backtracking for targets that do not support image handles
152bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
153 unsigned OpNo, MCOperand &MCOp) {
154 const MachineOperand &MO = MI->getOperand(OpNo);
155 const MCInstrDesc &MCID = MI->getDesc();
156
157 if (MCID.TSFlags & NVPTXII::IsTexFlag) {
158 // This is a texture fetch, so operand 4 is a texref and operand 5 is
159 // a samplerref
160 if (OpNo == 4 && MO.isImm()) {
161 lowerImageHandleSymbol(MO.getImm(), MCOp);
162 return true;
163 }
164 if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
165 lowerImageHandleSymbol(MO.getImm(), MCOp);
166 return true;
167 }
168
169 return false;
170 } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
171 unsigned VecSize =
172 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
173
174 // For a surface load of vector size N, the Nth operand will be the surfref
175 if (OpNo == VecSize && MO.isImm()) {
176 lowerImageHandleSymbol(MO.getImm(), MCOp);
177 return true;
178 }
179
180 return false;
181 } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
182 // This is a surface store, so operand 0 is a surfref
183 if (OpNo == 0 && MO.isImm()) {
184 lowerImageHandleSymbol(MO.getImm(), MCOp);
185 return true;
186 }
187
188 return false;
189 } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
190 // This is a query, so operand 1 is a surfref/texref
191 if (OpNo == 1 && MO.isImm()) {
192 lowerImageHandleSymbol(MO.getImm(), MCOp);
193 return true;
194 }
195
196 return false;
197 }
198
199 return false;
200}
201
202void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
203 // Ewwww
204 TargetMachine &TM = const_cast<TargetMachine &>(MF->getTarget());
205 NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine &>(TM);
207 StringRef Sym = MFI->getImageHandleSymbol(Index);
208 StringRef SymName = nvTM.getStrPool().save(Sym);
209 MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(SymName));
210}
211
212void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
213 OutMI.setOpcode(MI->getOpcode());
214 // Special: Do not mangle symbol operand of CALL_PROTOTYPE
215 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
216 const MachineOperand &MO = MI->getOperand(0);
217 OutMI.addOperand(GetSymbolRef(
219 return;
220 }
221
222 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
223 const MachineOperand &MO = MI->getOperand(i);
224
225 MCOperand MCOp;
226 if (lowerImageHandleOperand(MI, i, MCOp)) {
227 OutMI.addOperand(MCOp);
228 continue;
229 }
230
231 if (lowerOperand(MO, MCOp))
232 OutMI.addOperand(MCOp);
233 }
234}
235
236bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
237 MCOperand &MCOp) {
238 switch (MO.getType()) {
239 default: llvm_unreachable("unknown operand type");
241 MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
242 break;
244 MCOp = MCOperand::createImm(MO.getImm());
245 break;
248 MO.getMBB()->getSymbol(), OutContext));
249 break;
251 MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
252 break;
254 MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
255 break;
257 const ConstantFP *Cnt = MO.getFPImm();
258 const APFloat &Val = Cnt->getValueAPF();
259
260 switch (Cnt->getType()->getTypeID()) {
261 default: report_fatal_error("Unsupported FP type"); break;
262 case Type::HalfTyID:
265 break;
266 case Type::BFloatTyID:
269 break;
270 case Type::FloatTyID:
273 break;
274 case Type::DoubleTyID:
277 break;
278 }
279 break;
280 }
281 }
282 return true;
283}
284
285unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
287 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
288
289 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
290 unsigned RegNum = RegMap[Reg];
291
292 // Encode the register class in the upper 4 bits
293 // Must be kept in sync with NVPTXInstPrinter::printRegName
294 unsigned Ret = 0;
295 if (RC == &NVPTX::Int1RegsRegClass) {
296 Ret = (1 << 28);
297 } else if (RC == &NVPTX::Int16RegsRegClass) {
298 Ret = (2 << 28);
299 } else if (RC == &NVPTX::Int32RegsRegClass) {
300 Ret = (3 << 28);
301 } else if (RC == &NVPTX::Int64RegsRegClass) {
302 Ret = (4 << 28);
303 } else if (RC == &NVPTX::Float32RegsRegClass) {
304 Ret = (5 << 28);
305 } else if (RC == &NVPTX::Float64RegsRegClass) {
306 Ret = (6 << 28);
307 } else if (RC == &NVPTX::Int128RegsRegClass) {
308 Ret = (7 << 28);
309 } else {
310 report_fatal_error("Bad register class");
311 }
312
313 // Insert the vreg number
314 Ret |= (RegNum & 0x0FFFFFFF);
315 return Ret;
316 } else {
317 // Some special-use registers are actually physical registers.
318 // Encode this as the register class ID of 0 and the real register ID.
319 return Reg & 0x0FFFFFFF;
320 }
321}
322
323MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
324 const MCExpr *Expr;
326 OutContext);
327 return MCOperand::createExpr(Expr);
328}
329
330static bool ShouldPassAsArray(Type *Ty) {
331 return Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128) ||
332 Ty->isHalfTy() || Ty->isBFloatTy();
333}
334
335void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
336 const DataLayout &DL = getDataLayout();
338 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
339
340 Type *Ty = F->getReturnType();
341 if (Ty->getTypeID() == Type::VoidTyID)
342 return;
343 O << " (";
344
345 if ((Ty->isFloatingPointTy() || Ty->isIntegerTy()) &&
346 !ShouldPassAsArray(Ty)) {
347 unsigned size = 0;
348 if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
349 size = ITy->getBitWidth();
350 } else {
351 assert(Ty->isFloatingPointTy() && "Floating point type expected here");
353 }
355 O << ".param .b" << size << " func_retval0";
356 } else if (isa<PointerType>(Ty)) {
357 O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
358 << " func_retval0";
359 } else if (ShouldPassAsArray(Ty)) {
360 unsigned totalsz = DL.getTypeAllocSize(Ty);
361 Align RetAlignment = TLI->getFunctionArgumentAlignment(
363 O << ".param .align " << RetAlignment.value() << " .b8 func_retval0["
364 << totalsz << "]";
365 } else
366 llvm_unreachable("Unknown return type");
367 O << ") ";
368}
369
370void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
371 raw_ostream &O) {
372 const Function &F = MF.getFunction();
373 printReturnValStr(&F, O);
374}
375
376// Return true if MBB is the header of a loop marked with
377// llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
378bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
379 const MachineBasicBlock &MBB) const {
380 MachineLoopInfo &LI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
381 // We insert .pragma "nounroll" only to the loop header.
382 if (!LI.isLoopHeader(&MBB))
383 return false;
384
385 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
386 // we iterate through each back edge of the loop with header MBB, and check
387 // whether its metadata contains llvm.loop.unroll.disable.
388 for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
389 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
390 // Edges from other loops to MBB are not back edges.
391 continue;
392 }
393 if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
394 if (MDNode *LoopID =
395 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
396 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
397 return true;
398 if (MDNode *UnrollCountMD =
399 GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
400 if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
401 ->isOne())
402 return true;
403 }
404 }
405 }
406 }
407 return false;
408}
409
410void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
412 if (isLoopHeaderOfNoUnroll(MBB))
413 OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
414}
415
416void NVPTXAsmPrinter::emitFunctionEntryLabel() {
419
420 if (!GlobalsEmitted) {
421 emitGlobals(*MF->getFunction().getParent());
422 GlobalsEmitted = true;
423 }
424
425 // Set up
426 MRI = &MF->getRegInfo();
427 F = &MF->getFunction();
428 emitLinkageDirective(F, O);
429 if (isKernelFunction(*F))
430 O << ".entry ";
431 else {
432 O << ".func ";
433 printReturnValStr(*MF, O);
434 }
435
437
438 emitFunctionParamList(F, O);
439 O << "\n";
440
441 if (isKernelFunction(*F))
442 emitKernelFunctionDirectives(*F, O);
443
445 O << ".noreturn";
446
447 OutStreamer->emitRawText(O.str());
448
449 VRegMapping.clear();
450 // Emit open brace for function body.
451 OutStreamer->emitRawText(StringRef("{\n"));
452 setAndEmitFunctionVirtualRegisters(*MF);
453 encodeDebugInfoRegisterNumbers(*MF);
454 // Emit initial .loc debug directive for correct relocation symbol data.
455 if (const DISubprogram *SP = MF->getFunction().getSubprogram()) {
456 assert(SP->getUnit());
457 if (!SP->getUnit()->isDebugDirectivesOnly())
459 }
460}
461
463 bool Result = AsmPrinter::runOnMachineFunction(F);
464 // Emit closing brace for the body of function F.
465 // The closing brace must be emitted here because we need to emit additional
466 // debug labels/data after the last basic block.
467 // We need to emit the closing brace here because we don't have function that
468 // finished emission of the function body.
469 OutStreamer->emitRawText(StringRef("}\n"));
470 return Result;
471}
472
473void NVPTXAsmPrinter::emitFunctionBodyStart() {
475 raw_svector_ostream O(Str);
476 emitDemotedVars(&MF->getFunction(), O);
477 OutStreamer->emitRawText(O.str());
478}
479
480void NVPTXAsmPrinter::emitFunctionBodyEnd() {
481 VRegMapping.clear();
482}
483
487 return OutContext.getOrCreateSymbol(Str);
488}
489
490void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
491 Register RegNo = MI->getOperand(0).getReg();
492 if (RegNo.isVirtual()) {
493 OutStreamer->AddComment(Twine("implicit-def: ") +
495 } else {
496 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
497 OutStreamer->AddComment(Twine("implicit-def: ") +
498 STI.getRegisterInfo()->getName(RegNo));
499 }
500 OutStreamer->addBlankLine();
501}
502
503void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
504 raw_ostream &O) const {
505 // If the NVVM IR has some of reqntid* specified, then output
506 // the reqntid directive, and set the unspecified ones to 1.
507 // If none of Reqntid* is specified, don't output reqntid directive.
508 std::optional<unsigned> Reqntidx = getReqNTIDx(F);
509 std::optional<unsigned> Reqntidy = getReqNTIDy(F);
510 std::optional<unsigned> Reqntidz = getReqNTIDz(F);
511
512 if (Reqntidx || Reqntidy || Reqntidz)
513 O << ".reqntid " << Reqntidx.value_or(1) << ", " << Reqntidy.value_or(1)
514 << ", " << Reqntidz.value_or(1) << "\n";
515
516 // If the NVVM IR has some of maxntid* specified, then output
517 // the maxntid directive, and set the unspecified ones to 1.
518 // If none of maxntid* is specified, don't output maxntid directive.
519 std::optional<unsigned> Maxntidx = getMaxNTIDx(F);
520 std::optional<unsigned> Maxntidy = getMaxNTIDy(F);
521 std::optional<unsigned> Maxntidz = getMaxNTIDz(F);
522
523 if (Maxntidx || Maxntidy || Maxntidz)
524 O << ".maxntid " << Maxntidx.value_or(1) << ", " << Maxntidy.value_or(1)
525 << ", " << Maxntidz.value_or(1) << "\n";
526
527 if (const auto Mincta = getMinCTASm(F))
528 O << ".minnctapersm " << *Mincta << "\n";
529
530 if (const auto Maxnreg = getMaxNReg(F))
531 O << ".maxnreg " << *Maxnreg << "\n";
532
533 // .maxclusterrank directive requires SM_90 or higher, make sure that we
534 // filter it out for lower SM versions, as it causes a hard ptxas crash.
535 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
536 const auto *STI = static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
537
538 if (STI->getSmVersion() >= 90) {
539 std::optional<unsigned> ClusterX = getClusterDimx(F);
540 std::optional<unsigned> ClusterY = getClusterDimy(F);
541 std::optional<unsigned> ClusterZ = getClusterDimz(F);
542
543 if (ClusterX || ClusterY || ClusterZ) {
544 O << ".explicitcluster\n";
545 if (ClusterX.value_or(1) != 0) {
546 assert(ClusterY.value_or(1) && ClusterZ.value_or(1) &&
547 "cluster_dim_x != 0 implies cluster_dim_y and cluster_dim_z "
548 "should be non-zero as well");
549
550 O << ".reqnctapercluster " << ClusterX.value_or(1) << ", "
551 << ClusterY.value_or(1) << ", " << ClusterZ.value_or(1) << "\n";
552 } else {
553 assert(!ClusterY.value_or(1) && !ClusterZ.value_or(1) &&
554 "cluster_dim_x == 0 implies cluster_dim_y and cluster_dim_z "
555 "should be 0 as well");
556 }
557 }
558 if (const auto Maxclusterrank = getMaxClusterRank(F))
559 O << ".maxclusterrank " << *Maxclusterrank << "\n";
560 }
561}
562
563std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
564 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
565
566 std::string Name;
567 raw_string_ostream NameStr(Name);
568
569 VRegRCMap::const_iterator I = VRegMapping.find(RC);
570 assert(I != VRegMapping.end() && "Bad register class");
571 const DenseMap<unsigned, unsigned> &RegMap = I->second;
572
573 VRegMap::const_iterator VI = RegMap.find(Reg);
574 assert(VI != RegMap.end() && "Bad virtual register");
575 unsigned MappedVR = VI->second;
576
577 NameStr << getNVPTXRegClassStr(RC) << MappedVR;
578
579 return Name;
580}
581
582void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
583 raw_ostream &O) {
584 O << getVirtualRegisterName(vr);
585}
586
587void NVPTXAsmPrinter::emitAliasDeclaration(const GlobalAlias *GA,
588 raw_ostream &O) {
589 const Function *F = dyn_cast_or_null<Function>(GA->getAliaseeObject());
590 if (!F || isKernelFunction(*F) || F->isDeclaration())
592 "NVPTX aliasee must be a non-kernel function definition");
593
594 if (GA->hasLinkOnceLinkage() || GA->hasWeakLinkage() ||
596 report_fatal_error("NVPTX aliasee must not be '.weak'");
597
598 emitDeclarationWithName(F, getSymbol(GA), O);
599}
600
601void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
602 emitDeclarationWithName(F, getSymbol(F), O);
603}
604
605void NVPTXAsmPrinter::emitDeclarationWithName(const Function *F, MCSymbol *S,
606 raw_ostream &O) {
607 emitLinkageDirective(F, O);
608 if (isKernelFunction(*F))
609 O << ".entry ";
610 else
611 O << ".func ";
612 printReturnValStr(F, O);
613 S->print(O, MAI);
614 O << "\n";
615 emitFunctionParamList(F, O);
616 O << "\n";
618 O << ".noreturn";
619 O << ";\n";
620}
621
622static bool usedInGlobalVarDef(const Constant *C) {
623 if (!C)
624 return false;
625
626 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
627 return GV->getName() != "llvm.used";
628 }
629
630 for (const User *U : C->users())
631 if (const Constant *C = dyn_cast<Constant>(U))
633 return true;
634
635 return false;
636}
637
638static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
639 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
640 if (othergv->getName() == "llvm.used")
641 return true;
642 }
643
644 if (const Instruction *instr = dyn_cast<Instruction>(U)) {
645 if (instr->getParent() && instr->getParent()->getParent()) {
646 const Function *curFunc = instr->getParent()->getParent();
647 if (oneFunc && (curFunc != oneFunc))
648 return false;
649 oneFunc = curFunc;
650 return true;
651 } else
652 return false;
653 }
654
655 for (const User *UU : U->users())
656 if (!usedInOneFunc(UU, oneFunc))
657 return false;
658
659 return true;
660}
661
662/* Find out if a global variable can be demoted to local scope.
663 * Currently, this is valid for CUDA shared variables, which have local
664 * scope and global lifetime. So the conditions to check are :
665 * 1. Is the global variable in shared address space?
666 * 2. Does it have local linkage?
667 * 3. Is the global variable referenced only in one function?
668 */
669static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
670 if (!gv->hasLocalLinkage())
671 return false;
672 PointerType *Pty = gv->getType();
673 if (Pty->getAddressSpace() != ADDRESS_SPACE_SHARED)
674 return false;
675
676 const Function *oneFunc = nullptr;
677
678 bool flag = usedInOneFunc(gv, oneFunc);
679 if (!flag)
680 return false;
681 if (!oneFunc)
682 return false;
683 f = oneFunc;
684 return true;
685}
686
687static bool useFuncSeen(const Constant *C,
689 for (const User *U : C->users()) {
690 if (const Constant *cu = dyn_cast<Constant>(U)) {
691 if (useFuncSeen(cu, seenMap))
692 return true;
693 } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
694 const BasicBlock *bb = I->getParent();
695 if (!bb)
696 continue;
697 const Function *caller = bb->getParent();
698 if (!caller)
699 continue;
700 if (seenMap.contains(caller))
701 return true;
702 }
703 }
704 return false;
705}
706
707void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
709 for (const Function &F : M) {
710 if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
711 emitDeclaration(&F, O);
712 continue;
713 }
714
715 if (F.isDeclaration()) {
716 if (F.use_empty())
717 continue;
718 if (F.getIntrinsicID())
719 continue;
720 emitDeclaration(&F, O);
721 continue;
722 }
723 for (const User *U : F.users()) {
724 if (const Constant *C = dyn_cast<Constant>(U)) {
725 if (usedInGlobalVarDef(C)) {
726 // The use is in the initialization of a global variable
727 // that is a function pointer, so print a declaration
728 // for the original function
729 emitDeclaration(&F, O);
730 break;
731 }
732 // Emit a declaration of this function if the function that
733 // uses this constant expr has already been seen.
734 if (useFuncSeen(C, seenMap)) {
735 emitDeclaration(&F, O);
736 break;
737 }
738 }
739
740 if (!isa<Instruction>(U))
741 continue;
742 const Instruction *instr = cast<Instruction>(U);
743 const BasicBlock *bb = instr->getParent();
744 if (!bb)
745 continue;
746 const Function *caller = bb->getParent();
747 if (!caller)
748 continue;
749
750 // If a caller has already been seen, then the caller is
751 // appearing in the module before the callee. so print out
752 // a declaration for the callee.
753 if (seenMap.contains(caller)) {
754 emitDeclaration(&F, O);
755 break;
756 }
757 }
758 seenMap[&F] = true;
759 }
760 for (const GlobalAlias &GA : M.aliases())
761 emitAliasDeclaration(&GA, O);
762}
763
764void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
765 // Construct a default subtarget off of the TargetMachine defaults. The
766 // rest of NVPTX isn't friendly to change subtargets per function and
767 // so the default TargetMachine will have all of the options.
768 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
769 const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
770 SmallString<128> Str1;
771 raw_svector_ostream OS1(Str1);
772
773 // Emit header before any dwarf directives are emitted below.
774 emitHeader(M, OS1, *STI);
775 OutStreamer->emitRawText(OS1.str());
776}
777
779 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
780 const NVPTXSubtarget &STI =
781 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
782 if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
783 report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
784
785 // We need to call the parent's one explicitly.
786 bool Result = AsmPrinter::doInitialization(M);
787
788 GlobalsEmitted = false;
789
790 return Result;
791}
792
793void NVPTXAsmPrinter::emitGlobals(const Module &M) {
794 SmallString<128> Str2;
795 raw_svector_ostream OS2(Str2);
796
797 emitDeclarations(M, OS2);
798
799 // As ptxas does not support forward references of globals, we need to first
800 // sort the list of module-level globals in def-use order. We visit each
801 // global variable in order, and ensure that we emit it *after* its dependent
802 // globals. We use a little extra memory maintaining both a set and a list to
803 // have fast searches while maintaining a strict ordering.
807
808 // Visit each global variable, in order
809 for (const GlobalVariable &I : M.globals())
810 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
811
812 assert(GVVisited.size() == M.global_size() && "Missed a global variable");
813 assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
814
815 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
816 const NVPTXSubtarget &STI =
817 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
818
819 // Print out module-level global variables in proper order
820 for (const GlobalVariable *GV : Globals)
821 printModuleLevelGV(GV, OS2, /*processDemoted=*/false, STI);
822
823 OS2 << '\n';
824
825 OutStreamer->emitRawText(OS2.str());
826}
827
828void NVPTXAsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) {
831
832 MCSymbol *Name = getSymbol(&GA);
833
834 OS << ".alias " << Name->getName() << ", " << GA.getAliaseeObject()->getName()
835 << ";\n";
836
837 OutStreamer->emitRawText(OS.str());
838}
839
840void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
841 const NVPTXSubtarget &STI) {
842 O << "//\n";
843 O << "// Generated by LLVM NVPTX Back-End\n";
844 O << "//\n";
845 O << "\n";
846
847 unsigned PTXVersion = STI.getPTXVersion();
848 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
849
850 O << ".target ";
851 O << STI.getTargetName();
852
853 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
854 if (NTM.getDrvInterface() == NVPTX::NVCL)
855 O << ", texmode_independent";
856
857 bool HasFullDebugInfo = false;
858 for (DICompileUnit *CU : M.debug_compile_units()) {
859 switch(CU->getEmissionKind()) {
862 break;
865 HasFullDebugInfo = true;
866 break;
867 }
868 if (HasFullDebugInfo)
869 break;
870 }
871 if (HasFullDebugInfo)
872 O << ", debug";
873
874 O << "\n";
875
876 O << ".address_size ";
877 if (NTM.is64Bit())
878 O << "64";
879 else
880 O << "32";
881 O << "\n";
882
883 O << "\n";
884}
885
887 // If we did not emit any functions, then the global declarations have not
888 // yet been emitted.
889 if (!GlobalsEmitted) {
890 emitGlobals(M);
891 GlobalsEmitted = true;
892 }
893
894 // call doFinalization
895 bool ret = AsmPrinter::doFinalization(M);
896
898
899 auto *TS =
900 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
901 // Close the last emitted section
902 if (hasDebugInfo()) {
903 TS->closeLastSection();
904 // Emit empty .debug_macinfo section for better support of the empty files.
905 OutStreamer->emitRawText("\t.section\t.debug_macinfo\t{\t}");
906 }
907
908 // Output last DWARF .file directives, if any.
909 TS->outputDwarfFileDirectives();
910
911 return ret;
912}
913
914// This function emits appropriate linkage directives for
915// functions and global variables.
916//
917// extern function declaration -> .extern
918// extern function definition -> .visible
919// external global variable with init -> .visible
920// external without init -> .extern
921// appending -> not allowed, assert.
922// for any linkage other than
923// internal, private, linker_private,
924// linker_private_weak, linker_private_weak_def_auto,
925// we emit -> .weak.
926
927void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
928 raw_ostream &O) {
929 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
930 if (V->hasExternalLinkage()) {
931 if (isa<GlobalVariable>(V)) {
932 const GlobalVariable *GVar = cast<GlobalVariable>(V);
933 if (GVar) {
934 if (GVar->hasInitializer())
935 O << ".visible ";
936 else
937 O << ".extern ";
938 }
939 } else if (V->isDeclaration())
940 O << ".extern ";
941 else
942 O << ".visible ";
943 } else if (V->hasAppendingLinkage()) {
944 std::string msg;
945 msg.append("Error: ");
946 msg.append("Symbol ");
947 if (V->hasName())
948 msg.append(std::string(V->getName()));
949 msg.append("has unsupported appending linkage type");
950 llvm_unreachable(msg.c_str());
951 } else if (!V->hasInternalLinkage() &&
952 !V->hasPrivateLinkage()) {
953 O << ".weak ";
954 }
955 }
956}
957
958void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
959 raw_ostream &O, bool processDemoted,
960 const NVPTXSubtarget &STI) {
961 // Skip meta data
962 if (GVar->hasSection()) {
963 if (GVar->getSection() == "llvm.metadata")
964 return;
965 }
966
967 // Skip LLVM intrinsic global variables
968 if (GVar->getName().starts_with("llvm.") ||
969 GVar->getName().starts_with("nvvm."))
970 return;
971
972 const DataLayout &DL = getDataLayout();
973
974 // GlobalVariables are always constant pointers themselves.
975 Type *ETy = GVar->getValueType();
976
977 if (GVar->hasExternalLinkage()) {
978 if (GVar->hasInitializer())
979 O << ".visible ";
980 else
981 O << ".extern ";
982 } else if (STI.getPTXVersion() >= 50 && GVar->hasCommonLinkage() &&
984 O << ".common ";
985 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
987 GVar->hasCommonLinkage()) {
988 O << ".weak ";
989 }
990
991 if (isTexture(*GVar)) {
992 O << ".global .texref " << getTextureName(*GVar) << ";\n";
993 return;
994 }
995
996 if (isSurface(*GVar)) {
997 O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
998 return;
999 }
1000
1001 if (GVar->isDeclaration()) {
1002 // (extern) declarations, no definition or initializer
1003 // Currently the only known declaration is for an automatic __local
1004 // (.shared) promoted to global.
1005 emitPTXGlobalVariable(GVar, O, STI);
1006 O << ";\n";
1007 return;
1008 }
1009
1010 if (isSampler(*GVar)) {
1011 O << ".global .samplerref " << getSamplerName(*GVar);
1012
1013 const Constant *Initializer = nullptr;
1014 if (GVar->hasInitializer())
1015 Initializer = GVar->getInitializer();
1016 const ConstantInt *CI = nullptr;
1017 if (Initializer)
1018 CI = dyn_cast<ConstantInt>(Initializer);
1019 if (CI) {
1020 unsigned sample = CI->getZExtValue();
1021
1022 O << " = { ";
1023
1024 for (int i = 0,
1025 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1026 i < 3; i++) {
1027 O << "addr_mode_" << i << " = ";
1028 switch (addr) {
1029 case 0:
1030 O << "wrap";
1031 break;
1032 case 1:
1033 O << "clamp_to_border";
1034 break;
1035 case 2:
1036 O << "clamp_to_edge";
1037 break;
1038 case 3:
1039 O << "wrap";
1040 break;
1041 case 4:
1042 O << "mirror";
1043 break;
1044 }
1045 O << ", ";
1046 }
1047 O << "filter_mode = ";
1048 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1049 case 0:
1050 O << "nearest";
1051 break;
1052 case 1:
1053 O << "linear";
1054 break;
1055 case 2:
1056 llvm_unreachable("Anisotropic filtering is not supported");
1057 default:
1058 O << "nearest";
1059 break;
1060 }
1061 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1062 O << ", force_unnormalized_coords = 1";
1063 }
1064 O << " }";
1065 }
1066
1067 O << ";\n";
1068 return;
1069 }
1070
1071 if (GVar->hasPrivateLinkage()) {
1072 if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
1073 return;
1074
1075 // FIXME - need better way (e.g. Metadata) to avoid generating this global
1076 if (strncmp(GVar->getName().data(), "filename", 8) == 0)
1077 return;
1078 if (GVar->use_empty())
1079 return;
1080 }
1081
1082 const Function *demotedFunc = nullptr;
1083 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1084 O << "// " << GVar->getName() << " has been demoted\n";
1085 localDecls[demotedFunc].push_back(GVar);
1086 return;
1087 }
1088
1089 O << ".";
1090 emitPTXAddressSpace(GVar->getAddressSpace(), O);
1091
1092 if (isManaged(*GVar)) {
1093 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1095 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1096 }
1097 O << " .attribute(.managed)";
1098 }
1099
1100 if (MaybeAlign A = GVar->getAlign())
1101 O << " .align " << A->value();
1102 else
1103 O << " .align " << (int)DL.getPrefTypeAlign(ETy).value();
1104
1105 if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
1106 (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
1107 O << " .";
1108 // Special case: ABI requires that we use .u8 for predicates
1109 if (ETy->isIntegerTy(1))
1110 O << "u8";
1111 else
1112 O << getPTXFundamentalTypeStr(ETy, false);
1113 O << " ";
1114 getSymbol(GVar)->print(O, MAI);
1115
1116 // Ptx allows variable initilization only for constant and global state
1117 // spaces.
1118 if (GVar->hasInitializer()) {
1119 if ((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1120 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1121 const Constant *Initializer = GVar->getInitializer();
1122 // 'undef' is treated as there is no value specified.
1123 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1124 O << " = ";
1125 printScalarConstant(Initializer, O);
1126 }
1127 } else {
1128 // The frontend adds zero-initializer to device and constant variables
1129 // that don't have an initial value, and UndefValue to shared
1130 // variables, so skip warning for this case.
1131 if (!GVar->getInitializer()->isNullValue() &&
1132 !isa<UndefValue>(GVar->getInitializer())) {
1133 report_fatal_error("initial value of '" + GVar->getName() +
1134 "' is not allowed in addrspace(" +
1135 Twine(GVar->getAddressSpace()) + ")");
1136 }
1137 }
1138 }
1139 } else {
1140 uint64_t ElementSize = 0;
1141
1142 // Although PTX has direct support for struct type and array type and
1143 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1144 // targets that support these high level field accesses. Structs, arrays
1145 // and vectors are lowered into arrays of bytes.
1146 switch (ETy->getTypeID()) {
1147 case Type::IntegerTyID: // Integers larger than 64 bits
1148 case Type::StructTyID:
1149 case Type::ArrayTyID:
1151 ElementSize = DL.getTypeStoreSize(ETy);
1152 // Ptx allows variable initilization only for constant and
1153 // global state spaces.
1154 if (((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1155 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1156 GVar->hasInitializer()) {
1157 const Constant *Initializer = GVar->getInitializer();
1158 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1159 AggBuffer aggBuffer(ElementSize, *this);
1160 bufferAggregateConstant(Initializer, &aggBuffer);
1161 if (aggBuffer.numSymbols()) {
1162 unsigned int ptrSize = MAI->getCodePointerSize();
1163 if (ElementSize % ptrSize ||
1164 !aggBuffer.allSymbolsAligned(ptrSize)) {
1165 // Print in bytes and use the mask() operator for pointers.
1166 if (!STI.hasMaskOperator())
1168 "initialized packed aggregate with pointers '" +
1169 GVar->getName() +
1170 "' requires at least PTX ISA version 7.1");
1171 O << " .u8 ";
1172 getSymbol(GVar)->print(O, MAI);
1173 O << "[" << ElementSize << "] = {";
1174 aggBuffer.printBytes(O);
1175 O << "}";
1176 } else {
1177 O << " .u" << ptrSize * 8 << " ";
1178 getSymbol(GVar)->print(O, MAI);
1179 O << "[" << ElementSize / ptrSize << "] = {";
1180 aggBuffer.printWords(O);
1181 O << "}";
1182 }
1183 } else {
1184 O << " .b8 ";
1185 getSymbol(GVar)->print(O, MAI);
1186 O << "[" << ElementSize << "] = {";
1187 aggBuffer.printBytes(O);
1188 O << "}";
1189 }
1190 } else {
1191 O << " .b8 ";
1192 getSymbol(GVar)->print(O, MAI);
1193 if (ElementSize) {
1194 O << "[";
1195 O << ElementSize;
1196 O << "]";
1197 }
1198 }
1199 } else {
1200 O << " .b8 ";
1201 getSymbol(GVar)->print(O, MAI);
1202 if (ElementSize) {
1203 O << "[";
1204 O << ElementSize;
1205 O << "]";
1206 }
1207 }
1208 break;
1209 default:
1210 llvm_unreachable("type not supported yet");
1211 }
1212 }
1213 O << ";\n";
1214}
1215
1216void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
1217 const Value *v = Symbols[nSym];
1218 const Value *v0 = SymbolsBeforeStripping[nSym];
1219 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1220 MCSymbol *Name = AP.getSymbol(GVar);
1221 PointerType *PTy = dyn_cast<PointerType>(v0->getType());
1222 // Is v0 a generic pointer?
1223 bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1224 if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
1225 os << "generic(";
1226 Name->print(os, AP.MAI);
1227 os << ")";
1228 } else {
1229 Name->print(os, AP.MAI);
1230 }
1231 } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
1232 const MCExpr *Expr = AP.lowerConstantForGV(cast<Constant>(CExpr), false);
1233 AP.printMCExpr(*Expr, os);
1234 } else
1235 llvm_unreachable("symbol type unknown");
1236}
1237
1238void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1239 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1240 // Do not emit trailing zero initializers. They will be zero-initialized by
1241 // ptxas. This saves on both space requirements for the generated PTX and on
1242 // memory use by ptxas. (See:
1243 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#global-state-space)
1244 unsigned int InitializerCount = size;
1245 // TODO: symbols make this harder, but it would still be good to trim trailing
1246 // 0s for aggs with symbols as well.
1247 if (numSymbols() == 0)
1248 while (InitializerCount >= 1 && !buffer[InitializerCount - 1])
1249 InitializerCount--;
1250
1251 symbolPosInBuffer.push_back(InitializerCount);
1252 unsigned int nSym = 0;
1253 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1254 for (unsigned int pos = 0; pos < InitializerCount;) {
1255 if (pos)
1256 os << ", ";
1257 if (pos != nextSymbolPos) {
1258 os << (unsigned int)buffer[pos];
1259 ++pos;
1260 continue;
1261 }
1262 // Generate a per-byte mask() operator for the symbol, which looks like:
1263 // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
1264 // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
1265 std::string symText;
1266 llvm::raw_string_ostream oss(symText);
1267 printSymbol(nSym, oss);
1268 for (unsigned i = 0; i < ptrSize; ++i) {
1269 if (i)
1270 os << ", ";
1271 llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
1272 os << "(" << symText << ")";
1273 }
1274 pos += ptrSize;
1275 nextSymbolPos = symbolPosInBuffer[++nSym];
1276 assert(nextSymbolPos >= pos);
1277 }
1278}
1279
1280void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1281 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1282 symbolPosInBuffer.push_back(size);
1283 unsigned int nSym = 0;
1284 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1285 assert(nextSymbolPos % ptrSize == 0);
1286 for (unsigned int pos = 0; pos < size; pos += ptrSize) {
1287 if (pos)
1288 os << ", ";
1289 if (pos == nextSymbolPos) {
1290 printSymbol(nSym, os);
1291 nextSymbolPos = symbolPosInBuffer[++nSym];
1292 assert(nextSymbolPos % ptrSize == 0);
1293 assert(nextSymbolPos >= pos + ptrSize);
1294 } else if (ptrSize == 4)
1295 os << support::endian::read32le(&buffer[pos]);
1296 else
1297 os << support::endian::read64le(&buffer[pos]);
1298 }
1299}
1300
1301void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1302 auto It = localDecls.find(f);
1303 if (It == localDecls.end())
1304 return;
1305
1306 std::vector<const GlobalVariable *> &gvars = It->second;
1307
1308 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
1309 const NVPTXSubtarget &STI =
1310 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
1311
1312 for (const GlobalVariable *GV : gvars) {
1313 O << "\t// demoted variable\n\t";
1314 printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
1315 }
1316}
1317
1318void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1319 raw_ostream &O) const {
1320 switch (AddressSpace) {
1322 O << "local";
1323 break;
1325 O << "global";
1326 break;
1328 O << "const";
1329 break;
1331 O << "shared";
1332 break;
1333 default:
1334 report_fatal_error("Bad address space found while emitting PTX: " +
1336 break;
1337 }
1338}
1339
1340std::string
1341NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1342 switch (Ty->getTypeID()) {
1343 case Type::IntegerTyID: {
1344 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1345 if (NumBits == 1)
1346 return "pred";
1347 else if (NumBits <= 64) {
1348 std::string name = "u";
1349 return name + utostr(NumBits);
1350 } else {
1351 llvm_unreachable("Integer too large");
1352 break;
1353 }
1354 break;
1355 }
1356 case Type::BFloatTyID:
1357 case Type::HalfTyID:
1358 // fp16 and bf16 are stored as .b16 for compatibility with pre-sm_53
1359 // PTX assembly.
1360 return "b16";
1361 case Type::FloatTyID:
1362 return "f32";
1363 case Type::DoubleTyID:
1364 return "f64";
1365 case Type::PointerTyID: {
1366 unsigned PtrSize = TM.getPointerSizeInBits(Ty->getPointerAddressSpace());
1367 assert((PtrSize == 64 || PtrSize == 32) && "Unexpected pointer size");
1368
1369 if (PtrSize == 64)
1370 if (useB4PTR)
1371 return "b64";
1372 else
1373 return "u64";
1374 else if (useB4PTR)
1375 return "b32";
1376 else
1377 return "u32";
1378 }
1379 default:
1380 break;
1381 }
1382 llvm_unreachable("unexpected type");
1383}
1384
1385void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1386 raw_ostream &O,
1387 const NVPTXSubtarget &STI) {
1388 const DataLayout &DL = getDataLayout();
1389
1390 // GlobalVariables are always constant pointers themselves.
1391 Type *ETy = GVar->getValueType();
1392
1393 O << ".";
1394 emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1395 if (isManaged(*GVar)) {
1396 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1398 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1399 }
1400 O << " .attribute(.managed)";
1401 }
1402 if (MaybeAlign A = GVar->getAlign())
1403 O << " .align " << A->value();
1404 else
1405 O << " .align " << (int)DL.getPrefTypeAlign(ETy).value();
1406
1407 // Special case for i128
1408 if (ETy->isIntegerTy(128)) {
1409 O << " .b8 ";
1410 getSymbol(GVar)->print(O, MAI);
1411 O << "[16]";
1412 return;
1413 }
1414
1415 if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1416 O << " .";
1417 O << getPTXFundamentalTypeStr(ETy);
1418 O << " ";
1419 getSymbol(GVar)->print(O, MAI);
1420 return;
1421 }
1422
1423 int64_t ElementSize = 0;
1424
1425 // Although PTX has direct support for struct type and array type and LLVM IR
1426 // is very similar to PTX, the LLVM CodeGen does not support for targets that
1427 // support these high level field accesses. Structs and arrays are lowered
1428 // into arrays of bytes.
1429 switch (ETy->getTypeID()) {
1430 case Type::StructTyID:
1431 case Type::ArrayTyID:
1433 ElementSize = DL.getTypeStoreSize(ETy);
1434 O << " .b8 ";
1435 getSymbol(GVar)->print(O, MAI);
1436 O << "[";
1437 if (ElementSize) {
1438 O << ElementSize;
1439 }
1440 O << "]";
1441 break;
1442 default:
1443 llvm_unreachable("type not supported yet");
1444 }
1445}
1446
1447void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1448 const DataLayout &DL = getDataLayout();
1449 const AttributeList &PAL = F->getAttributes();
1450 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1451 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
1452 const NVPTXMachineFunctionInfo *MFI =
1453 MF ? MF->getInfo<NVPTXMachineFunctionInfo>() : nullptr;
1454
1456 unsigned paramIndex = 0;
1457 bool first = true;
1458 bool isKernelFunc = isKernelFunction(*F);
1459
1460 if (F->arg_empty() && !F->isVarArg()) {
1461 O << "()";
1462 return;
1463 }
1464
1465 O << "(\n";
1466
1467 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1468 Type *Ty = I->getType();
1469
1470 if (!first)
1471 O << ",\n";
1472
1473 first = false;
1474
1475 // Handle image/sampler parameters
1476 if (isKernelFunc) {
1477 if (isSampler(*I) || isImage(*I)) {
1478 std::string ParamSym;
1479 raw_string_ostream ParamStr(ParamSym);
1480 ParamStr << F->getName() << "_param_" << paramIndex;
1481 ParamStr.flush();
1482 bool EmitImagePtr = !MFI || !MFI->checkImageHandleSymbol(ParamSym);
1483 if (isImage(*I)) {
1484 if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1485 if (EmitImagePtr)
1486 O << "\t.param .u64 .ptr .surfref ";
1487 else
1488 O << "\t.param .surfref ";
1489 O << TLI->getParamName(F, paramIndex);
1490 }
1491 else { // Default image is read_only
1492 if (EmitImagePtr)
1493 O << "\t.param .u64 .ptr .texref ";
1494 else
1495 O << "\t.param .texref ";
1496 O << TLI->getParamName(F, paramIndex);
1497 }
1498 } else {
1499 if (EmitImagePtr)
1500 O << "\t.param .u64 .ptr .samplerref ";
1501 else
1502 O << "\t.param .samplerref ";
1503 O << TLI->getParamName(F, paramIndex);
1504 }
1505 continue;
1506 }
1507 }
1508
1509 auto getOptimalAlignForParam = [TLI, &DL, &PAL, F,
1510 paramIndex](Type *Ty) -> Align {
1511 if (MaybeAlign StackAlign =
1512 getAlign(*F, paramIndex + AttributeList::FirstArgIndex))
1513 return StackAlign.value();
1514
1515 Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL);
1516 MaybeAlign ParamAlign = PAL.getParamAlignment(paramIndex);
1517 return std::max(TypeAlign, ParamAlign.valueOrOne());
1518 };
1519
1520 if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
1521 if (ShouldPassAsArray(Ty)) {
1522 // Just print .param .align <a> .b8 .param[size];
1523 // <a> = optimal alignment for the element type; always multiple of
1524 // PAL.getParamAlignment
1525 // size = typeallocsize of element type
1526 Align OptimalAlign = getOptimalAlignForParam(Ty);
1527
1528 O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1529 O << TLI->getParamName(F, paramIndex);
1530 O << "[" << DL.getTypeAllocSize(Ty) << "]";
1531
1532 continue;
1533 }
1534 // Just a scalar
1535 auto *PTy = dyn_cast<PointerType>(Ty);
1536 unsigned PTySizeInBits = 0;
1537 if (PTy) {
1538 PTySizeInBits =
1539 TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
1540 assert(PTySizeInBits && "Invalid pointer size");
1541 }
1542
1543 if (isKernelFunc) {
1544 if (PTy) {
1545 O << "\t.param .u" << PTySizeInBits << " .ptr";
1546
1547 switch (PTy->getAddressSpace()) {
1548 default:
1549 break;
1551 O << " .global";
1552 break;
1554 O << " .shared";
1555 break;
1557 O << " .const";
1558 break;
1560 O << " .local";
1561 break;
1562 }
1563
1564 O << " .align " << I->getParamAlign().valueOrOne().value();
1565 O << " " << TLI->getParamName(F, paramIndex);
1566 continue;
1567 }
1568
1569 // non-pointer scalar to kernel func
1570 O << "\t.param .";
1571 // Special case: predicate operands become .u8 types
1572 if (Ty->isIntegerTy(1))
1573 O << "u8";
1574 else
1575 O << getPTXFundamentalTypeStr(Ty);
1576 O << " ";
1577 O << TLI->getParamName(F, paramIndex);
1578 continue;
1579 }
1580 // Non-kernel function, just print .param .b<size> for ABI
1581 // and .reg .b<size> for non-ABI
1582 unsigned sz = 0;
1583 if (isa<IntegerType>(Ty)) {
1584 sz = cast<IntegerType>(Ty)->getBitWidth();
1586 } else if (PTy) {
1587 assert(PTySizeInBits && "Invalid pointer size");
1588 sz = PTySizeInBits;
1589 } else
1590 sz = Ty->getPrimitiveSizeInBits();
1591 O << "\t.param .b" << sz << " ";
1592 O << TLI->getParamName(F, paramIndex);
1593 continue;
1594 }
1595
1596 // param has byVal attribute.
1597 Type *ETy = PAL.getParamByValType(paramIndex);
1598 assert(ETy && "Param should have byval type");
1599
1600 // Print .param .align <a> .b8 .param[size];
1601 // <a> = optimal alignment for the element type; always multiple of
1602 // PAL.getParamAlignment
1603 // size = typeallocsize of element type
1604 Align OptimalAlign =
1605 isKernelFunc
1606 ? getOptimalAlignForParam(ETy)
1607 : TLI->getFunctionByValParamAlign(
1608 F, ETy, PAL.getParamAlignment(paramIndex).valueOrOne(), DL);
1609
1610 unsigned sz = DL.getTypeAllocSize(ETy);
1611 O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1612 O << TLI->getParamName(F, paramIndex);
1613 O << "[" << sz << "]";
1614 }
1615
1616 if (F->isVarArg()) {
1617 if (!first)
1618 O << ",\n";
1619 O << "\t.param .align " << STI.getMaxRequiredAlignment();
1620 O << " .b8 ";
1621 O << TLI->getParamName(F, /* vararg */ -1) << "[]";
1622 }
1623
1624 O << "\n)";
1625}
1626
1627void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1628 const MachineFunction &MF) {
1629 SmallString<128> Str;
1631
1632 // Map the global virtual register number to a register class specific
1633 // virtual register number starting from 1 with that class.
1635 //unsigned numRegClasses = TRI->getNumRegClasses();
1636
1637 // Emit the Fake Stack Object
1638 const MachineFrameInfo &MFI = MF.getFrameInfo();
1639 int64_t NumBytes = MFI.getStackSize();
1640 if (NumBytes) {
1641 O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1642 << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
1643 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1644 O << "\t.reg .b64 \t%SP;\n";
1645 O << "\t.reg .b64 \t%SPL;\n";
1646 } else {
1647 O << "\t.reg .b32 \t%SP;\n";
1648 O << "\t.reg .b32 \t%SPL;\n";
1649 }
1650 }
1651
1652 // Go through all virtual registers to establish the mapping between the
1653 // global virtual
1654 // register number and the per class virtual register number.
1655 // We use the per class virtual register number in the ptx output.
1656 unsigned int numVRs = MRI->getNumVirtRegs();
1657 for (unsigned i = 0; i < numVRs; i++) {
1659 const TargetRegisterClass *RC = MRI->getRegClass(vr);
1660 DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1661 int n = regmap.size();
1662 regmap.insert(std::make_pair(vr, n + 1));
1663 }
1664
1665 // Emit register declarations
1666 // @TODO: Extract out the real register usage
1667 // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1668 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1669 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1670 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1671 // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1672 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1673 // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1674
1675 // Emit declaration of the virtual registers or 'physical' registers for
1676 // each register class
1677 for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1678 const TargetRegisterClass *RC = TRI->getRegClass(i);
1679 DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1680 std::string rcname = getNVPTXRegClassName(RC);
1681 std::string rcStr = getNVPTXRegClassStr(RC);
1682 int n = regmap.size();
1683
1684 // Only declare those registers that may be used.
1685 if (n) {
1686 O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1687 << ">;\n";
1688 }
1689 }
1690
1691 OutStreamer->emitRawText(O.str());
1692}
1693
1694/// Translate virtual register numbers in DebugInfo locations to their printed
1695/// encodings, as used by CUDA-GDB.
1696void NVPTXAsmPrinter::encodeDebugInfoRegisterNumbers(
1697 const MachineFunction &MF) {
1699 const NVPTXRegisterInfo *registerInfo = STI.getRegisterInfo();
1700
1701 // Clear the old mapping, and add the new one. This mapping is used after the
1702 // printing of the current function is complete, but before the next function
1703 // is printed.
1704 registerInfo->clearDebugRegisterMap();
1705
1706 for (auto &classMap : VRegMapping) {
1707 for (auto &registerMapping : classMap.getSecond()) {
1708 auto reg = registerMapping.getFirst();
1709 registerInfo->addToDebugRegisterMap(reg, getVirtualRegisterName(reg));
1710 }
1711 }
1712}
1713
1714void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1715 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1716 bool ignored;
1717 unsigned int numHex;
1718 const char *lead;
1719
1720 if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1721 numHex = 8;
1722 lead = "0f";
1724 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1725 numHex = 16;
1726 lead = "0d";
1728 } else
1729 llvm_unreachable("unsupported fp type");
1730
1731 APInt API = APF.bitcastToAPInt();
1732 O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1733}
1734
1735void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1736 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1737 O << CI->getValue();
1738 return;
1739 }
1740 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1741 printFPConstant(CFP, O);
1742 return;
1743 }
1744 if (isa<ConstantPointerNull>(CPV)) {
1745 O << "0";
1746 return;
1747 }
1748 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1749 bool IsNonGenericPointer = false;
1750 if (GVar->getType()->getAddressSpace() != 0) {
1751 IsNonGenericPointer = true;
1752 }
1753 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1754 O << "generic(";
1755 getSymbol(GVar)->print(O, MAI);
1756 O << ")";
1757 } else {
1758 getSymbol(GVar)->print(O, MAI);
1759 }
1760 return;
1761 }
1762 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1763 const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr), false);
1764 printMCExpr(*E, O);
1765 return;
1766 }
1767 llvm_unreachable("Not scalar type found in printScalarConstant()");
1768}
1769
1770void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1771 AggBuffer *AggBuffer) {
1772 const DataLayout &DL = getDataLayout();
1773 int AllocSize = DL.getTypeAllocSize(CPV->getType());
1774 if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1775 // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1776 // only the space allocated by CPV.
1777 AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1778 return;
1779 }
1780
1781 // Helper for filling AggBuffer with APInts.
1782 auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
1783 size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1784 SmallVector<unsigned char, 16> Buf(NumBytes);
1785 // `extractBitsAsZExtValue` does not allow the extraction of bits beyond the
1786 // input's bit width, and i1 arrays may not have a length that is a multuple
1787 // of 8. We handle the last byte separately, so we never request out of
1788 // bounds bits.
1789 for (unsigned I = 0; I < NumBytes - 1; ++I) {
1790 Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1791 }
1792 size_t LastBytePosition = (NumBytes - 1) * 8;
1793 size_t LastByteBits = Val.getBitWidth() - LastBytePosition;
1794 Buf[NumBytes - 1] =
1795 Val.extractBitsAsZExtValue(LastByteBits, LastBytePosition);
1796 AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1797 };
1798
1799 switch (CPV->getType()->getTypeID()) {
1800 case Type::IntegerTyID:
1801 if (const auto CI = dyn_cast<ConstantInt>(CPV)) {
1802 AddIntToBuffer(CI->getValue());
1803 break;
1804 }
1805 if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1806 if (const auto *CI =
1807 dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) {
1808 AddIntToBuffer(CI->getValue());
1809 break;
1810 }
1811 if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1812 Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1813 AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1814 AggBuffer->addZeros(AllocSize);
1815 break;
1816 }
1817 }
1818 llvm_unreachable("unsupported integer const type");
1819 break;
1820
1821 case Type::HalfTyID:
1822 case Type::BFloatTyID:
1823 case Type::FloatTyID:
1824 case Type::DoubleTyID:
1825 AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1826 break;
1827
1828 case Type::PointerTyID: {
1829 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1830 AggBuffer->addSymbol(GVar, GVar);
1831 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1832 const Value *v = Cexpr->stripPointerCasts();
1833 AggBuffer->addSymbol(v, Cexpr);
1834 }
1835 AggBuffer->addZeros(AllocSize);
1836 break;
1837 }
1838
1839 case Type::ArrayTyID:
1841 case Type::StructTyID: {
1842 if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1843 bufferAggregateConstant(CPV, AggBuffer);
1844 if (Bytes > AllocSize)
1845 AggBuffer->addZeros(Bytes - AllocSize);
1846 } else if (isa<ConstantAggregateZero>(CPV))
1847 AggBuffer->addZeros(Bytes);
1848 else
1849 llvm_unreachable("Unexpected Constant type");
1850 break;
1851 }
1852
1853 default:
1854 llvm_unreachable("unsupported type");
1855 }
1856}
1857
1858void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1859 AggBuffer *aggBuffer) {
1860 const DataLayout &DL = getDataLayout();
1861 int Bytes;
1862
1863 // Integers of arbitrary width
1864 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1865 APInt Val = CI->getValue();
1866 for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
1868 aggBuffer->addBytes(&Byte, 1, 1);
1869 Val.lshrInPlace(8);
1870 }
1871 return;
1872 }
1873
1874 // Old constants
1875 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1876 if (CPV->getNumOperands())
1877 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1878 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1879 return;
1880 }
1881
1882 if (const ConstantDataSequential *CDS =
1883 dyn_cast<ConstantDataSequential>(CPV)) {
1884 if (CDS->getNumElements())
1885 for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1886 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1887 aggBuffer);
1888 return;
1889 }
1890
1891 if (isa<ConstantStruct>(CPV)) {
1892 if (CPV->getNumOperands()) {
1893 StructType *ST = cast<StructType>(CPV->getType());
1894 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1895 if (i == (e - 1))
1896 Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
1897 DL.getTypeAllocSize(ST) -
1898 DL.getStructLayout(ST)->getElementOffset(i);
1899 else
1900 Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
1901 DL.getStructLayout(ST)->getElementOffset(i);
1902 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1903 }
1904 }
1905 return;
1906 }
1907 llvm_unreachable("unsupported constant type in printAggregateConstant()");
1908}
1909
1910/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1911/// a copy from AsmPrinter::lowerConstant, except customized to only handle
1912/// expressions that are representable in PTX and create
1913/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
1914const MCExpr *
1915NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
1916 MCContext &Ctx = OutContext;
1917
1918 if (CV->isNullValue() || isa<UndefValue>(CV))
1919 return MCConstantExpr::create(0, Ctx);
1920
1921 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1922 return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1923
1924 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1925 const MCSymbolRefExpr *Expr =
1927 if (ProcessingGeneric) {
1928 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1929 } else {
1930 return Expr;
1931 }
1932 }
1933
1934 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1935 if (!CE) {
1936 llvm_unreachable("Unknown constant value to lower!");
1937 }
1938
1939 switch (CE->getOpcode()) {
1940 default:
1941 break; // Error
1942
1943 case Instruction::AddrSpaceCast: {
1944 // Strip the addrspacecast and pass along the operand
1945 PointerType *DstTy = cast<PointerType>(CE->getType());
1946 if (DstTy->getAddressSpace() == 0)
1947 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
1948
1949 break; // Error
1950 }
1951
1952 case Instruction::GetElementPtr: {
1953 const DataLayout &DL = getDataLayout();
1954
1955 // Generate a symbolic expression for the byte address
1956 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
1957 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
1958
1959 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
1960 ProcessingGeneric);
1961 if (!OffsetAI)
1962 return Base;
1963
1964 int64_t Offset = OffsetAI.getSExtValue();
1966 Ctx);
1967 }
1968
1969 case Instruction::Trunc:
1970 // We emit the value and depend on the assembler to truncate the generated
1971 // expression properly. This is important for differences between
1972 // blockaddress labels. Since the two labels are in the same function, it
1973 // is reasonable to treat their delta as a 32-bit value.
1974 [[fallthrough]];
1975 case Instruction::BitCast:
1976 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1977
1978 case Instruction::IntToPtr: {
1979 const DataLayout &DL = getDataLayout();
1980
1981 // Handle casts to pointers by changing them into casts to the appropriate
1982 // integer type. This promotes constant folding and simplifies this code.
1983 Constant *Op = CE->getOperand(0);
1984 Op = ConstantFoldIntegerCast(Op, DL.getIntPtrType(CV->getType()),
1985 /*IsSigned*/ false, DL);
1986 if (Op)
1987 return lowerConstantForGV(Op, ProcessingGeneric);
1988
1989 break; // Error
1990 }
1991
1992 case Instruction::PtrToInt: {
1993 const DataLayout &DL = getDataLayout();
1994
1995 // Support only foldable casts to/from pointers that can be eliminated by
1996 // changing the pointer to the appropriately sized integer type.
1997 Constant *Op = CE->getOperand(0);
1998 Type *Ty = CE->getType();
1999
2000 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
2001
2002 // We can emit the pointer value into this slot if the slot is an
2003 // integer slot equal to the size of the pointer.
2004 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
2005 return OpExpr;
2006
2007 // Otherwise the pointer is smaller than the resultant integer, mask off
2008 // the high bits so we are sure to get a proper truncation if the input is
2009 // a constant expr.
2010 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
2011 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
2012 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
2013 }
2014
2015 // The MC library also has a right-shift operator, but it isn't consistently
2016 // signed or unsigned between different targets.
2017 case Instruction::Add: {
2018 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2019 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
2020 switch (CE->getOpcode()) {
2021 default: llvm_unreachable("Unknown binary operator constant cast expr");
2022 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
2023 }
2024 }
2025 }
2026
2027 // If the code isn't optimized, there may be outstanding folding
2028 // opportunities. Attempt to fold the expression using DataLayout as a
2029 // last resort before giving up.
2031 if (C != CE)
2032 return lowerConstantForGV(C, ProcessingGeneric);
2033
2034 // Otherwise report the problem to the user.
2035 std::string S;
2037 OS << "Unsupported expression in static initializer: ";
2038 CE->printAsOperand(OS, /*PrintType=*/false,
2039 !MF ? nullptr : MF->getFunction().getParent());
2040 report_fatal_error(Twine(OS.str()));
2041}
2042
2043// Copy of MCExpr::print customized for NVPTX
2044void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
2045 switch (Expr.getKind()) {
2046 case MCExpr::Target:
2047 return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
2048 case MCExpr::Constant:
2049 OS << cast<MCConstantExpr>(Expr).getValue();
2050 return;
2051
2052 case MCExpr::SymbolRef: {
2053 const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
2054 const MCSymbol &Sym = SRE.getSymbol();
2055 Sym.print(OS, MAI);
2056 return;
2057 }
2058
2059 case MCExpr::Unary: {
2060 const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
2061 switch (UE.getOpcode()) {
2062 case MCUnaryExpr::LNot: OS << '!'; break;
2063 case MCUnaryExpr::Minus: OS << '-'; break;
2064 case MCUnaryExpr::Not: OS << '~'; break;
2065 case MCUnaryExpr::Plus: OS << '+'; break;
2066 }
2067 printMCExpr(*UE.getSubExpr(), OS);
2068 return;
2069 }
2070
2071 case MCExpr::Binary: {
2072 const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2073
2074 // Only print parens around the LHS if it is non-trivial.
2075 if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2076 isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2077 printMCExpr(*BE.getLHS(), OS);
2078 } else {
2079 OS << '(';
2080 printMCExpr(*BE.getLHS(), OS);
2081 OS<< ')';
2082 }
2083
2084 switch (BE.getOpcode()) {
2085 case MCBinaryExpr::Add:
2086 // Print "X-42" instead of "X+-42".
2087 if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2088 if (RHSC->getValue() < 0) {
2089 OS << RHSC->getValue();
2090 return;
2091 }
2092 }
2093
2094 OS << '+';
2095 break;
2096 default: llvm_unreachable("Unhandled binary operator");
2097 }
2098
2099 // Only print parens around the LHS if it is non-trivial.
2100 if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
2101 printMCExpr(*BE.getRHS(), OS);
2102 } else {
2103 OS << '(';
2104 printMCExpr(*BE.getRHS(), OS);
2105 OS << ')';
2106 }
2107 return;
2108 }
2109 }
2110
2111 llvm_unreachable("Invalid expression kind!");
2112}
2113
2114/// PrintAsmOperand - Print out an operand for an inline asm expression.
2115///
2116bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2117 const char *ExtraCode, raw_ostream &O) {
2118 if (ExtraCode && ExtraCode[0]) {
2119 if (ExtraCode[1] != 0)
2120 return true; // Unknown modifier.
2121
2122 switch (ExtraCode[0]) {
2123 default:
2124 // See if this is a generic print operand
2125 return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
2126 case 'r':
2127 break;
2128 }
2129 }
2130
2131 printOperand(MI, OpNo, O);
2132
2133 return false;
2134}
2135
2136bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
2137 unsigned OpNo,
2138 const char *ExtraCode,
2139 raw_ostream &O) {
2140 if (ExtraCode && ExtraCode[0])
2141 return true; // Unknown modifier
2142
2143 O << '[';
2144 printMemOperand(MI, OpNo, O);
2145 O << ']';
2146
2147 return false;
2148}
2149
2150void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
2151 raw_ostream &O) {
2152 const MachineOperand &MO = MI->getOperand(OpNum);
2153 switch (MO.getType()) {
2155 if (MO.getReg().isPhysical()) {
2156 if (MO.getReg() == NVPTX::VRDepot)
2158 else
2160 } else {
2161 emitVirtualRegister(MO.getReg(), O);
2162 }
2163 break;
2164
2166 O << MO.getImm();
2167 break;
2168
2170 printFPConstant(MO.getFPImm(), O);
2171 break;
2172
2174 PrintSymbolOperand(MO, O);
2175 break;
2176
2178 MO.getMBB()->getSymbol()->print(O, MAI);
2179 break;
2180
2181 default:
2182 llvm_unreachable("Operand type not supported.");
2183 }
2184}
2185
2186void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, unsigned OpNum,
2187 raw_ostream &O, const char *Modifier) {
2188 printOperand(MI, OpNum, O);
2189
2190 if (Modifier && strcmp(Modifier, "add") == 0) {
2191 O << ", ";
2192 printOperand(MI, OpNum + 1, O);
2193 } else {
2194 if (MI->getOperand(OpNum + 1).isImm() &&
2195 MI->getOperand(OpNum + 1).getImm() == 0)
2196 return; // don't print ',0' or '+0'
2197 O << "+";
2198 printOperand(MI, OpNum + 1, O);
2199 }
2200}
2201
2202// Force static initialization.
2206}
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:128
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Looks at all the uses of the given value. Returns the Liveness deduced from the uses of this value. Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses. If the result is Live, MaybeLiveUses might be modified but its content should be ignored (since it might not be complete). DeadArgumentEliminationPass
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
Symbol * Sym
Definition: ELF_riscv.cpp:479
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define DEPOTNAME
static bool usedInOneFunc(const User *U, Function const *&oneFunc)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter()
static bool usedInGlobalVarDef(const Constant *C)
static bool useFuncSeen(const Constant *C, DenseMap< const Function *, bool > &seenMap)
static bool ShouldPassAsArray(Type *Ty)
static void DiscoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
DiscoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static const char * name
Definition: SMEABIPass.cpp:46
raw_pwrite_stream & OS
This file defines the SmallString class.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
Value * RHS
Value * LHS
@ __CLK_ADDRESS_BASE
@ __CLK_FILTER_BASE
@ __CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_MASK
@ __CLK_ADDRESS_MASK
@ __CLK_FILTER_MASK
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5463
APInt bitcastToAPInt() const
Definition: APFloat.h:1351
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:617
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:697
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
Definition: AsmPrinter.cpp:428
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:90
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:93
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:105
bool hasDebugInfo() const
Returns true if valid debug info is present.
Definition: AsmPrinter.h:440
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: AsmPrinter.cpp:459
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:404
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition: AsmPrinter.h:124
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:97
bool doFinalization(Module &M) override
Shut down the asmprinter.
MCSymbol * GetExternalSymbolSymbol(Twine Sym) const
Return the MCSymbol for the specified ExternalSymbol.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AsmPrinter.h:390
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:102
const DataLayout & getDataLayout() const
Return information about data layout.
Definition: AsmPrinter.cpp:412
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
Definition: AsmPrinter.cpp:432
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
Definition: AsmPrinter.cpp:423
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:220
ConstantDataSequential - A vector or array constant whose element type is a simple 1/2/4/8-byte integ...
Definition: Constants.h:587
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1108
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:271
const APFloat & getValueAPF() const
Definition: Constants.h:314
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:148
This is an important base class in LLVM.
Definition: Constant.h:42
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Subprogram description.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1874
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:614
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:117
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:79
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:109
bool hasLinkOnceLinkage() const
Definition: GlobalValue.h:516
bool hasExternalLinkage() const
Definition: GlobalValue.h:512
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:315
bool hasLocalLinkage() const
Definition: GlobalValue.h:529
bool hasPrivateLinkage() const
Definition: GlobalValue.h:528
unsigned getAddressSpace() const
Definition: GlobalValue.h:206
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:657
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:295
bool hasWeakLinkage() const
Definition: GlobalValue.h:523
bool hasCommonLinkage() const
Definition: GlobalValue.h:533
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:513
Type * getValueType() const
Definition: GlobalValue.h:297
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
bool isLoopHeader(const BlockT *BB) const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
unsigned getCodePointerSize() const
Get the code pointer size in bytes.
Definition: MCAsmInfo.h:449
Binary assembler expressions.
Definition: MCExpr.h:493
const MCExpr * getLHS() const
Get the left-hand side expression of the binary operator.
Definition: MCExpr.h:640
const MCExpr * getRHS() const
Get the right-hand side expression of the binary operator.
Definition: MCExpr.h:643
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:542
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:537
Opcode getOpcode() const
Get the kind of this binary expression.
Definition: MCExpr.h:637
@ Add
Addition.
Definition: MCExpr.h:496
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:222
Context object for machine code objects.
Definition: MCContext.h:83
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:212
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
@ Unary
Unary expressions.
Definition: MCExpr.h:40
@ Constant
Constant expressions.
Definition: MCExpr.h:38
@ SymbolRef
References to labels and assigned expressions.
Definition: MCExpr.h:39
@ Target
Target specific expression.
Definition: MCExpr.h:41
@ Binary
Binary expressions.
Definition: MCExpr.h:37
ExprKind getKind() const
Definition: MCExpr.h:78
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:185
void addOperand(const MCOperand Op)
Definition: MCInst.h:211
void setOpcode(unsigned Op)
Definition: MCInst.h:198
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:37
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:163
static MCOperand createReg(MCRegister Reg)
Definition: MCInst.h:135
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:142
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:192
const MCSymbol & getSymbol() const
Definition: MCExpr.h:411
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:398
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition: MCSymbol.cpp:58
Unary assembler expressions.
Definition: MCExpr.h:437
Opcode getOpcode() const
Get the kind of this unary expression.
Definition: MCExpr.h:480
@ Minus
Unary minus.
Definition: MCExpr.h:441
@ Plus
Unary plus.
Definition: MCExpr.h:443
@ Not
Bitwise negation.
Definition: MCExpr.h:442
@ LNot
Logical negation.
Definition: MCExpr.h:440
const MCExpr * getSubExpr() const
Get the child of this unary expression.
Definition: MCExpr.h:483
Metadata node.
Definition: Metadata.h:1073
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
iterator_range< pred_iterator > predecessors()
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
Definition: MachineInstr.h:71
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_FPImmediate
Floating-point immediate operand.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
std::string getVirtualRegisterName(unsigned) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not a register based.
static const NVPTXFloatMCExpr * createConstantBFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:44
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:49
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:54
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:59
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
Definition: NVPTXMCExpr.cpp:59
static const char * getRegisterName(MCRegister Reg)
bool checkImageHandleSymbol(StringRef Symbol) const
Check if the symbol has a mapping.
StringRef getImageHandleSymbol(unsigned Idx) const
Returns the symbol name at the given index.
const char * getName(unsigned RegNo) const
std::string getTargetName() const
unsigned getMaxRequiredAlignment() const
bool hasMaskOperator() const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getPTXVersion() const
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getSmVersion() const
NVPTX::DrvInterface getDrvInterface() const
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
UniqueStringSaver & getStrPool() const
Implements NVPTX-specific streamer.
void closeLastSection()
Close last section.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:712
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:144
Class to represent struct types.
Definition: DerivedTypes.h:218
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:81
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
unsigned getPointerSizeInBits(unsigned AS) const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:145
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ ArrayTyID
Arrays.
Definition: Type.h:74
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
@ VoidTyID
type with no size
Definition: Type.h:63
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ StructTyID
Structures.
Definition: Type.h:73
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:70
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:75
@ BFloatTyID
16-bit floating point type (7-bit significand)
Definition: Type.h:57
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ PointerTyID
Pointers.
Definition: Type.h:72
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:303
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:252
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
StringRef save(const char *S)
Definition: StringSaver.h:52
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool use_empty() const
Definition: Value.h:344
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:213
size_type size() const
Definition: DenseSet.h:81
bool erase(const ValueT &V)
Definition: DenseSet.h:97
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:95
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:691
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ NVCL
Definition: NVPTX.h:79
@ CUDA
Definition: NVPTX.h:80
@ CE
Windows NT (Windows on ARM)
Reg
All possible values of the reg field in the ModR/M byte.
uint64_t read64le(const void *P)
Definition: Endian.h:428
uint32_t read32le(const void *P)
Definition: Endian.h:425
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool isManaged(const Value &V)
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1697
std::optional< unsigned > getMaxNReg(const Function &F)
std::optional< unsigned > getMaxNTIDy(const Function &F)
StringRef getSamplerName(const Value &V)
bool isImageReadWrite(const Value &V)
std::string getNVPTXRegClassName(TargetRegisterClass const *RC)
std::optional< unsigned > getMaxNTIDz(const Function &F)
MaybeAlign getAlign(const Function &F, unsigned Index)
std::optional< unsigned > getMaxNTIDx(const Function &F)
std::optional< unsigned > getMinCTASm(const Function &F)
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
bool isImage(const Value &V)
bool isSampler(const Value &V)
unsigned promoteScalarArgumentSize(unsigned size)
void clearAnnotationCache(const Module *Mod)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
std::optional< unsigned > getReqNTIDy(const Function &F)
bool isSurface(const Value &V)
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition: Format.h:200
std::optional< unsigned > getMaxClusterRank(const Function &F)
StringRef getTextureName(const Value &V)
std::optional< unsigned > getClusterDimx(const Function &F)
void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC)
StringRef getSurfaceName(const Value &V)
std::optional< unsigned > getClusterDimy(const Function &F)
Target & getTheNVPTXTarget64()
bool isKernelFunction(const Function &F)
bool isTexture(const Value &V)
bool isImageWriteOnly(const Value &V)
std::optional< unsigned > getReqNTIDz(const Function &F)
std::optional< unsigned > getReqNTIDx(const Function &F)
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
std::optional< unsigned > getClusterDimz(const Function &F)
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Target & getTheNVPTXTarget32()
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:257
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:302
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:258
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...