LLVM 22.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a printer that converts from our internal representation
10// of machine-dependent LLVM code to NVPTX assembly language.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXAsmPrinter.h"
19#include "NVPTX.h"
20#include "NVPTXMCExpr.h"
22#include "NVPTXRegisterInfo.h"
23#include "NVPTXSubtarget.h"
24#include "NVPTXTargetMachine.h"
25#include "NVPTXUtilities.h"
27#include "cl_common_defines.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/DenseMap.h"
32#include "llvm/ADT/DenseSet.h"
36#include "llvm/ADT/StringRef.h"
37#include "llvm/ADT/Twine.h"
52#include "llvm/IR/Argument.h"
53#include "llvm/IR/Attributes.h"
54#include "llvm/IR/BasicBlock.h"
55#include "llvm/IR/Constant.h"
56#include "llvm/IR/Constants.h"
57#include "llvm/IR/DataLayout.h"
58#include "llvm/IR/DebugInfo.h"
60#include "llvm/IR/DebugLoc.h"
62#include "llvm/IR/Function.h"
63#include "llvm/IR/GlobalAlias.h"
64#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/Instruction.h"
67#include "llvm/IR/LLVMContext.h"
68#include "llvm/IR/Module.h"
69#include "llvm/IR/Operator.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/User.h"
72#include "llvm/MC/MCExpr.h"
73#include "llvm/MC/MCInst.h"
74#include "llvm/MC/MCInstrDesc.h"
75#include "llvm/MC/MCStreamer.h"
76#include "llvm/MC/MCSymbol.h"
81#include "llvm/Support/Endian.h"
88#include <cassert>
89#include <cstdint>
90#include <cstring>
91#include <string>
92#include <utility>
93#include <vector>
94
95using namespace llvm;
96
97#define DEPOTNAME "__local_depot"
98
99/// discoverDependentGlobals - Return a set of GlobalVariables on which \p V
100/// depends.
101static void
104 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
105 Globals.insert(GV);
106 return;
107 }
108
109 if (const User *U = dyn_cast<User>(V))
110 for (const auto &O : U->operands())
111 discoverDependentGlobals(O, Globals);
112}
113
114/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
115/// instances to be emitted, but only after any dependents have been added
116/// first.
117static void
122 // Have we already visited this one?
123 if (Visited.count(GV))
124 return;
125
126 // Do we have a circular dependency?
127 if (!Visiting.insert(GV).second)
128 report_fatal_error("Circular dependency found in global variable set");
129
130 // Make sure we visit all dependents first
132 for (const auto &O : GV->operands())
133 discoverDependentGlobals(O, Others);
134
135 for (const GlobalVariable *GV : Others)
136 VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
137
138 // Now we can visit ourself
139 Order.push_back(GV);
140 Visited.insert(GV);
141 Visiting.erase(GV);
142}
143
144void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
145 NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
146 getSubtargetInfo().getFeatureBits());
147
148 MCInst Inst;
149 lowerToMCInst(MI, Inst);
151}
152
153void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
154 OutMI.setOpcode(MI->getOpcode());
155 // Special: Do not mangle symbol operand of CALL_PROTOTYPE
156 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
157 const MachineOperand &MO = MI->getOperand(0);
158 OutMI.addOperand(GetSymbolRef(
160 return;
161 }
162
163 for (const auto MO : MI->operands())
164 OutMI.addOperand(lowerOperand(MO));
165}
166
167MCOperand NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
168 switch (MO.getType()) {
169 default:
170 llvm_unreachable("unknown operand type");
172 return MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
174 return MCOperand::createImm(MO.getImm());
179 return GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
181 return GetSymbolRef(getSymbol(MO.getGlobal()));
183 const ConstantFP *Cnt = MO.getFPImm();
184 const APFloat &Val = Cnt->getValueAPF();
185
186 switch (Cnt->getType()->getTypeID()) {
187 default:
188 report_fatal_error("Unsupported FP type");
189 break;
190 case Type::HalfTyID:
193 case Type::BFloatTyID:
196 case Type::FloatTyID:
199 case Type::DoubleTyID:
202 }
203 break;
204 }
205 }
206}
207
208unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
210 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
211
212 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
213 unsigned RegNum = RegMap[Reg];
214
215 // Encode the register class in the upper 4 bits
216 // Must be kept in sync with NVPTXInstPrinter::printRegName
217 unsigned Ret = 0;
218 if (RC == &NVPTX::B1RegClass) {
219 Ret = (1 << 28);
220 } else if (RC == &NVPTX::B16RegClass) {
221 Ret = (2 << 28);
222 } else if (RC == &NVPTX::B32RegClass) {
223 Ret = (3 << 28);
224 } else if (RC == &NVPTX::B64RegClass) {
225 Ret = (4 << 28);
226 } else if (RC == &NVPTX::B128RegClass) {
227 Ret = (7 << 28);
228 } else {
229 report_fatal_error("Bad register class");
230 }
231
232 // Insert the vreg number
233 Ret |= (RegNum & 0x0FFFFFFF);
234 return Ret;
235 } else {
236 // Some special-use registers are actually physical registers.
237 // Encode this as the register class ID of 0 and the real register ID.
238 return Reg & 0x0FFFFFFF;
239 }
240}
241
242MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
243 const MCExpr *Expr;
244 Expr = MCSymbolRefExpr::create(Symbol, OutContext);
245 return MCOperand::createExpr(Expr);
246}
247
248void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
249 const DataLayout &DL = getDataLayout();
251 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
252
253 Type *Ty = F->getReturnType();
254 if (Ty->getTypeID() == Type::VoidTyID)
255 return;
256 O << " (";
257
258 auto PrintScalarRetVal = [&](unsigned Size) {
259 O << ".param .b" << promoteScalarArgumentSize(Size) << " func_retval0";
260 };
261 if (shouldPassAsArray(Ty)) {
262 const unsigned TotalSize = DL.getTypeAllocSize(Ty);
263 const Align RetAlignment = TLI->getFunctionArgumentAlignment(
265 O << ".param .align " << RetAlignment.value() << " .b8 func_retval0["
266 << TotalSize << "]";
267 } else if (Ty->isFloatingPointTy()) {
268 PrintScalarRetVal(Ty->getPrimitiveSizeInBits());
269 } else if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
270 PrintScalarRetVal(ITy->getBitWidth());
271 } else if (isa<PointerType>(Ty)) {
272 PrintScalarRetVal(TLI->getPointerTy(DL).getSizeInBits());
273 } else
274 llvm_unreachable("Unknown return type");
275 O << ") ";
276}
277
278void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
279 raw_ostream &O) {
280 const Function &F = MF.getFunction();
281 printReturnValStr(&F, O);
282}
283
284// Return true if MBB is the header of a loop marked with
285// llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
286bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
287 const MachineBasicBlock &MBB) const {
288 MachineLoopInfo &LI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
289 // We insert .pragma "nounroll" only to the loop header.
290 if (!LI.isLoopHeader(&MBB))
291 return false;
292
293 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
294 // we iterate through each back edge of the loop with header MBB, and check
295 // whether its metadata contains llvm.loop.unroll.disable.
296 for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
297 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
298 // Edges from other loops to MBB are not back edges.
299 continue;
300 }
301 if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
302 if (MDNode *LoopID =
303 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
304 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
305 return true;
306 if (MDNode *UnrollCountMD =
307 GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
308 if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
309 ->isOne())
310 return true;
311 }
312 }
313 }
314 }
315 return false;
316}
317
318void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
320 if (isLoopHeaderOfNoUnroll(MBB))
321 OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
322}
323
324void NVPTXAsmPrinter::emitFunctionEntryLabel() {
327
328 if (!GlobalsEmitted) {
329 emitGlobals(*MF->getFunction().getParent());
330 GlobalsEmitted = true;
331 }
332
333 // Set up
334 MRI = &MF->getRegInfo();
335 F = &MF->getFunction();
336 emitLinkageDirective(F, O);
337 if (isKernelFunction(*F))
338 O << ".entry ";
339 else {
340 O << ".func ";
341 printReturnValStr(*MF, O);
342 }
343
345
346 emitFunctionParamList(F, O);
347 O << "\n";
348
349 if (isKernelFunction(*F))
350 emitKernelFunctionDirectives(*F, O);
351
353 O << ".noreturn";
354
355 OutStreamer->emitRawText(O.str());
356
357 VRegMapping.clear();
358 // Emit open brace for function body.
359 OutStreamer->emitRawText(StringRef("{\n"));
360 setAndEmitFunctionVirtualRegisters(*MF);
361 encodeDebugInfoRegisterNumbers(*MF);
362 // Emit initial .loc debug directive for correct relocation symbol data.
363 if (const DISubprogram *SP = MF->getFunction().getSubprogram()) {
364 assert(SP->getUnit());
365 if (!SP->getUnit()->isDebugDirectivesOnly())
367 }
368}
369
371 bool Result = AsmPrinter::runOnMachineFunction(F);
372 // Emit closing brace for the body of function F.
373 // The closing brace must be emitted here because we need to emit additional
374 // debug labels/data after the last basic block.
375 // We need to emit the closing brace here because we don't have function that
376 // finished emission of the function body.
377 OutStreamer->emitRawText(StringRef("}\n"));
378 return Result;
379}
380
381void NVPTXAsmPrinter::emitFunctionBodyStart() {
383 raw_svector_ostream O(Str);
384 emitDemotedVars(&MF->getFunction(), O);
385 OutStreamer->emitRawText(O.str());
386}
387
388void NVPTXAsmPrinter::emitFunctionBodyEnd() {
389 VRegMapping.clear();
390}
391
395 return OutContext.getOrCreateSymbol(Str);
396}
397
398void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
399 Register RegNo = MI->getOperand(0).getReg();
400 if (RegNo.isVirtual()) {
401 OutStreamer->AddComment(Twine("implicit-def: ") +
403 } else {
404 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
405 OutStreamer->AddComment(Twine("implicit-def: ") +
406 STI.getRegisterInfo()->getName(RegNo));
407 }
408 OutStreamer->addBlankLine();
409}
410
411void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
412 raw_ostream &O) const {
413 // If the NVVM IR has some of reqntid* specified, then output
414 // the reqntid directive, and set the unspecified ones to 1.
415 // If none of Reqntid* is specified, don't output reqntid directive.
416 const auto ReqNTID = getReqNTID(F);
417 if (!ReqNTID.empty())
418 O << formatv(".reqntid {0:$[, ]}\n",
419 make_range(ReqNTID.begin(), ReqNTID.end()));
420
421 const auto MaxNTID = getMaxNTID(F);
422 if (!MaxNTID.empty())
423 O << formatv(".maxntid {0:$[, ]}\n",
424 make_range(MaxNTID.begin(), MaxNTID.end()));
425
426 if (const auto Mincta = getMinCTASm(F))
427 O << ".minnctapersm " << *Mincta << "\n";
428
429 if (const auto Maxnreg = getMaxNReg(F))
430 O << ".maxnreg " << *Maxnreg << "\n";
431
432 // .maxclusterrank directive requires SM_90 or higher, make sure that we
433 // filter it out for lower SM versions, as it causes a hard ptxas crash.
434 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
435 const auto *STI = static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
436
437 if (STI->getSmVersion() >= 90) {
438 const auto ClusterDim = getClusterDim(F);
439 const bool BlocksAreClusters = hasBlocksAreClusters(F);
440
441 if (!ClusterDim.empty()) {
442
443 if (!BlocksAreClusters)
444 O << ".explicitcluster\n";
445
446 if (ClusterDim[0] != 0) {
447 assert(llvm::all_of(ClusterDim, [](unsigned D) { return D != 0; }) &&
448 "cluster_dim_x != 0 implies cluster_dim_y and cluster_dim_z "
449 "should be non-zero as well");
450
451 O << formatv(".reqnctapercluster {0:$[, ]}\n",
452 make_range(ClusterDim.begin(), ClusterDim.end()));
453 } else {
454 assert(llvm::all_of(ClusterDim, [](unsigned D) { return D == 0; }) &&
455 "cluster_dim_x == 0 implies cluster_dim_y and cluster_dim_z "
456 "should be 0 as well");
457 }
458 }
459
460 if (BlocksAreClusters) {
461 LLVMContext &Ctx = F.getContext();
462 if (ReqNTID.empty() || ClusterDim.empty())
464 F, "blocksareclusters requires reqntid and cluster_dim attributes",
465 F.getSubprogram()));
466 else if (STI->getPTXVersion() < 90)
468 F, "blocksareclusters requires PTX version >= 9.0",
469 F.getSubprogram()));
470 else
471 O << ".blocksareclusters\n";
472 }
473
474 if (const auto Maxclusterrank = getMaxClusterRank(F))
475 O << ".maxclusterrank " << *Maxclusterrank << "\n";
476 }
477}
478
479std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
480 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
481
482 std::string Name;
483 raw_string_ostream NameStr(Name);
484
485 VRegRCMap::const_iterator I = VRegMapping.find(RC);
486 assert(I != VRegMapping.end() && "Bad register class");
487 const DenseMap<unsigned, unsigned> &RegMap = I->second;
488
489 VRegMap::const_iterator VI = RegMap.find(Reg);
490 assert(VI != RegMap.end() && "Bad virtual register");
491 unsigned MappedVR = VI->second;
492
493 NameStr << getNVPTXRegClassStr(RC) << MappedVR;
494
495 return Name;
496}
497
498void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
499 raw_ostream &O) {
500 O << getVirtualRegisterName(vr);
501}
502
503void NVPTXAsmPrinter::emitAliasDeclaration(const GlobalAlias *GA,
504 raw_ostream &O) {
505 const Function *F = dyn_cast_or_null<Function>(GA->getAliaseeObject());
506 if (!F || isKernelFunction(*F) || F->isDeclaration())
508 "NVPTX aliasee must be a non-kernel function definition");
509
510 if (GA->hasLinkOnceLinkage() || GA->hasWeakLinkage() ||
512 report_fatal_error("NVPTX aliasee must not be '.weak'");
513
514 emitDeclarationWithName(F, getSymbol(GA), O);
515}
516
517void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
518 emitDeclarationWithName(F, getSymbol(F), O);
519}
520
521void NVPTXAsmPrinter::emitDeclarationWithName(const Function *F, MCSymbol *S,
522 raw_ostream &O) {
523 emitLinkageDirective(F, O);
524 if (isKernelFunction(*F))
525 O << ".entry ";
526 else
527 O << ".func ";
528 printReturnValStr(F, O);
529 S->print(O, MAI);
530 O << "\n";
531 emitFunctionParamList(F, O);
532 O << "\n";
534 O << ".noreturn";
535 O << ";\n";
536}
537
538static bool usedInGlobalVarDef(const Constant *C) {
539 if (!C)
540 return false;
541
542 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
543 return GV->getName() != "llvm.used";
544
545 for (const User *U : C->users())
546 if (const Constant *C = dyn_cast<Constant>(U))
548 return true;
549
550 return false;
551}
552
553static bool usedInOneFunc(const User *U, Function const *&OneFunc) {
554 if (const GlobalVariable *OtherGV = dyn_cast<GlobalVariable>(U))
555 if (OtherGV->getName() == "llvm.used")
556 return true;
557
558 if (const Instruction *I = dyn_cast<Instruction>(U)) {
559 if (const Function *CurFunc = I->getFunction()) {
560 if (OneFunc && (CurFunc != OneFunc))
561 return false;
562 OneFunc = CurFunc;
563 return true;
564 }
565 return false;
566 }
567
568 for (const User *UU : U->users())
569 if (!usedInOneFunc(UU, OneFunc))
570 return false;
571
572 return true;
573}
574
575/* Find out if a global variable can be demoted to local scope.
576 * Currently, this is valid for CUDA shared variables, which have local
577 * scope and global lifetime. So the conditions to check are :
578 * 1. Is the global variable in shared address space?
579 * 2. Does it have local linkage?
580 * 3. Is the global variable referenced only in one function?
581 */
582static bool canDemoteGlobalVar(const GlobalVariable *GV, Function const *&f) {
583 if (!GV->hasLocalLinkage())
584 return false;
586 return false;
587
588 const Function *oneFunc = nullptr;
589
590 bool flag = usedInOneFunc(GV, oneFunc);
591 if (!flag)
592 return false;
593 if (!oneFunc)
594 return false;
595 f = oneFunc;
596 return true;
597}
598
599static bool useFuncSeen(const Constant *C,
600 const SmallPtrSetImpl<const Function *> &SeenSet) {
601 for (const User *U : C->users()) {
602 if (const Constant *cu = dyn_cast<Constant>(U)) {
603 if (useFuncSeen(cu, SeenSet))
604 return true;
605 } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
606 if (const Function *Caller = I->getFunction())
607 if (SeenSet.contains(Caller))
608 return true;
609 }
610 }
611 return false;
612}
613
614void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
616 for (const Function &F : M) {
617 if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
618 emitDeclaration(&F, O);
619 continue;
620 }
621
622 if (F.isDeclaration()) {
623 if (F.use_empty())
624 continue;
625 if (F.getIntrinsicID())
626 continue;
627 emitDeclaration(&F, O);
628 continue;
629 }
630 for (const User *U : F.users()) {
631 if (const Constant *C = dyn_cast<Constant>(U)) {
632 if (usedInGlobalVarDef(C)) {
633 // The use is in the initialization of a global variable
634 // that is a function pointer, so print a declaration
635 // for the original function
636 emitDeclaration(&F, O);
637 break;
638 }
639 // Emit a declaration of this function if the function that
640 // uses this constant expr has already been seen.
641 if (useFuncSeen(C, SeenSet)) {
642 emitDeclaration(&F, O);
643 break;
644 }
645 }
646
647 if (!isa<Instruction>(U))
648 continue;
649 const Function *Caller = cast<Instruction>(U)->getFunction();
650 if (!Caller)
651 continue;
652
653 // If a caller has already been seen, then the caller is
654 // appearing in the module before the callee. so print out
655 // a declaration for the callee.
656 if (SeenSet.contains(Caller)) {
657 emitDeclaration(&F, O);
658 break;
659 }
660 }
661 SeenSet.insert(&F);
662 }
663 for (const GlobalAlias &GA : M.aliases())
664 emitAliasDeclaration(&GA, O);
665}
666
667void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
668 // Construct a default subtarget off of the TargetMachine defaults. The
669 // rest of NVPTX isn't friendly to change subtargets per function and
670 // so the default TargetMachine will have all of the options.
671 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
672 const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
673 SmallString<128> Str1;
674 raw_svector_ostream OS1(Str1);
675
676 // Emit header before any dwarf directives are emitted below.
677 emitHeader(M, OS1, *STI);
678 OutStreamer->emitRawText(OS1.str());
679}
680
682 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
683 const NVPTXSubtarget &STI =
684 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
685 if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
686 report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
687
688 // We need to call the parent's one explicitly.
689 bool Result = AsmPrinter::doInitialization(M);
690
691 GlobalsEmitted = false;
692
693 return Result;
694}
695
696void NVPTXAsmPrinter::emitGlobals(const Module &M) {
697 SmallString<128> Str2;
698 raw_svector_ostream OS2(Str2);
699
700 emitDeclarations(M, OS2);
701
702 // As ptxas does not support forward references of globals, we need to first
703 // sort the list of module-level globals in def-use order. We visit each
704 // global variable in order, and ensure that we emit it *after* its dependent
705 // globals. We use a little extra memory maintaining both a set and a list to
706 // have fast searches while maintaining a strict ordering.
710
711 // Visit each global variable, in order
712 for (const GlobalVariable &I : M.globals())
713 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
714
715 assert(GVVisited.size() == M.global_size() && "Missed a global variable");
716 assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
717
718 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
719 const NVPTXSubtarget &STI =
720 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
721
722 // Print out module-level global variables in proper order
723 for (const GlobalVariable *GV : Globals)
724 printModuleLevelGV(GV, OS2, /*ProcessDemoted=*/false, STI);
725
726 OS2 << '\n';
727
728 OutStreamer->emitRawText(OS2.str());
729}
730
731void NVPTXAsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) {
734
735 MCSymbol *Name = getSymbol(&GA);
736
737 OS << ".alias " << Name->getName() << ", " << GA.getAliaseeObject()->getName()
738 << ";\n";
739
740 OutStreamer->emitRawText(OS.str());
741}
742
743void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
744 const NVPTXSubtarget &STI) {
745 const unsigned PTXVersion = STI.getPTXVersion();
746
747 O << "//\n"
748 "// Generated by LLVM NVPTX Back-End\n"
749 "//\n"
750 "\n"
751 << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"
752 << ".target " << STI.getTargetName();
753
754 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
755 if (NTM.getDrvInterface() == NVPTX::NVCL)
756 O << ", texmode_independent";
757
758 bool HasFullDebugInfo = false;
759 for (DICompileUnit *CU : M.debug_compile_units()) {
760 switch(CU->getEmissionKind()) {
763 break;
766 HasFullDebugInfo = true;
767 break;
768 }
769 if (HasFullDebugInfo)
770 break;
771 }
772 if (HasFullDebugInfo)
773 O << ", debug";
774
775 O << "\n"
776 << ".address_size " << (NTM.is64Bit() ? "64" : "32") << "\n"
777 << "\n";
778}
779
781 // If we did not emit any functions, then the global declarations have not
782 // yet been emitted.
783 if (!GlobalsEmitted) {
784 emitGlobals(M);
785 GlobalsEmitted = true;
786 }
787
788 // call doFinalization
789 bool ret = AsmPrinter::doFinalization(M);
790
792
793 auto *TS =
794 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
795 // Close the last emitted section
796 if (hasDebugInfo()) {
797 TS->closeLastSection();
798 // Emit empty .debug_macinfo section for better support of the empty files.
799 OutStreamer->emitRawText("\t.section\t.debug_macinfo\t{\t}");
800 }
801
802 // Output last DWARF .file directives, if any.
803 TS->outputDwarfFileDirectives();
804
805 return ret;
806}
807
808// This function emits appropriate linkage directives for
809// functions and global variables.
810//
811// extern function declaration -> .extern
812// extern function definition -> .visible
813// external global variable with init -> .visible
814// external without init -> .extern
815// appending -> not allowed, fatal error.
816// for any linkage other than
817// internal, private, linker_private,
818// linker_private_weak, linker_private_weak_def_auto,
819// we emit -> .weak.
820
821void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
822 raw_ostream &O) {
823 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
824 if (V->hasExternalLinkage()) {
825 if (const auto *GVar = dyn_cast<GlobalVariable>(V))
826 O << (GVar->hasInitializer() ? ".visible " : ".extern ");
827 else if (V->isDeclaration())
828 O << ".extern ";
829 else
830 O << ".visible ";
831 } else if (V->hasAppendingLinkage()) {
832 report_fatal_error("Symbol '" + (V->hasName() ? V->getName() : "") +
833 "' has unsupported appending linkage type");
834 } else if (!V->hasInternalLinkage() && !V->hasPrivateLinkage()) {
835 O << ".weak ";
836 }
837 }
838}
839
840void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
841 raw_ostream &O, bool ProcessDemoted,
842 const NVPTXSubtarget &STI) {
843 // Skip meta data
844 if (GVar->hasSection())
845 if (GVar->getSection() == "llvm.metadata")
846 return;
847
848 // Skip LLVM intrinsic global variables
849 if (GVar->getName().starts_with("llvm.") ||
850 GVar->getName().starts_with("nvvm."))
851 return;
852
853 const DataLayout &DL = getDataLayout();
854
855 // GlobalVariables are always constant pointers themselves.
856 Type *ETy = GVar->getValueType();
857
858 if (GVar->hasExternalLinkage()) {
859 if (GVar->hasInitializer())
860 O << ".visible ";
861 else
862 O << ".extern ";
863 } else if (STI.getPTXVersion() >= 50 && GVar->hasCommonLinkage() &&
865 O << ".common ";
866 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
868 GVar->hasCommonLinkage()) {
869 O << ".weak ";
870 }
871
872 if (isTexture(*GVar)) {
873 O << ".global .texref " << getTextureName(*GVar) << ";\n";
874 return;
875 }
876
877 if (isSurface(*GVar)) {
878 O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
879 return;
880 }
881
882 if (GVar->isDeclaration()) {
883 // (extern) declarations, no definition or initializer
884 // Currently the only known declaration is for an automatic __local
885 // (.shared) promoted to global.
886 emitPTXGlobalVariable(GVar, O, STI);
887 O << ";\n";
888 return;
889 }
890
891 if (isSampler(*GVar)) {
892 O << ".global .samplerref " << getSamplerName(*GVar);
893
894 const Constant *Initializer = nullptr;
895 if (GVar->hasInitializer())
896 Initializer = GVar->getInitializer();
897 const ConstantInt *CI = nullptr;
898 if (Initializer)
899 CI = dyn_cast<ConstantInt>(Initializer);
900 if (CI) {
901 unsigned sample = CI->getZExtValue();
902
903 O << " = { ";
904
905 for (int i = 0,
906 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
907 i < 3; i++) {
908 O << "addr_mode_" << i << " = ";
909 switch (addr) {
910 case 0:
911 O << "wrap";
912 break;
913 case 1:
914 O << "clamp_to_border";
915 break;
916 case 2:
917 O << "clamp_to_edge";
918 break;
919 case 3:
920 O << "wrap";
921 break;
922 case 4:
923 O << "mirror";
924 break;
925 }
926 O << ", ";
927 }
928 O << "filter_mode = ";
929 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
930 case 0:
931 O << "nearest";
932 break;
933 case 1:
934 O << "linear";
935 break;
936 case 2:
937 llvm_unreachable("Anisotropic filtering is not supported");
938 default:
939 O << "nearest";
940 break;
941 }
942 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
943 O << ", force_unnormalized_coords = 1";
944 }
945 O << " }";
946 }
947
948 O << ";\n";
949 return;
950 }
951
952 if (GVar->hasPrivateLinkage()) {
953 if (GVar->getName().starts_with("unrollpragma"))
954 return;
955
956 // FIXME - need better way (e.g. Metadata) to avoid generating this global
957 if (GVar->getName().starts_with("filename"))
958 return;
959 if (GVar->use_empty())
960 return;
961 }
962
963 const Function *DemotedFunc = nullptr;
964 if (!ProcessDemoted && canDemoteGlobalVar(GVar, DemotedFunc)) {
965 O << "// " << GVar->getName() << " has been demoted\n";
966 localDecls[DemotedFunc].push_back(GVar);
967 return;
968 }
969
970 O << ".";
971 emitPTXAddressSpace(GVar->getAddressSpace(), O);
972
973 if (isManaged(*GVar)) {
974 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30)
976 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
977 O << " .attribute(.managed)";
978 }
979
980 O << " .align "
981 << GVar->getAlign().value_or(DL.getPrefTypeAlign(ETy)).value();
982
983 if (ETy->isPointerTy() || ((ETy->isIntegerTy() || ETy->isFloatingPointTy()) &&
984 ETy->getScalarSizeInBits() <= 64)) {
985 O << " .";
986 // Special case: ABI requires that we use .u8 for predicates
987 if (ETy->isIntegerTy(1))
988 O << "u8";
989 else
990 O << getPTXFundamentalTypeStr(ETy, false);
991 O << " ";
992 getSymbol(GVar)->print(O, MAI);
993
994 // Ptx allows variable initilization only for constant and global state
995 // spaces.
996 if (GVar->hasInitializer()) {
997 if ((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
999 const Constant *Initializer = GVar->getInitializer();
1000 // 'undef' is treated as there is no value specified.
1001 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1002 O << " = ";
1003 printScalarConstant(Initializer, O);
1004 }
1005 } else {
1006 // The frontend adds zero-initializer to device and constant variables
1007 // that don't have an initial value, and UndefValue to shared
1008 // variables, so skip warning for this case.
1009 if (!GVar->getInitializer()->isNullValue() &&
1010 !isa<UndefValue>(GVar->getInitializer())) {
1011 report_fatal_error("initial value of '" + GVar->getName() +
1012 "' is not allowed in addrspace(" +
1013 Twine(GVar->getAddressSpace()) + ")");
1014 }
1015 }
1016 }
1017 } else {
1018 // Although PTX has direct support for struct type and array type and
1019 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1020 // targets that support these high level field accesses. Structs, arrays
1021 // and vectors are lowered into arrays of bytes.
1022 switch (ETy->getTypeID()) {
1023 case Type::IntegerTyID: // Integers larger than 64 bits
1024 case Type::FP128TyID:
1025 case Type::StructTyID:
1026 case Type::ArrayTyID:
1027 case Type::FixedVectorTyID: {
1028 const uint64_t ElementSize = DL.getTypeStoreSize(ETy);
1029 // Ptx allows variable initilization only for constant and
1030 // global state spaces.
1031 if (((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1032 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1033 GVar->hasInitializer()) {
1034 const Constant *Initializer = GVar->getInitializer();
1035 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1036 AggBuffer aggBuffer(ElementSize, *this);
1037 bufferAggregateConstant(Initializer, &aggBuffer);
1038 if (aggBuffer.numSymbols()) {
1039 const unsigned int ptrSize = MAI->getCodePointerSize();
1040 if (ElementSize % ptrSize ||
1041 !aggBuffer.allSymbolsAligned(ptrSize)) {
1042 // Print in bytes and use the mask() operator for pointers.
1043 if (!STI.hasMaskOperator())
1045 "initialized packed aggregate with pointers '" +
1046 GVar->getName() +
1047 "' requires at least PTX ISA version 7.1");
1048 O << " .u8 ";
1049 getSymbol(GVar)->print(O, MAI);
1050 O << "[" << ElementSize << "] = {";
1051 aggBuffer.printBytes(O);
1052 O << "}";
1053 } else {
1054 O << " .u" << ptrSize * 8 << " ";
1055 getSymbol(GVar)->print(O, MAI);
1056 O << "[" << ElementSize / ptrSize << "] = {";
1057 aggBuffer.printWords(O);
1058 O << "}";
1059 }
1060 } else {
1061 O << " .b8 ";
1062 getSymbol(GVar)->print(O, MAI);
1063 O << "[" << ElementSize << "] = {";
1064 aggBuffer.printBytes(O);
1065 O << "}";
1066 }
1067 } else {
1068 O << " .b8 ";
1069 getSymbol(GVar)->print(O, MAI);
1070 if (ElementSize)
1071 O << "[" << ElementSize << "]";
1072 }
1073 } else {
1074 O << " .b8 ";
1075 getSymbol(GVar)->print(O, MAI);
1076 if (ElementSize)
1077 O << "[" << ElementSize << "]";
1078 }
1079 break;
1080 }
1081 default:
1082 llvm_unreachable("type not supported yet");
1083 }
1084 }
1085 O << ";\n";
1086}
1087
1088void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
1089 const Value *v = Symbols[nSym];
1090 const Value *v0 = SymbolsBeforeStripping[nSym];
1091 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1092 MCSymbol *Name = AP.getSymbol(GVar);
1093 PointerType *PTy = dyn_cast<PointerType>(v0->getType());
1094 // Is v0 a generic pointer?
1095 bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1096 if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
1097 os << "generic(";
1098 Name->print(os, AP.MAI);
1099 os << ")";
1100 } else {
1101 Name->print(os, AP.MAI);
1102 }
1103 } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
1104 const MCExpr *Expr = AP.lowerConstantForGV(CExpr, false);
1105 AP.printMCExpr(*Expr, os);
1106 } else
1107 llvm_unreachable("symbol type unknown");
1108}
1109
1110void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1111 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1112 // Do not emit trailing zero initializers. They will be zero-initialized by
1113 // ptxas. This saves on both space requirements for the generated PTX and on
1114 // memory use by ptxas. (See:
1115 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#global-state-space)
1116 unsigned int InitializerCount = size;
1117 // TODO: symbols make this harder, but it would still be good to trim trailing
1118 // 0s for aggs with symbols as well.
1119 if (numSymbols() == 0)
1120 while (InitializerCount >= 1 && !buffer[InitializerCount - 1])
1121 InitializerCount--;
1122
1123 symbolPosInBuffer.push_back(InitializerCount);
1124 unsigned int nSym = 0;
1125 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1126 for (unsigned int pos = 0; pos < InitializerCount;) {
1127 if (pos)
1128 os << ", ";
1129 if (pos != nextSymbolPos) {
1130 os << (unsigned int)buffer[pos];
1131 ++pos;
1132 continue;
1133 }
1134 // Generate a per-byte mask() operator for the symbol, which looks like:
1135 // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
1136 // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
1137 std::string symText;
1138 llvm::raw_string_ostream oss(symText);
1139 printSymbol(nSym, oss);
1140 for (unsigned i = 0; i < ptrSize; ++i) {
1141 if (i)
1142 os << ", ";
1143 llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
1144 os << "(" << symText << ")";
1145 }
1146 pos += ptrSize;
1147 nextSymbolPos = symbolPosInBuffer[++nSym];
1148 assert(nextSymbolPos >= pos);
1149 }
1150}
1151
1152void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1153 unsigned int ptrSize = AP.MAI->getCodePointerSize();
1154 symbolPosInBuffer.push_back(size);
1155 unsigned int nSym = 0;
1156 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1157 assert(nextSymbolPos % ptrSize == 0);
1158 for (unsigned int pos = 0; pos < size; pos += ptrSize) {
1159 if (pos)
1160 os << ", ";
1161 if (pos == nextSymbolPos) {
1162 printSymbol(nSym, os);
1163 nextSymbolPos = symbolPosInBuffer[++nSym];
1164 assert(nextSymbolPos % ptrSize == 0);
1165 assert(nextSymbolPos >= pos + ptrSize);
1166 } else if (ptrSize == 4)
1167 os << support::endian::read32le(&buffer[pos]);
1168 else
1169 os << support::endian::read64le(&buffer[pos]);
1170 }
1171}
1172
1173void NVPTXAsmPrinter::emitDemotedVars(const Function *F, raw_ostream &O) {
1174 auto It = localDecls.find(F);
1175 if (It == localDecls.end())
1176 return;
1177
1178 ArrayRef<const GlobalVariable *> GVars = It->second;
1179
1180 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
1181 const NVPTXSubtarget &STI =
1182 *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
1183
1184 for (const GlobalVariable *GV : GVars) {
1185 O << "\t// demoted variable\n\t";
1186 printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
1187 }
1188}
1189
/// Write the PTX state-space name for \p AddressSpace to \p O, without a
/// leading '.'. The names produced are "local", "global", "const" and
/// "shared"; any other address space is reported as a fatal error.
// NOTE(review): the `case` labels selecting each branch and the tail of the
// report_fatal_error call are not visible in this listing.
1190void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1191 raw_ostream &O) const {
1192 switch (AddressSpace) {
1194 O << "local";
1195 break;
1197 O << "global";
1198 break;
1200 O << "const";
1201 break;
1203 O << "shared";
1204 break;
1205 default:
1206 report_fatal_error("Bad address space found while emitting PTX: " +
1208 break;
1209 }
1210}
1211
1212std::string
1213NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1214 switch (Ty->getTypeID()) {
1215 case Type::IntegerTyID: {
1216 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1217 if (NumBits == 1)
1218 return "pred";
1219 if (NumBits <= 64) {
1220 std::string name = "u";
1221 return name + utostr(NumBits);
1222 }
1223 llvm_unreachable("Integer too large");
1224 break;
1225 }
1226 case Type::BFloatTyID:
1227 case Type::HalfTyID:
1228 // fp16 and bf16 are stored as .b16 for compatibility with pre-sm_53
1229 // PTX assembly.
1230 return "b16";
1231 case Type::FloatTyID:
1232 return "f32";
1233 case Type::DoubleTyID:
1234 return "f64";
1235 case Type::PointerTyID: {
1236 unsigned PtrSize = TM.getPointerSizeInBits(Ty->getPointerAddressSpace());
1237 assert((PtrSize == 64 || PtrSize == 32) && "Unexpected pointer size");
1238
1239 if (PtrSize == 64)
1240 if (useB4PTR)
1241 return "b64";
1242 else
1243 return "u64";
1244 else if (useB4PTR)
1245 return "b32";
1246 else
1247 return "u32";
1248 }
1249 default:
1250 break;
1251 }
1252 llvm_unreachable("unexpected type");
1253}
1254
/// Emit the PTX declaration of \p GVar to \p O, without an initializer and
/// without a trailing ';'.
///
/// The output has the shape
///   .<space> [.attribute(.managed)] .align <N> .<type> <name>[<size>]
/// where the alignment is the variable's explicit alignment if it has one and
/// the preferred alignment of its value type otherwise.
///
/// \param GVar the global variable to declare.
/// \param O    the stream receiving the declaration.
/// \param STI  subtarget, consulted for the PTX and SM versions when the
///             variable is managed.
1255void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1256 raw_ostream &O,
1257 const NVPTXSubtarget &STI) {
1258 const DataLayout &DL = getDataLayout();
1259
1260 // GlobalVariables are always constant pointers themselves.
1261 Type *ETy = GVar->getValueType();
1262
1263 O << ".";
1264 emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1265 if (isManaged(*GVar)) {
1266 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30)
// NOTE(review): the start of the call that takes this message is not visible
// in this listing; presumably a fatal-error report.
1268 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1269
1270 O << " .attribute(.managed)";
1271 }
1272 O << " .align "
1273 << GVar->getAlign().value_or(DL.getPrefTypeAlign(ETy)).value();
1274
1275 // Special case for i128/fp128: declared as a 16-byte array.
1276 if (ETy->getScalarSizeInBits() == 128) {
1277 O << " .b8 ";
1278 getSymbol(GVar)->print(O, MAI);
1279 O << "[16]";
1280 return;
1281 }
1282
// Scalars (floating point, integer or pointer) use their fundamental PTX type.
1283 if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1284 O << " ." << getPTXFundamentalTypeStr(ETy) << " ";
1285 getSymbol(GVar)->print(O, MAI);
1286 return;
1287 }
1288
1289 int64_t ElementSize = 0;
1290
1291 // Although PTX has direct support for struct and array types, and LLVM IR
1292 // is very similar to PTX, the LLVM CodeGen does not support targets that
1293 // provide these high-level field accesses. Structs and arrays are lowered
1294 // into arrays of bytes.
1295 switch (ETy->getTypeID()) {
1296 case Type::StructTyID:
1297 case Type::ArrayTyID:
// The size is the type's store size; a zero size prints empty brackets "[]".
1299 ElementSize = DL.getTypeStoreSize(ETy);
1300 O << " .b8 ";
1301 getSymbol(GVar)->print(O, MAI);
1302 O << "[";
1303 if (ElementSize) {
1304 O << ElementSize;
1305 }
1306 O << "]";
1307 break;
1308 default:
1309 llvm_unreachable("type not supported yet");
1310 }
1311}
1312
/// Emit the PTX parameter list of \p F to \p O.
///
/// Prints "()" when \p F has no arguments and is not variadic. Otherwise
/// prints a parenthesized list with one ".param" declaration per argument,
/// separated by ",\n", followed by an unsized ".b8" array parameter when
/// \p F is variadic.
///
/// Each argument is classified in this order:
///   1. kernel sampler / texture / surface parameters,
///   2. byval parameters (byte array of the byval type's alloc size),
///   3. types for which shouldPassAsArray() is true (byte array),
///   4. scalars, with kernels and non-kernel functions printed differently.
1313void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1314 const DataLayout &DL = getDataLayout();
1315 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1316 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
// MFI is null when there is no current MachineFunction.
1317 const NVPTXMachineFunctionInfo *MFI =
1318 MF ? MF->getInfo<NVPTXMachineFunctionInfo>() : nullptr;
1319
1320 bool IsFirst = true;
1321 const bool IsKernelFunc = isKernelFunction(*F);
1322
1323 if (F->arg_empty() && !F->isVarArg()) {
1324 O << "()";
1325 return;
1326 }
1327
1328 O << "(\n";
1329
1330 for (const Argument &Arg : F->args()) {
1331 Type *Ty = Arg.getType();
1332 const std::string ParamSym = TLI->getParamName(F, Arg.getArgNo());
1333
1334 if (!IsFirst)
1335 O << ",\n";
1336
1337 IsFirst = false;
1338
1339 // Handle image/sampler parameters
1340 if (IsKernelFunc) {
// The three kinds are mutually exclusive; sampler wins over texture, and
// texture wins over surface.
1341 const bool IsSampler = isSampler(Arg);
1342 const bool IsTexture = !IsSampler && isImageReadOnly(Arg);
1343 const bool IsSurface = !IsSampler && !IsTexture &&
1344 (isImageReadWrite(Arg) || isImageWriteOnly(Arg));
1345 if (IsSampler || IsTexture || IsSurface) {
// The ".u64 .ptr" prefix is emitted unless the machine function info already
// knows this parameter as an image handle symbol.
1346 const bool EmitImgPtr = !MFI || !MFI->checkImageHandleSymbol(ParamSym);
1347 O << "\t.param ";
1348 if (EmitImgPtr)
1349 O << ".u64 .ptr ";
1350
1351 if (IsSampler)
1352 O << ".samplerref ";
1353 else if (IsTexture)
1354 O << ".texref ";
1355 else // IsSurface
1356 O << ".surfref ";
1357 O << ParamSym;
1358 continue;
1359 }
1360 }
1361
// Alignment for an aggregate parameter: an explicit alignment found by
// getAlign() for this argument index takes precedence. Otherwise it is the
// larger of the target's optimized type alignment and, for byval arguments
// only, the parameter's own alignment.
1362 auto GetOptimalAlignForParam = [TLI, &DL, F, &Arg](Type *Ty) -> Align {
1363 if (MaybeAlign StackAlign =
1364 getAlign(*F, Arg.getArgNo() + AttributeList::FirstArgIndex))
1365 return StackAlign.value();
1366
1367 Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL);
1368 MaybeAlign ParamAlign =
1369 Arg.hasByValAttr() ? Arg.getParamAlign() : MaybeAlign();
1370 return std::max(TypeAlign, ParamAlign.valueOrOne());
1371 };
1372
1373 if (Arg.hasByValAttr()) {
1374 // param has byVal attribute.
1375 Type *ETy = Arg.getParamByValType();
1376 assert(ETy && "Param should have byval type");
1377
1378 // Print .param .align <a> .b8 .param[size];
1379 // <a> = optimal alignment for the element type; always multiple of
1380 // PAL.getParamAlignment
1381 // size = typeallocsize of element type
1382 const Align OptimalAlign =
1383 IsKernelFunc ? GetOptimalAlignForParam(ETy)
1384 : TLI->getFunctionByValParamAlign(
1385 F, ETy, Arg.getParamAlign().valueOrOne(), DL);
1386
1387 O << "\t.param .align " << OptimalAlign.value() << " .b8 " << ParamSym
1388 << "[" << DL.getTypeAllocSize(ETy) << "]";
1389 continue;
1390 }
1391
1392 if (shouldPassAsArray(Ty)) {
1393 // Just print .param .align <a> .b8 .param[size];
1394 // <a> = optimal alignment for the element type; always multiple of
1395 // PAL.getParamAlignment
1396 // size = typeallocsize of element type
1397 Align OptimalAlign = GetOptimalAlignForParam(Ty);
1398
1399 O << "\t.param .align " << OptimalAlign.value() << " .b8 " << ParamSym
1400 << "[" << DL.getTypeAllocSize(Ty) << "]";
1401
1402 continue;
1403 }
1404 // Just a scalar
1405 auto *PTy = dyn_cast<PointerType>(Ty);
1406 unsigned PTySizeInBits = 0;
1407 if (PTy) {
1408 PTySizeInBits =
1409 TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
1410 assert(PTySizeInBits && "Invalid pointer size");
1411 }
1412
1413 if (IsKernelFunc) {
1414 if (PTy) {
1415 O << "\t.param .u" << PTySizeInBits << " .ptr";
1416
// Append the state-space qualifier for the pointer's address space; the
// default branch adds no qualifier.
// NOTE(review): the `case` labels of this switch are not visible in this
// listing.
1417 switch (PTy->getAddressSpace()) {
1418 default:
1419 break;
1421 O << " .global";
1422 break;
1424 O << " .shared";
1425 break;
1427 O << " .const";
1428 break;
1430 O << " .local";
1431 break;
1432 }
1433
1434 O << " .align " << Arg.getParamAlign().valueOrOne().value() << " "
1435 << ParamSym;
1436 continue;
1437 }
1438
1439 // non-pointer scalar to kernel func
1440 O << "\t.param .";
1441 // Special case: predicate operands become .u8 types
1442 if (Ty->isIntegerTy(1))
1443 O << "u8";
1444 else
1445 O << getPTXFundamentalTypeStr(Ty);
1446 O << " " << ParamSym;
1447 continue;
1448 }
1449 // Non-kernel function, just print .param .b<size> for ABI
1450 // and .reg .b<size> for non-ABI
1451 unsigned Size;
1452 if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
1453 Size = promoteScalarArgumentSize(ITy->getBitWidth());
1454 } else if (PTy) {
1455 assert(PTySizeInBits && "Invalid pointer size");
1456 Size = PTySizeInBits;
1457 } else
// NOTE(review): the statement belonging to this `else` is not visible in this
// listing; presumably it sets Size for the remaining scalar types.
1459 O << "\t.param .b" << Size << " " << ParamSym;
1460 }
1461
// Variadic functions get one extra unsized byte-array parameter, aligned to
// the subtarget's maximum required alignment.
1462 if (F->isVarArg()) {
1463 if (!IsFirst)
1464 O << ",\n";
1465 O << "\t.param .align " << STI.getMaxRequiredAlignment() << " .b8 "
1466 << TLI->getParamName(F, /* vararg */ -1) << "[]";
1467 }
1468
1469 O << "\n)";
1470}
1471
/// Build the per-register-class virtual register numbering for \p MF in
/// VRegMapping and emit the function's register declarations.
///
/// The text is accumulated in a local buffer and handed to OutStreamer as raw
/// text at the end. It consists of the local stack depot plus %SP/%SPL when
/// the frame has a non-zero stack size, followed by one ".reg" declaration
/// for every register class that has at least one mapped virtual register.
// NOTE(review): the declarations of `O`, `MRI`, `TRI` and `VR` used below are
// not visible in this listing.
1472void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1473 const MachineFunction &MF) {
1474 SmallString<128> Str;
1476
1477 // Map the global virtual register number to a register class specific
1478 // virtual register number starting from 1 with that class.
1480
1481 // Emit the Fake Stack Object
1482 const MachineFrameInfo &MFI = MF.getFrameInfo();
1483 int64_t NumBytes = MFI.getStackSize();
1484 if (NumBytes) {
1485 O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1486 << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
// %SP and %SPL are declared with the width of a pointer on the target.
1487 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1488 O << "\t.reg .b64 \t%SP;\n"
1489 << "\t.reg .b64 \t%SPL;\n";
1490 } else {
1491 O << "\t.reg .b32 \t%SP;\n"
1492 << "\t.reg .b32 \t%SPL;\n";
1493 }
1494 }
1495
1496 // Go through all virtual registers to establish the mapping between the
1497 // global virtual
1498 // register number and the per class virtual register number.
1499 // We use the per class virtual register number in the ptx output.
1500 for (unsigned I : llvm::seq(MRI->getNumVirtRegs())) {
// Registers with neither uses nor defs get no number.
1502 if (MRI->use_empty(VR) && MRI->def_empty(VR))
1503 continue;
1504 auto &RCRegMap = VRegMapping[MRI->getRegClass(VR)];
1505 RCRegMap[VR] = RCRegMap.size() + 1;
1506 }
1507
1508 // Emit declaration of the virtual registers or 'physical' registers for
1509 // each register class
1510 for (const TargetRegisterClass *RC : TRI->regclasses()) {
1511 const unsigned N = VRegMapping[RC].size();
1512
1513 // Only declare those registers that may be used.
1514 if (N) {
1515 const StringRef RCName = getNVPTXRegClassName(RC);
1516 const StringRef RCStr = getNVPTXRegClassStr(RC);
// The declared count is N + 1 because the per-class numbers assigned above
// run from 1 to N.
1517 O << "\t.reg " << RCName << " \t" << RCStr << "<" << (N + 1) << ">;\n";
1518 }
1519 }
1520
1521 OutStreamer->emitRawText(O.str());
1522}
1523
1524/// Translate virtual register numbers in DebugInfo locations to their printed
1525/// encodings, as used by CUDA-GDB.
///
/// Clears the register info's debug register map, then adds one entry per
/// virtual register in VRegMapping, keyed by the register and holding its
/// printed name from getVirtualRegisterName().
// NOTE(review): the declaration of `STI` is not visible in this listing.
1526void NVPTXAsmPrinter::encodeDebugInfoRegisterNumbers(
1527 const MachineFunction &MF) {
1529 const NVPTXRegisterInfo *registerInfo = STI.getRegisterInfo();
1530
1531 // Clear the old mapping, and add the new one. This mapping is used after the
1532 // printing of the current function is complete, but before the next function
1533 // is printed.
1534 registerInfo->clearDebugRegisterMap();
1535
// Outer loop: one entry per register class; inner loop: each mapped register.
1536 for (auto &classMap : VRegMapping) {
1537 for (auto &registerMapping : classMap.getSecond()) {
1538 auto reg = registerMapping.getFirst();
1539 registerInfo->addToDebugRegisterMap(reg, getVirtualRegisterName(reg));
1540 }
1541 }
1542}
1543
/// Print \p Fp to \p O as a PTX hexadecimal floating-point literal.
///
/// A float is printed as "0f" followed by 8 upper-case hex digits and a
/// double as "0d" followed by 16. Any other floating-point type is
/// unreachable.
1544void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp,
1545 raw_ostream &O) const {
1546 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
// NOTE(review): `ignored` is not used by any visible line; the statements
// that would use it (one per branch below) are missing from this listing.
1547 bool ignored;
1548 unsigned int numHex;
1549 const char *lead;
1550
1551 if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1552 numHex = 8;
1553 lead = "0f";
1555 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1556 numHex = 16;
1557 lead = "0d";
1559 } else
1560 llvm_unreachable("unsupported fp type");
1561
// Print the raw bit pattern of the value, zero-padded to numHex digits.
1562 APInt API = APF.bitcastToAPInt();
1563 O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1564}
1565
1566void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1567 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1568 O << CI->getValue();
1569 return;
1570 }
1571 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1572 printFPConstant(CFP, O);
1573 return;
1574 }
1575 if (isa<ConstantPointerNull>(CPV)) {
1576 O << "0";
1577 return;
1578 }
1579 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1580 const bool IsNonGenericPointer = GVar->getAddressSpace() != 0;
1581 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1582 O << "generic(";
1583 getSymbol(GVar)->print(O, MAI);
1584 O << ")";
1585 } else {
1586 getSymbol(GVar)->print(O, MAI);
1587 }
1588 return;
1589 }
1590 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1591 const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr), false);
1592 printMCExpr(*E, O);
1593 return;
1594 }
1595 llvm_unreachable("Not scalar type found in printScalarConstant()");
1596}
1597
/// Append the byte representation of the constant \p CPV to \p AggBuffer,
/// least-significant byte first.
///
/// \param CPV       the constant to serialize.
/// \param Bytes     when non-zero, the number of bytes the value should occupy
///                  in the buffer; when zero, the alloc size of CPV's type is
///                  used for zero-filled values.
/// \param AggBuffer the destination buffer.
///
/// Pointers and ptrtoint expressions that do not fold to an integer are
/// recorded as symbols, followed by AllocSize zero bytes as a placeholder.
1598void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1599 AggBuffer *AggBuffer) {
1600 const DataLayout &DL = getDataLayout();
1601 int AllocSize = DL.getTypeAllocSize(CPV->getType());
1602 if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1603 // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1604 // only the space allocated by CPV.
1605 AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1606 return;
1607 }
1608
1609 // Helper for filling AggBuffer with APInts.
1610 auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
1611 size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1612 SmallVector<unsigned char, 16> Buf(NumBytes);
1613 // `extractBitsAsZExtValue` does not allow the extraction of bits beyond the
1614 // input's bit width, and i1 arrays may not have a length that is a multiple
1615 // of 8. We handle the last byte separately, so we never request out of
1616 // bounds bits.
1617 for (unsigned I = 0; I < NumBytes - 1; ++I) {
1618 Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1619 }
1620 size_t LastBytePosition = (NumBytes - 1) * 8;
1621 size_t LastByteBits = Val.getBitWidth() - LastBytePosition;
1622 Buf[NumBytes - 1] =
1623 Val.extractBitsAsZExtValue(LastByteBits, LastBytePosition);
// NOTE(review): presumably addBytes pads up to `Bytes` when that exceeds
// NumBytes; confirm against AggBuffer::addBytes.
1624 AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1625 };
1626
1627 switch (CPV->getType()->getTypeID()) {
1628 case Type::IntegerTyID:
1629 if (const auto *CI = dyn_cast<ConstantInt>(CPV)) {
1630 AddIntToBuffer(CI->getValue());
1631 break;
1632 }
1633 if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
// Try to fold the expression down to a plain integer first.
1634 if (const auto *CI =
1635 dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) {
1636 AddIntToBuffer(CI->getValue());
1637 break;
1638 }
// A ptrtoint that did not fold is recorded as a symbol. Both the pointer
// with casts stripped and the original operand are stored.
1639 if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1640 Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1641 AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1642 AggBuffer->addZeros(AllocSize);
1643 break;
1644 }
1645 }
1646 llvm_unreachable("unsupported integer const type");
1647 break;
1648
1649 case Type::HalfTyID:
1650 case Type::BFloatTyID:
1651 case Type::FloatTyID:
1652 case Type::DoubleTyID:
// Floating-point values are serialized through their raw bit pattern.
1653 AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1654 break;
1655
1656 case Type::PointerTyID: {
1657 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1658 AggBuffer->addSymbol(GVar, GVar);
1659 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1660 const Value *v = Cexpr->stripPointerCasts();
1661 AggBuffer->addSymbol(v, Cexpr);
1662 }
// The zero placeholder bytes are added even when no symbol was recorded.
1663 AggBuffer->addZeros(AllocSize);
1664 break;
1665 }
1666
// NOTE(review): a `case` label appears to be missing from this listing
// between the next two labels.
1667 case Type::ArrayTyID:
1669 case Type::StructTyID: {
1670 if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1671 bufferAggregateConstant(CPV, AggBuffer);
// Pad out to the requested size when it exceeds the type's alloc size.
1672 if (Bytes > AllocSize)
1673 AggBuffer->addZeros(Bytes - AllocSize);
// NOTE(review): null values return early at the top of this function, so this
// branch looks unreachable in practice - confirm.
1674 } else if (isa<ConstantAggregateZero>(CPV))
1675 AggBuffer->addZeros(Bytes);
1676 else
1677 llvm_unreachable("Unexpected Constant type");
1678 break;
1679 }
1680
1681 default:
1682 llvm_unreachable("unsupported type");
1683 }
1684}
1685
1686void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1687 AggBuffer *aggBuffer) {
1688 const DataLayout &DL = getDataLayout();
1689
1690 auto ExtendBuffer = [](APInt Val, AggBuffer *Buffer) {
1691 for (unsigned I : llvm::seq(Val.getBitWidth() / 8))
1692 Buffer->addByte(Val.extractBitsAsZExtValue(8, I * 8));
1693 };
1694
1695 // Integers of arbitrary width
1696 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1697 ExtendBuffer(CI->getValue(), aggBuffer);
1698 return;
1699 }
1700
1701 // f128
1702 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1703 if (CFP->getType()->isFP128Ty()) {
1704 ExtendBuffer(CFP->getValueAPF().bitcastToAPInt(), aggBuffer);
1705 return;
1706 }
1707 }
1708
1709 // Old constants
1710 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1711 for (const auto &Op : CPV->operands())
1712 bufferLEByte(cast<Constant>(Op), 0, aggBuffer);
1713 return;
1714 }
1715
1716 if (const auto *CDS = dyn_cast<ConstantDataSequential>(CPV)) {
1717 for (unsigned I : llvm::seq(CDS->getNumElements()))
1718 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(I)), 0, aggBuffer);
1719 return;
1720 }
1721
1722 if (isa<ConstantStruct>(CPV)) {
1723 if (CPV->getNumOperands()) {
1724 StructType *ST = cast<StructType>(CPV->getType());
1725 for (unsigned I : llvm::seq(CPV->getNumOperands())) {
1726 int EndOffset = (I + 1 == CPV->getNumOperands())
1727 ? DL.getStructLayout(ST)->getElementOffset(0) +
1728 DL.getTypeAllocSize(ST)
1729 : DL.getStructLayout(ST)->getElementOffset(I + 1);
1730 int Bytes = EndOffset - DL.getStructLayout(ST)->getElementOffset(I);
1731 bufferLEByte(cast<Constant>(CPV->getOperand(I)), Bytes, aggBuffer);
1732 }
1733 }
1734 return;
1735 }
1736 llvm_unreachable("unsupported constant type in printAggregateConstant()");
1737}
1738
1739/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1740/// a copy from AsmPrinter::lowerConstant, except customized to only handle
1741/// expressions that are representable in PTX and create
1742/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
///
/// \param CV                the constant to lower.
/// \param ProcessingGeneric when true, a GlobalValue reached by the lowering
///                          is wrapped in an NVPTXGenericMCSymbolRefExpr. It
///                          is set to true when recursing through an
///                          addrspacecast to address space 0.
/// \returns the lowered expression. An expression that is neither handled
///          below nor changed by constant folding is a fatal error.
1743const MCExpr *
1744NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV,
1745 bool ProcessingGeneric) const {
1746 MCContext &Ctx = OutContext;
1747
// Null and undef values both lower to the constant 0.
1748 if (CV->isNullValue() || isa<UndefValue>(CV))
1749 return MCConstantExpr::create(0, Ctx);
1750
1751 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1752 return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1753
1754 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1755 const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(getSymbol(GV), Ctx);
1756 if (ProcessingGeneric)
1757 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1758 return Expr;
1759 }
1760
1761 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1762 if (!CE) {
1763 llvm_unreachable("Unknown constant value to lower!");
1764 }
1765
// Cases that `break` fall through to the constant-folding attempt and the
// error report after the switch.
1766 switch (CE->getOpcode()) {
1767 default:
1768 break; // Error
1769
1770 case Instruction::AddrSpaceCast: {
1771 // Strip the addrspacecast and pass along the operand
// Only casts whose destination is address space 0 are supported.
1772 PointerType *DstTy = cast<PointerType>(CE->getType());
1773 if (DstTy->getAddressSpace() == 0)
1774 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
1775
1776 break; // Error
1777 }
1778
1779 case Instruction::GetElementPtr: {
1780 const DataLayout &DL = getDataLayout();
1781
1782 // Generate a symbolic expression for the byte address
1783 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
1784 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
1785
1786 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
1787 ProcessingGeneric);
// A zero offset needs no addition.
1788 if (!OffsetAI)
1789 return Base;
1790
1791 int64_t Offset = OffsetAI.getSExtValue();
// NOTE(review): the start of the return statement that combines Base and
// Offset is not visible in this listing.
1793 Ctx);
1794 }
1795
1796 case Instruction::Trunc:
1797 // We emit the value and depend on the assembler to truncate the generated
1798 // expression properly. This is important for differences between
1799 // blockaddress labels. Since the two labels are in the same function, it
1800 // is reasonable to treat their delta as a 32-bit value.
1801 [[fallthrough]];
1802 case Instruction::BitCast:
1803 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1804
1805 case Instruction::IntToPtr: {
1806 const DataLayout &DL = getDataLayout();
1807
1808 // Handle casts to pointers by changing them into casts to the appropriate
1809 // integer type. This promotes constant folding and simplifies this code.
1810 Constant *Op = CE->getOperand(0);
1811 Op = ConstantFoldIntegerCast(Op, DL.getIntPtrType(CV->getType()),
1812 /*IsSigned*/ false, DL);
1813 if (Op)
1814 return lowerConstantForGV(Op, ProcessingGeneric);
1815
1816 break; // Error
1817 }
1818
1819 case Instruction::PtrToInt: {
1820 const DataLayout &DL = getDataLayout();
1821
1822 // Support only foldable casts to/from pointers that can be eliminated by
1823 // changing the pointer to the appropriately sized integer type.
1824 Constant *Op = CE->getOperand(0);
1825 Type *Ty = CE->getType();
1826
1827 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
1828
1829 // We can emit the pointer value into this slot if the slot is an
1830 // integer slot equal to the size of the pointer.
1831 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
1832 return OpExpr;
1833
1834 // Otherwise the pointer is smaller than the resultant integer, mask off
1835 // the high bits so we are sure to get a proper truncation if the input is
1836 // a constant expr.
1837 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
1838 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
1839 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
1840 }
1841
1842 // The MC library also has a right-shift operator, but it isn't consistently
1843 // signed or unsigned between different targets.
1844 case Instruction::Add: {
1845 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1846 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
// Add is the only opcode that reaches this inner switch.
1847 switch (CE->getOpcode()) {
1848 default: llvm_unreachable("Unknown binary operator constant cast expr");
1849 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
1850 }
1851 }
1852 }
1853
1854 // If the code isn't optimized, there may be outstanding folding
1855 // opportunities. Attempt to fold the expression using DataLayout as a
1856 // last resort before giving up.
// NOTE(review): the declarations of `C` (the folded constant) and `OS` (a
// stream, presumably writing into S) are not visible in this listing.
1858 if (C != CE)
1859 return lowerConstantForGV(C, ProcessingGeneric);
1860
1861 // Otherwise report the problem to the user.
1862 std::string S;
1864 OS << "Unsupported expression in static initializer: ";
1865 CE->printAsOperand(OS, /*PrintType=*/false,
1866 !MF ? nullptr : MF->getFunction().getParent());
1867 report_fatal_error(Twine(OS.str()));
1868}
1869
/// Print the MC expression \p Expr to \p OS.
// NOTE(review): the body of this function is not visible in this listing.
1870void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) const {
1872}
1873
1874/// PrintAsmOperand - Print out an operand for an inline asm expression.
1875///
1876bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
1877 const char *ExtraCode, raw_ostream &O) {
1878 if (ExtraCode && ExtraCode[0]) {
1879 if (ExtraCode[1] != 0)
1880 return true; // Unknown modifier.
1881
1882 switch (ExtraCode[0]) {
1883 default:
1884 // See if this is a generic print operand
1885 return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
1886 case 'r':
1887 break;
1888 }
1889 }
1890
1891 printOperand(MI, OpNo, O);
1892
1893 return false;
1894}
1895
1896bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
1897 unsigned OpNo,
1898 const char *ExtraCode,
1899 raw_ostream &O) {
1900 if (ExtraCode && ExtraCode[0])
1901 return true; // Unknown modifier
1902
1903 O << '[';
1904 printMemOperand(MI, OpNo, O);
1905 O << ']';
1906
1907 return false;
1908}
1909
/// Print operand \p OpNum of \p MI to \p O.
///
/// The visible branches handle registers (physical and virtual printed
/// differently), integer immediates, floating-point immediates, symbol
/// operands and basic-block operands. Any other operand type is unreachable.
// NOTE(review): the `case` labels of the switch and the statements that print
// physical registers are not visible in this listing.
1910void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
1911 raw_ostream &O) {
1912 const MachineOperand &MO = MI->getOperand(OpNum);
1913 switch (MO.getType()) {
// Register operand: NVPTX::VRDepot is singled out among physical registers.
1915 if (MO.getReg().isPhysical()) {
1916 if (MO.getReg() == NVPTX::VRDepot)
1918 else
1920 } else {
1921 emitVirtualRegister(MO.getReg(), O);
1922 }
1923 break;
1924
// Integer immediate.
1926 O << MO.getImm();
1927 break;
1928
// Floating-point immediate, printed as a PTX hex literal.
1930 printFPConstant(MO.getFPImm(), O);
1931 break;
1932
// Symbol operand.
1934 PrintSymbolOperand(MO, O);
1935 break;
1936
// Basic-block operand, printed as the block's symbol.
1938 MO.getMBB()->getSymbol()->print(O, MAI);
1939 break;
1940
1941 default:
1942 llvm_unreachable("Operand type not supported.");
1943 }
1944}
1945
1946void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, unsigned OpNum,
1947 raw_ostream &O, const char *Modifier) {
1948 printOperand(MI, OpNum, O);
1949
1950 if (Modifier && strcmp(Modifier, "add") == 0) {
1951 O << ", ";
1952 printOperand(MI, OpNum + 1, O);
1953 } else {
1954 if (MI->getOperand(OpNum + 1).isImm() &&
1955 MI->getOperand(OpNum + 1).getImm() == 0)
1956 return; // don't print ',0' or '+0'
1957 O << "+";
1958 printOperand(MI, OpNum + 1, O);
1959 }
1960}
1961
// Definition of the pass's static ID member.
1962char NVPTXAsmPrinter::ID = 0;
1963
// Pass registration: argument string "nvptx-asm-printer", display name
// "NVPTX Assembly Printer"; the `cfg` and `analysis` flags are both false.
1964INITIALIZE_PASS(NVPTXAsmPrinter, "nvptx-asm-printer", "NVPTX Assembly Printer",
1965 false, false)
1966
1967// Force static initialization.
// NOTE(review): the return-type/linkage line and the body of this function
// are not visible in this listing.
1969LLVMInitializeNVPTXAsmPrinter() {
1972}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_ABI
Definition: Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:132
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Looks at all the uses of the given value Returns the Liveness deduced from the uses of this value Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses If the result is MaybeLiveUses might be modified but its content should be ignored(since it might not be complete). DeadArgumentEliminationPass
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Register const TargetRegisterInfo * TRI
#define DEPOTNAME
static void discoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
discoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static bool canDemoteGlobalVar(const GlobalVariable *GV, Function const *&f)
static bool useFuncSeen(const Constant *C, const SmallPtrSetImpl< const Function * > &SeenSet)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
static bool usedInGlobalVarDef(const Constant *C)
static bool usedInOneFunc(const User *U, Function const *&OneFunc)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:56
static const char * name
Definition: SMEABIPass.cpp:52
raw_pwrite_stream & OS
This file defines the SmallString class.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
Value * RHS
Value * LHS
@ __CLK_ADDRESS_BASE
@ __CLK_FILTER_BASE
@ __CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_MASK
@ __CLK_ADDRESS_MASK
@ __CLK_FILTER_MASK
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:6057
APInt bitcastToAPInt() const
Definition: APFloat.h:1353
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1540
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition: APInt.cpp:520
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
This class represents an incoming formal argument to a Function.
Definition: Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:706
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
Definition: AsmPrinter.cpp:433
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:93
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:96
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:108
bool hasDebugInfo() const
Returns true if valid debug info is present.
Definition: AsmPrinter.h:504
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: AsmPrinter.cpp:464
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:409
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition: AsmPrinter.h:127
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:100
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AsmPrinter.h:452
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:105
const DataLayout & getDataLayout() const
Return information about data layout.
Definition: AsmPrinter.cpp:417
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
Definition: AsmPrinter.cpp:437
MCSymbol * GetExternalSymbolSymbol(const Twine &Sym) const
Return the MCSymbol for the specified ExternalSymbol.
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
Definition: AsmPrinter.cpp:428
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1120
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:277
const APFloat & getValueAPF() const
Definition: Constants.h:320
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:163
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:154
This is an important base class in LLVM.
Definition: Constant.h:43
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Subprogram description. Uses SubclassData1.
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:63
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:177
unsigned size() const
Definition: DenseMap.h:120
iterator end()
Definition: DenseMap.h:87
Implements a dense probed hash-table based set.
Definition: DenseSet.h:263
Diagnostic information for unsupported feature in backend.
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition: Metadata.cpp:1915
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:623
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:114
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:106
bool hasLinkOnceLinkage() const
Definition: GlobalValue.h:517
bool hasExternalLinkage() const
Definition: GlobalValue.h:513
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:316
bool hasLocalLinkage() const
Definition: GlobalValue.h:530
bool hasPrivateLinkage() const
Definition: GlobalValue.h:529
unsigned getAddressSpace() const
Definition: GlobalValue.h:207
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:663
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:296
bool hasWeakLinkage() const
Definition: GlobalValue.h:524
bool hasCommonLinkage() const
Definition: GlobalValue.h:534
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:514
Type * getValueType() const
Definition: GlobalValue.h:298
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isLoopHeader(const BlockT *BB) const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void printExpr(raw_ostream &, const MCExpr &) const
Definition: MCAsmInfo.cpp:153
unsigned getCodePointerSize() const
Get the code pointer size in bytes.
Definition: MCAsmInfo.h:443
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition: MCExpr.h:343
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:348
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:212
Context object for machine code objects.
Definition: MCContext.h:83
const MCAsmInfo * getAsmInfo() const
Definition: MCContext.h:412
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Look up the symbol with the specified Name inside this context.
Definition: MCContext.cpp:203
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:188
void addOperand(const MCOperand Op)
Definition: MCInst.h:215
void setOpcode(unsigned Op)
Definition: MCInst.h:201
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:40
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:166
static MCOperand createReg(MCRegister Reg)
Definition: MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:145
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:190
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition: MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:42
LLVM_ABI void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition: MCSymbol.cpp:59
Metadata node.
Definition: Metadata.h:1077
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
iterator_range< pred_iterator > predecessors()
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
Definition: MachineInstr.h:72
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_FPImmediate
Floating-point immediate operand.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
bool def_empty(Register RegNo) const
def_empty - Return true if there are no instructions defining the specified register (it may be live-...
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
std::string getVirtualRegisterName(unsigned) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not a register based.
static const NVPTXFloatMCExpr * createConstantBFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:44
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:49
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:54
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:59
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
Definition: NVPTXMCExpr.cpp:60
static const char * getRegisterName(MCRegister Reg)
bool checkImageHandleSymbol(StringRef Symbol) const
Check if the symbol has a mapping.
const char * getName(unsigned RegNo) const
std::string getTargetName() const
unsigned getMaxRequiredAlignment() const
bool hasMaskOperator() const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getPTXVersion() const
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getSmVersion() const
NVPTX::DrvInterface getDrvInterface() const
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Implements NVPTX-specific streamer.
void closeLastSection()
Close last section.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:740
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:67
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:74
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:61
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:78
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:476
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:541
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:269
Class to represent struct types.
Definition: DerivedTypes.h:218
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
unsigned getPointerSizeInBits(unsigned AS) const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:267
@ ArrayTyID
Arrays.
Definition: Type.h:74
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
@ VoidTyID
type with no size
Definition: Type.h:63
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ StructTyID
Structures.
Definition: Type.h:73
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:70
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:75
@ BFloatTyID
16-bit floating point type (7-bit significand)
Definition: Type.h:57
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ PointerTyID
Pointers.
Definition: Type.h:72
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:61
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:255
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
op_range operands()
Definition: User.h:292
Value * getOperand(unsigned i) const
Definition: User.h:232
unsigned getNumOperands() const
Definition: User.h:254
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
bool use_empty() const
Definition: Value.h:346
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:194
size_type size() const
Definition: DenseSet.h:87
bool erase(const ValueT &V)
Definition: DenseSet.h:100
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:174
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
A raw_ostream that writes to a std::string.
Definition: raw_ostream.h:662
A raw_ostream that writes to a SmallVector or SmallString.
Definition: raw_ostream.h:692
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ CE
Windows NT (Windows on ARM)
Reg
All possible values of the reg field in the ModR/M byte.
uint64_t read64le(const void *P)
Definition: Endian.h:432
uint32_t read32le(const void *P)
Definition: Endian.h:429
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:477
bool isManaged(const Value &V)
StringRef getNVPTXRegClassStr(TargetRegisterClass const *RC)
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
MaybeAlign getAlign(const CallInst &I, unsigned Index)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1702
std::optional< unsigned > getMaxNReg(const Function &F)
StringRef getSamplerName(const Value &V)
bool isImageReadWrite(const Value &V)
bool isImageReadOnly(const Value &V)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::optional< unsigned > getMinCTASm(const Function &F)
SmallVector< unsigned, 3 > getReqNTID(const Function &F)
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
bool isSampler(const Value &V)
unsigned promoteScalarArgumentSize(unsigned size)
void clearAnnotationCache(const Module *Mod)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition: Error.cpp:167
bool shouldPassAsArray(Type *Ty)
StringRef getNVPTXRegClassName(TargetRegisterClass const *RC)
bool isSurface(const Value &V)
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition: Format.h:201
std::optional< unsigned > getMaxClusterRank(const Function &F)
StringRef getTextureName(const Value &V)
SmallVector< unsigned, 3 > getMaxNTID(const Function &F)
LLVM_ABI void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
StringRef getSurfaceName(const Value &V)
Target & getTheNVPTXTarget64()
bool isKernelFunction(const Function &F)
bool isTexture(const Value &V)
bool isImageWriteOnly(const Value &V)
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition: Sequence.h:305
bool hasBlocksAreClusters(const Function &F)
SmallVector< unsigned, 3 > getClusterDim(const Function &F)
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
LLVM_ABI MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Target & getTheNVPTXTarget32()
#define N
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:266
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:304
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:267
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...