//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains code to lower X86 MachineInstrs to their corresponding
// MCInst records.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86ShuffleDecode.h"
#include "MCTargetDesc/X86TargetStreamer.h"
#include "X86AsmPrinter.h"
#include "X86RegisterInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
#include "X86Subtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include <string>

using namespace llvm;

static cl::opt<bool> EnableBranchHint("enable-branch-hint",
                                      cl::desc("Enable branch hint."),
                                      cl::init(false), cl::Hidden);
static cl::opt<unsigned> BranchHintProbabilityThreshold(
    "branch-hint-probability-threshold",
    cl::desc("The probability threshold of enabling branch hint."),
    cl::init(50), cl::Hidden);

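// Usage sketch (illustrative): both options are cl::Hidden, so they only
// appear under -help-hidden, e.g.
//   llc -enable-branch-hint -branch-hint-probability-threshold=60 foo.ll
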
namespace {

/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
class X86MCInstLower {
  MCContext &Ctx;
  const MachineFunction &MF;
  const TargetMachine &TM;
  const MCAsmInfo &MAI;
  X86AsmPrinter &AsmPrinter;

public:
  X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);

  MCOperand LowerMachineOperand(const MachineInstr *MI,
                                const MachineOperand &MO) const;
  void Lower(const MachineInstr *MI, MCInst &OutMI) const;

  MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;

private:
  MachineModuleInfoMachO &getMachOMMI() const;
};

} // end anonymous namespace

/// A RAII helper which defines a region of instructions which can't have
/// padding added between them for correctness.
struct NoAutoPaddingScope {
  MCStreamer &OS;
  const bool OldAllowAutoPadding;
  NoAutoPaddingScope(MCStreamer &OS)
      : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
    changeAndComment(false);
  }
  ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); }
  void changeAndComment(bool b) {
    if (b == OS.getAllowAutoPadding())
      return;
    OS.setAllowAutoPadding(b);
    if (b)
      OS.emitRawComment("autopadding");
    else
      OS.emitRawComment("noautopadding");
  }
};

// Emit a minimal sequence of nops spanning NumBytes bytes.
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget);

void X86AsmPrinter::StackMapShadowTracker::count(const MCInst &Inst,
                                                 const MCSubtargetInfo &STI,
                                                 MCCodeEmitter *CodeEmitter) {
  if (InShadow) {
    SmallString<256> Code;
    SmallVector<MCFixup, 4> Fixups;
    CodeEmitter->encodeInstruction(Inst, Code, Fixups, STI);
    CurrentShadowSize += Code.size();
    if (CurrentShadowSize >= RequiredShadowSize)
      InShadow = false; // The shadow is big enough. Stop counting.
  }
}

void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
    MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
  if (InShadow && CurrentShadowSize < RequiredShadowSize) {
    InShadow = false;
    emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
                &MF->getSubtarget<X86Subtarget>());
  }
}
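
// For example (illustrative numbers): a stackmap that reserves an 8-byte
// shadow followed by only 3 bytes of real instructions before the next label
// gets 5 bytes of nops here, so a call patched over the stackmap cannot
// clobber code past the shadow.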

void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
  OutStreamer->emitInstruction(Inst, getSubtargetInfo());
  SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
}

X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
                               X86AsmPrinter &asmprinter)
    : Ctx(asmprinter.OutContext), MF(mf), TM(mf.getTarget()),
      MAI(*TM.getMCAsmInfo()), AsmPrinter(asmprinter) {}

MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
  return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>();
}

/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
  const Triple &TT = TM.getTargetTriple();
  if (MO.isGlobal() && TT.isOSBinFormatELF())
    return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());

  const DataLayout &DL = MF.getDataLayout();
  assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
         "Isn't a symbol reference");

  MCSymbol *Sym = nullptr;
  SmallString<128> Name;
  StringRef Suffix;

  switch (MO.getTargetFlags()) {
  case X86II::MO_DLLIMPORT:
    // Handle dllimport linkage.
    Name += "__imp_";
    break;
  case X86II::MO_COFFSTUB:
    Name += ".refptr.";
    break;
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Suffix = "$non_lazy_ptr";
    break;
  }

  if (!Suffix.empty())
    Name += DL.getPrivateGlobalPrefix();

  if (MO.isGlobal()) {
    const GlobalValue *GV = MO.getGlobal();
    AsmPrinter.getNameWithPrefix(Name, GV);
  } else if (MO.isSymbol()) {
    Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
  } else if (MO.isMBB()) {
    assert(Suffix.empty());
    Sym = MO.getMBB()->getSymbol();
  }

  Name += Suffix;
  if (!Sym)
    Sym = Ctx.getOrCreateSymbol(Name);

  // If the target flags on the operand change the name of the symbol, do that
  // before we return the symbol.
  switch (MO.getTargetFlags()) {
  default:
    break;
  case X86II::MO_COFFSTUB: {
    MachineModuleInfoCOFF &MMICOFF =
        AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoCOFF>();
    MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()), true);
    }
    break;
  }
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
    MachineModuleInfoImpl::StubValueTy &StubSym =
        getMachOMMI().getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()),
          !MO.getGlobal()->hasInternalLinkage());
    }
    break;
  }
  }

  return Sym;
}

MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
                                             MCSymbol *Sym) const {
  // FIXME: We would like an efficient form for this, so we don't have to do a
  // lot of extra uniquing.
  const MCExpr *Expr = nullptr;
  X86::Specifier Specifier = X86::S_None;

  switch (MO.getTargetFlags()) {
  default:
    llvm_unreachable("Unknown target flag on GV operand");
  case X86II::MO_NO_FLAG: // No flag.
  // These affect the name of the symbol, not any suffix.
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DLLIMPORT:
  case X86II::MO_COFFSTUB:
    break;

  case X86II::MO_TLVP:
    Specifier = X86::S_TLVP;
    break;
  case X86II::MO_TLVP_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, X86::S_TLVP, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    break;
  case X86II::MO_SECREL:
    Specifier = X86::S_SECREL;
    break;
  case X86II::MO_TLSGD:
    Specifier = X86::S_TLSGD;
    break;
  case X86II::MO_TLSLD:
    Specifier = X86::S_TLSLD;
    break;
  case X86II::MO_TLSLDM:
    Specifier = X86::S_TLSLDM;
    break;
  case X86II::MO_GOTTPOFF:
    Specifier = X86::S_GOTTPOFF;
    break;
  case X86II::MO_INDNTPOFF:
    Specifier = X86::S_INDNTPOFF;
    break;
  case X86II::MO_TPOFF:
    Specifier = X86::S_TPOFF;
    break;
  case X86II::MO_DTPOFF:
    Specifier = X86::S_DTPOFF;
    break;
  case X86II::MO_NTPOFF:
    Specifier = X86::S_NTPOFF;
    break;
  case X86II::MO_GOTNTPOFF:
    Specifier = X86::S_GOTNTPOFF;
    break;
  case X86II::MO_GOTPCREL:
    Specifier = X86::S_GOTPCREL;
    break;
  case X86II::MO_GOTPCREL_NORELAX:
    Specifier = X86::S_GOTPCREL_NORELAX;
    break;
  case X86II::MO_GOT:
    Specifier = X86::S_GOT;
    break;
  case X86II::MO_GOTOFF:
    Specifier = X86::S_GOTOFF;
    break;
  case X86II::MO_PLT:
    Specifier = X86::S_PLT;
    break;
  case X86II::MO_ABS8:
    Specifier = X86::S_ABS8;
    break;
  case X86II::MO_PIC_BASE_OFFSET:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    if (MO.isJTI()) {
      assert(MAI.doesSetDirectiveSuppressReloc());
      // If .set directive is supported, use it to reduce the number of
      // relocations the assembler will generate for differences between
      // local labels. This is only safe when the symbols are in the same
      // section so we are restricting it to jumptable references.
      MCSymbol *Label = Ctx.createTempSymbol();
      AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
      Expr = MCSymbolRefExpr::create(Label, Ctx);
    }
    break;
  }

  if (!Expr)
    Expr = MCSymbolRefExpr::create(Sym, Specifier, Ctx);

  if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
    Expr = MCBinaryExpr::createAdd(
        Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
  return MCOperand::createExpr(Expr);
}
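
// For example (illustrative): an operand carrying MO_PLT against symbol "foo"
// lowers to the expression foo@PLT, and MO_GOTPCREL lowers to foo@GOTPCREL;
// the assembler later turns these specifiers into the matching relocations.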

static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
  return Subtarget.is64Bit() ? X86::RET64 : X86::RET32;
}

MCOperand X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
                                              const MachineOperand &MO) const {
  switch (MO.getType()) {
  default:
    MI->print(errs());
    llvm_unreachable("unknown operand type");
  case MachineOperand::MO_Register:
    // Ignore all implicit register operands.
    if (MO.isImplicit())
      return MCOperand();
    return MCOperand::createReg(MO.getReg());
  case MachineOperand::MO_Immediate:
    return MCOperand::createImm(MO.getImm());
  case MachineOperand::MO_MachineBasicBlock:
  case MachineOperand::MO_GlobalAddress:
  case MachineOperand::MO_ExternalSymbol:
    return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
  case MachineOperand::MO_MCSymbol:
    return LowerSymbolOperand(MO, MO.getMCSymbol());
  case MachineOperand::MO_JumpTableIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
  case MachineOperand::MO_ConstantPoolIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
  case MachineOperand::MO_BlockAddress:
    return LowerSymbolOperand(
        MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
  case MachineOperand::MO_RegisterMask:
    // Ignore call clobbers.
    return MCOperand();
  }
}

// Replace TAILJMP opcodes with their equivalent opcodes that have encoding
// information.
static unsigned convertTailJumpOpcode(unsigned Opcode) {
  switch (Opcode) {
  case X86::TAILJMPr:
    Opcode = X86::JMP32r;
    break;
  case X86::TAILJMPm:
    Opcode = X86::JMP32m;
    break;
  case X86::TAILJMPr64:
    Opcode = X86::JMP64r;
    break;
  case X86::TAILJMPm64:
    Opcode = X86::JMP64m;
    break;
  case X86::TAILJMPr64_REX:
    Opcode = X86::JMP64r_REX;
    break;
  case X86::TAILJMPm64_REX:
    Opcode = X86::JMP64m_REX;
    break;
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    Opcode = X86::JMP_1;
    break;
  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    Opcode = X86::JCC_1;
    break;
  }

  return Opcode;
}

void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
  OutMI.setOpcode(MI->getOpcode());

  for (const MachineOperand &MO : MI->operands())
    if (auto Op = LowerMachineOperand(MI, MO); Op.isValid())
      OutMI.addOperand(Op);

  bool In64BitMode = AsmPrinter.getSubtarget().is64Bit();
  if (X86::optimizeInstFromVEX3ToVEX2(OutMI, MI->getDesc()) ||
      X86::optimizeShiftRotateWithImmediateOne(OutMI) ||
      X86::optimizeVPCMPWithImmediateOneOrSix(OutMI) ||
      X86::optimizeMOVSX(OutMI) || X86::optimizeINCDEC(OutMI, In64BitMode) ||
      X86::optimizeMOV(OutMI, In64BitMode) ||
      X86::optimizeToFixedRegisterOrShortImmediateForm(OutMI))
    return;

  // Handle a few special cases to eliminate operand modifiers.
  switch (OutMI.getOpcode()) {
  case X86::LEA64_32r:
  case X86::LEA64r:
  case X86::LEA16r:
  case X86::LEA32r:
    // LEA should have a segment register, but it must be empty.
    assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
           "Unexpected # of LEA operands");
    assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
           "LEA has segment specified!");
    break;
  case X86::MULX32Hrr:
  case X86::MULX32Hrm:
  case X86::MULX64Hrr:
  case X86::MULX64Hrm: {
    // Turn into regular MULX by duplicating the destination.
    unsigned NewOpc;
    switch (OutMI.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
    case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
    case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
    case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
    }
    OutMI.setOpcode(NewOpc);
    // Duplicate the destination.
    MCRegister DestReg = OutMI.getOperand(0).getReg();
    OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
    break;
  }
  // CALL64r, CALL64pcrel32 - These instructions used to have
  // register inputs modeled as normal uses instead of implicit uses. As such,
  // we used to truncate off all but the first operand (the callee). This
  // issue seems to have been fixed at some point. This assert verifies that.
  case X86::CALL64r:
  case X86::CALL64pcrel32:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    break;
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }
  case X86::CLEANUPRET: {
    // Replace CLEANUPRET with the appropriate RET.
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }
  case X86::CATCHRET: {
    // Replace CATCHRET with the appropriate RET.
    const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
    unsigned ReturnReg = In64BitMode ? X86::RAX : X86::EAX;
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(Subtarget));
    OutMI.addOperand(MCOperand::createReg(ReturnReg));
    break;
  }
  // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump
  // instruction.
  case X86::TAILJMPr:
  case X86::TAILJMPr64:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::TAILJMPm:
  case X86::TAILJMPm64:
  case X86::TAILJMPm64_REX:
    assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
           "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::MASKMOVDQU:
  case X86::VMASKMOVDQU:
    if (In64BitMode)
      OutMI.setFlags(X86::IP_HAS_AD_SIZE);
    break;
  case X86::BSF16rm:
  case X86::BSF16rr:
  case X86::BSF32rm:
  case X86::BSF32rr:
  case X86::BSF64rm:
  case X86::BSF64rr: {
    // Add a REP prefix to BSF instructions so that new processors can
    // recognize them as TZCNT, which has better performance than BSF.
    // BSF and TZCNT have different interpretations of the ZF bit, so make
    // sure it won't be used later.
    const MachineOperand *FlagDef =
        MI->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
    if (!MF.getFunction().hasOptSize() && FlagDef && FlagDef->isDead())
      OutMI.setFlags(X86::IP_HAS_REPEAT);
    break;
  }
  default:
    break;
  }
}

void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
                                 const MachineInstr &MI) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);
  bool Is64Bits = getSubtarget().is64Bit();
  bool Is64BitsLP64 = getSubtarget().isTarget64BitLP64();
  MCContext &Ctx = OutStreamer->getContext();

  X86::Specifier Specifier;
  switch (MI.getOpcode()) {
  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_addrX32:
    Specifier = X86::S_TLSGD;
    break;
  case X86::TLS_base_addr32:
    Specifier = X86::S_TLSLDM;
    break;
  case X86::TLS_base_addr64:
  case X86::TLS_base_addrX32:
    Specifier = X86::S_TLSLD;
    break;
  case X86::TLS_desc32:
  case X86::TLS_desc64:
    Specifier = X86::S_TLSDESC;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

  const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
      MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), Specifier, Ctx);

  // Before binutils 2.41, ld has a bogus TLS relaxation error when the GD/LD
  // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
  // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
  // only using GOT when GOTPCRELX is enabled.
  // TODO Delete the workaround when rustc no longer relies on the hack
  bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
                Ctx.getTargetOptions()->X86RelaxRelocations;

  if (Specifier == X86::S_TLSDESC) {
    const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(
        MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), X86::S_TLSCALL,
        Ctx);
    EmitAndCountInstruction(
        MCInstBuilder(Is64BitsLP64 ? X86::LEA64r : X86::LEA32r)
            .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
            .addReg(Is64Bits ? X86::RIP : X86::EBX)
            .addImm(1)
            .addReg(0)
            .addExpr(Sym)
            .addReg(0));
    EmitAndCountInstruction(
        MCInstBuilder(Is64Bits ? X86::CALL64m : X86::CALL32m)
            .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
            .addImm(1)
            .addReg(0)
            .addExpr(Expr)
            .addReg(0));
  } else if (Is64Bits) {
    bool NeedsPadding = Specifier == X86::S_TLSGD;
    if (NeedsPadding && Is64BitsLP64)
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
    EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
                                .addReg(X86::RDI)
                                .addReg(X86::RIP)
                                .addImm(1)
                                .addReg(0)
                                .addExpr(Sym)
                                .addReg(0));
    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
    if (NeedsPadding) {
      if (!UseGot)
        EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
    }
    if (UseGot) {
      const MCExpr *Expr =
          MCSymbolRefExpr::create(TlsGetAddr, X86::S_GOTPCREL, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
                                  .addReg(X86::RIP)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALL64pcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr, X86::S_PLT, Ctx)));
    }
  } else {
    if (Specifier == X86::S_TLSGD && !UseGot) {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(0)
                                  .addImm(1)
                                  .addReg(X86::EBX)
                                  .addExpr(Sym)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Sym)
                                  .addReg(0));
    }

    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
    if (UseGot) {
      const MCExpr *Expr = MCSymbolRefExpr::create(TlsGetAddr, X86::S_GOT, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALLpcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr, X86::S_PLT, Ctx)));
    }
  }
}
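
// For reference (illustrative): the padded general-dynamic sequence emitted
// for 64-bit LP64 without GOT indirection is the exact byte pattern linkers
// expect for TLS relaxation:
//   .byte 0x66
//   leaq  sym@TLSGD(%rip), %rdi
//   .byte 0x66, 0x66
//   rex64 callq __tls_get_addr@PLT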

/// Emit the largest nop instruction smaller than or equal to \p NumBytes
/// bytes. Return the size of nop emitted.
static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget) {
  // Determine the longest nop which can be efficiently decoded for the given
  // target cpu. 15-bytes is the longest single NOP instruction, but some
  // platforms can't decode the longest forms efficiently.
  unsigned MaxNopLength = 1;
  if (Subtarget->is64Bit()) {
    // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
    // IndexReg/BaseReg below need to be updated.
    if (Subtarget->hasFeature(X86::TuningFast7ByteNOP))
      MaxNopLength = 7;
    else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP))
      MaxNopLength = 15;
    else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP))
      MaxNopLength = 11;
    else
      MaxNopLength = 10;
  } else if (Subtarget->is32Bit())
    MaxNopLength = 2;

  // Cap a single nop emission at the profitable value for the target
  NumBytes = std::min(NumBytes, MaxNopLength);

  unsigned NopSize;
  unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
  IndexReg = Displacement = SegmentReg = 0;
  BaseReg = X86::RAX;
  ScaleVal = 1;
  switch (NumBytes) {
  case 0:
    llvm_unreachable("Zero nops?");
    break;
  case 1:
    NopSize = 1;
    Opc = X86::NOOP;
    break;
  case 2:
    NopSize = 2;
    Opc = X86::XCHG16ar;
    break;
  case 3:
    NopSize = 3;
    Opc = X86::NOOPL;
    break;
  case 4:
    NopSize = 4;
    Opc = X86::NOOPL;
    Displacement = 8;
    break;
  case 5:
    NopSize = 5;
    Opc = X86::NOOPL;
    Displacement = 8;
    IndexReg = X86::RAX;
    break;
  case 6:
    NopSize = 6;
    Opc = X86::NOOPW;
    Displacement = 8;
    IndexReg = X86::RAX;
    break;
  case 7:
    NopSize = 7;
    Opc = X86::NOOPL;
    Displacement = 512;
    break;
  case 8:
    NopSize = 8;
    Opc = X86::NOOPL;
    Displacement = 512;
    IndexReg = X86::RAX;
    break;
  case 9:
    NopSize = 9;
    Opc = X86::NOOPW;
    Displacement = 512;
    IndexReg = X86::RAX;
    break;
  default:
    NopSize = 10;
    Opc = X86::NOOPW;
    Displacement = 512;
    IndexReg = X86::RAX;
    SegmentReg = X86::CS;
    break;
  }

  unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
  NopSize += NumPrefixes;
  for (unsigned i = 0; i != NumPrefixes; ++i)
    OS.emitBytes("\x66");

  switch (Opc) {
  default: llvm_unreachable("Unexpected opcode");
  case X86::NOOP:
    OS.emitInstruction(MCInstBuilder(Opc), *Subtarget);
    break;
  case X86::XCHG16ar:
    OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX),
                       *Subtarget);
    break;
  case X86::NOOPL:
  case X86::NOOPW:
    OS.emitInstruction(MCInstBuilder(Opc)
                           .addReg(BaseReg)
                           .addImm(ScaleVal)
                           .addReg(IndexReg)
                           .addImm(Displacement)
                           .addReg(SegmentReg),
                       *Subtarget);
    break;
  }
  assert(NopSize <= NumBytes && "We overemitted?");
  return NopSize;
}
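
// For illustration, the operand selections above correspond to the standard
// multi-byte nop encodings, e.g. for sizes 1-5:
//   90                nop
//   66 90             xchg %ax,%ax
//   0f 1f 00          nopl (%rax)
//   0f 1f 40 08       nopl 8(%rax)
//   0f 1f 44 00 08    nopl 8(%rax,%rax,1)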

/// Emit the optimal amount of multi-byte nops on X86.
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget) {
  unsigned NopsToEmit = NumBytes;
  (void)NopsToEmit;
  while (NumBytes) {
    NumBytes -= emitNop(OS, NumBytes, Subtarget);
    assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
  }
}

void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  StatepointOpers SOpers(&MI);
  if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
    emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
  } else {
    // Lower call target and choose correct opcode
    const MachineOperand &CallTarget = SOpers.getCallTarget();
    MCOperand CallTargetMCOp;
    unsigned CallOpcode;
    switch (CallTarget.getType()) {
    case MachineOperand::MO_GlobalAddress:
    case MachineOperand::MO_ExternalSymbol:
      CallTargetMCOp = MCIL.LowerSymbolOperand(
          CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // address. You'll fail asserts during load & relocation if this
      // symbol is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Immediate:
      CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // immediate. You'll fail asserts during load & relocation if this
      // address is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Register:
      // FIXME: Add retpoline support and remove this.
      if (Subtarget->useIndirectThunkCalls())
        report_fatal_error("Lowering register statepoints with thunks not "
                           "yet implemented.");
      CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
      CallOpcode = X86::CALL64r;
      break;
    default:
      llvm_unreachable("Unsupported operand type in statepoint call target");
      break;
    }

    // Emit call
    MCInst CallInst;
    CallInst.setOpcode(CallOpcode);
    CallInst.addOperand(CallTargetMCOp);
    OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
    maybeEmitNopAfterCallForWindowsEH(&MI);
  }

  // Record our statepoint node in the same section used by STACKMAP
  // and PATCHPOINT
  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);
  SM.recordStatepoint(*MILabel, MI);
}

void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
                                     X86MCInstLower &MCIL) {
  // FAULTING_LOAD_OP <def>, <faulting type>, <MBB handler>,
  //                  <opcode>, <operands>

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  Register DefRegister = FaultingMI.getOperand(0).getReg();
  FaultMaps::FaultKind FK =
      static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
  MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
  unsigned Opcode = FaultingMI.getOperand(3).getImm();
  unsigned OperandsBeginIdx = 4;

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *FaultingLabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(FaultingLabel);

  assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
  FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);

  MCInst MI;
  MI.setOpcode(Opcode);

  if (DefRegister != X86::NoRegister)
    MI.addOperand(MCOperand::createReg(DefRegister));

  for (const MachineOperand &MO :
       llvm::drop_begin(FaultingMI.operands(), OperandsBeginIdx))
    if (auto Op = MCIL.LowerMachineOperand(&FaultingMI, MO); Op.isValid())
      MI.addOperand(Op);

  OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
  OutStreamer->emitInstruction(MI, getSubtargetInfo());
}

void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
                                     X86MCInstLower &MCIL) {
  bool Is64Bits = Subtarget->is64Bit();
  MCContext &Ctx = OutStreamer->getContext();
  MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
  const MCSymbolRefExpr *Op = MCSymbolRefExpr::create(fentry, Ctx);

  EmitAndCountInstruction(
      MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
          .addExpr(Op));
}

void X86AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) {
  assert(std::next(MI.getIterator())->isCall() &&
         "KCFI_CHECK not followed by a call instruction");

  // Adjust the offset for patchable-function-prefix. X86InstrInfo::getNop()
  // returns a 1-byte X86::NOOP, which means the offset is the same in
  // bytes. This assumes that patchable-function-prefix is the same for all
  // functions.
  const MachineFunction &MF = *MI.getMF();
  int64_t PrefixNops = 0;
  (void)MF.getFunction()
      .getFnAttribute("patchable-function-prefix")
      .getValueAsString()
      .getAsInteger(10, PrefixNops);

  // KCFI allows indirect calls to any location that's preceded by a valid
  // type identifier. To avoid encoding the full constant into an instruction,
  // and thus emitting potential call target gadgets at each indirect call
  // site, load a negated constant to a register and compare that to the
  // expected value at the call target.
  const Register AddrReg = MI.getOperand(0).getReg();
  const uint32_t Type = MI.getOperand(1).getImm();
  // The check is immediately before the call. If the call target is in R10,
  // we can clobber R11 for the check instead.
  unsigned TempReg = AddrReg == X86::R10 ? X86::R11D : X86::R10D;
  EmitAndCountInstruction(
      MCInstBuilder(X86::MOV32ri).addReg(TempReg).addImm(-MaskKCFIType(Type)));
  EmitAndCountInstruction(MCInstBuilder(X86::ADD32rm)
                              .addReg(X86::NoRegister)
                              .addReg(TempReg)
                              .addReg(AddrReg)
                              .addImm(1)
                              .addReg(X86::NoRegister)
                              .addImm(-(PrefixNops + 4))
                              .addReg(X86::NoRegister));

  MCSymbol *Pass = OutContext.createTempSymbol();
  EmitAndCountInstruction(
      MCInstBuilder(X86::JCC_1)
          .addExpr(MCSymbolRefExpr::create(Pass, OutContext))
          .addImm(X86::COND_E));

  MCSymbol *Trap = OutContext.createTempSymbol();
  OutStreamer->emitLabel(Trap);
  EmitAndCountInstruction(MCInstBuilder(X86::TRAP));
  emitKCFITrapEntry(MF, Trap);
  OutStreamer->emitLabel(Pass);
}
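
// Shape of the emitted check, assuming the call target sits in %rax
// (illustrative only):
//   movl $-TYPE, %r10d                  # negated KCFI type id
//   addl -(PrefixNops+4)(%rax), %r10d   # zero iff the target's id matches
//   je   .Lpass
//   ud2                                 # trap, recorded for the KCFI runtime
// .Lpass: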

void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
  // FIXME: Make this work on non-ELF.
  if (!TM.getTargetTriple().isOSBinFormatELF()) {
    report_fatal_error("llvm.asan.check.memaccess only supported on ELF");
    return;
  }

  const auto &Reg = MI.getOperand(0).getReg();
  ASanAccessInfo AccessInfo(MI.getOperand(1).getImm());

  uint64_t ShadowBase;
  int MappingScale;
  bool OrShadowOffset;
  getAddressSanitizerParams(TM.getTargetTriple(), 64, AccessInfo.CompileKernel,
                            &ShadowBase, &MappingScale, &OrShadowOffset);

  StringRef Name = AccessInfo.IsWrite ? "store" : "load";
  StringRef Op = OrShadowOffset ? "or" : "add";
  std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" +
                         Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" +
                         TM.getMCRegisterInfo()->getName(Reg.asMCReg()))
                            .str();
  if (OrShadowOffset)
    report_fatal_error(
        "OrShadowOffset is not supported with optimized callbacks");

  EmitAndCountInstruction(
      MCInstBuilder(X86::CALL64pcrel32)
          .addExpr(MCSymbolRefExpr::create(
              OutContext.getOrCreateSymbol(SymName), OutContext)));
}
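
// For example (illustrative): a 4-byte store through %rdi under the default
// add-offset shadow mapping calls the callback named
// "__asan_check_store_add_4_RDI".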

void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
                                      X86MCInstLower &MCIL) {
  // PATCHABLE_OP minsize

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  auto NextMI = std::find_if(std::next(MI.getIterator()),
                             MI.getParent()->end().getInstrIterator(),
                             [](auto &II) { return !II.isMetaInstruction(); });

  SmallString<256> Code;
  unsigned MinSize = MI.getOperand(0).getImm();

  if (NextMI != MI.getParent()->end() && !NextMI->isInlineAsm()) {
    // Lower the next MachineInstr to find its byte size.
    // If the next instruction is inline assembly, we skip lowering it for now,
    // and assume we should always generate NOPs.
    MCInst MCI;
    MCIL.Lower(&*NextMI, MCI);

    SmallVector<MCFixup, 4> Fixups;
    CodeEmitter->encodeInstruction(MCI, Code, Fixups, getSubtargetInfo());
  }

  if (Code.size() < MinSize) {
    if (MinSize == 2 && Subtarget->is32Bit() &&
        Subtarget->isTargetWindowsMSVC() &&
        (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
      // For compatibility reasons, when targeting MSVC, it is important to
      // generate a 'legacy' NOP in the form of an 8B FF MOV EDI, EDI. Some
      // tools rely specifically on this pattern to be able to patch a
      // function. This is only for 32-bit targets, when using /arch:IA32 or
      // /arch:SSE.
      OutStreamer->emitInstruction(
          MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
          *Subtarget);
    } else {
      unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
      assert(NopSize == MinSize && "Could not implement MinSize!");
      (void)NopSize;
    }
  }
}

// Lower a stackmap of the form:
// <id>, <shadowBytes>, ...
void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);

  SM.recordStackMap(*MILabel, MI);
  unsigned NumShadowBytes = MI.getOperand(1).getImm();
  SMShadowTracker.reset(NumShadowBytes);
}

// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");

  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);
  SM.recordPatchPoint(*MILabel, MI);

  PatchPointOpers opers(&MI);
  unsigned ScratchIdx = opers.getNextScratchIdx();
  unsigned EncodedBytes = 0;
  const MachineOperand &CalleeMO = opers.getCallTarget();

  // Check for null target. If target is non-null (i.e. is non-zero or is
  // symbolic) then emit a call.
  if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
    MCOperand CalleeMCOp;
    switch (CalleeMO.getType()) {
    default:
      /// FIXME: Add a verifier check for bad callee types.
      llvm_unreachable("Unrecognized callee operand type.");
    case MachineOperand::MO_Immediate:
      if (CalleeMO.getImm())
        CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
      break;
    case MachineOperand::MO_ExternalSymbol:
    case MachineOperand::MO_GlobalAddress:
      CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
                                           MCIL.GetSymbolFromOperand(CalleeMO));
      break;
    }

    // Emit MOV to materialize the target address and the CALL to target.
    // This is encoded with 12-13 bytes, depending on which register is used.
    Register ScratchReg = MI.getOperand(ScratchIdx).getReg();
    if (X86II::isX86_64ExtendedReg(ScratchReg))
      EncodedBytes = 13;
    else
      EncodedBytes = 12;

    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
    // FIXME: Add retpoline support and remove this.
    if (Subtarget->useIndirectThunkCalls())
      report_fatal_error(
          "Lowering patchpoint with thunks not yet implemented.");
    EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
  }

  // Emit padding.
  unsigned NumBytes = opers.getNumPatchBytes();
  assert(NumBytes >= EncodedBytes &&
         "Patchpoint can't request size less than the length of a call.");

  emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);
}
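
// For example (illustrative): a patchpoint that requests 16 bytes and names a
// callee emits movabsq $target, %r11 + callq *%r11 (13 bytes for an extended
// register, 12 otherwise) followed by 3-4 bytes of nops to reach the size.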

void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
                                              X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "XRay custom events only supported on X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // We want to emit the following pattern, which follows the x86 calling
  // convention to prepare for the trampoline call to be patched in.
  //
  //   .p2align 1, ...
  // .Lxray_event_sled_N:
  //   jmp +N                        // jump across the instrumentation sled
  //   ...                           // set up arguments in register
  //   callq __xray_CustomEvent@plt  // force dependency to symbol
  //   ...
  //   <jump here>
  //
  // After patching, it would look something like:
  //
  //   nopw (2-byte nop)
  //   ...
  //   callq __xrayCustomEvent  // already lowered
  //   ...
  //
  // ---
  // First we emit the label and the jump.
  auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
  OutStreamer->AddComment("# XRay Custom Event Log");
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBinaryData("\xeb\x0f");

  // The default C calling convention will place two arguments into %rdi and
  // %rsi -- so we only work with those.
  const Register DestRegs[] = {X86::RDI, X86::RSI};
  bool UsedMask[] = {false, false};
  // Filled out in loop.
  Register SrcRegs[] = {0, 0};

  // Then we put the operands in the %rdi and %rsi registers. We spill the
  // values in the register before we clobber them, and mark them as used in
  // UsedMask. In case the arguments are already in the correct register, we
  // emit nops appropriately sized to keep the sled the same size in every
  // situation.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I));
        Op.isValid()) {
      assert(Op.isReg() && "Only support arguments in registers");
      SrcRegs[I] = getX86SubSuperRegister(Op.getReg(), 64);
      assert(SrcRegs[I].isValid() && "Invalid operand");
      if (SrcRegs[I] != DestRegs[I]) {
        UsedMask[I] = true;
        EmitAndCountInstruction(
            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
      } else {
        emitX86Nops(*OutStreamer, 4, Subtarget);
      }
    }

  // Now that the register values are stashed, mov arguments into place.
  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
  // earlier DestReg. We will have already overwritten over the register before
  // we can copy from it.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (SrcRegs[I] != DestRegs[I])
      EmitAndCountInstruction(
          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));

  // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
  // name of the trampoline to be implemented by the XRay runtime.
  auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
  if (isPositionIndependent())
    TOp.setTargetFlags(X86II::MO_PLT);

  // Emit the call instruction.
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));

  // Restore caller-saved and used registers.
  for (unsigned I = sizeof UsedMask; I-- > 0;)
    if (UsedMask[I])
      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
    else
      emitX86Nops(*OutStreamer, 1, Subtarget);

  OutStreamer->AddComment("xray custom event end.");

  // Record the sled version. Version 0 of this sled was spelled differently, so
  // we let the runtime handle the different offsets we're using. Version 2
  // changed the absolute address to a PC-relative address.
  recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);
}

void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
                                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "XRay typed events only supported on X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // We want to emit the following pattern, which follows the x86 calling
  // convention to prepare for the trampoline call to be patched in.
  //
  //   .p2align 1, ...
  // .Lxray_event_sled_N:
  //   jmp +N                       // jump across the instrumentation sled
  //   ...                          // set up arguments in register
  //   callq __xray_TypedEvent@plt  // force dependency to symbol
  //   ...
  //   <jump here>
  //
  // After patching, it would look something like:
  //
  //   nopw (2-byte nop)
  //   ...
  //   callq __xrayTypedEvent  // already lowered
  //   ...
  //
  // ---
  // First we emit the label and the jump.
  auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
  OutStreamer->AddComment("# XRay Typed Event Log");
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBinaryData("\xeb\x14");

  // An x86-64 convention may place three arguments into %rcx, %rdx, and %r8,
  // so we'll work with those. Or we may be called via SystemV, in which case
  // we don't have to do any translation.
  const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
  bool UsedMask[] = {false, false, false};

  // Will fill out src regs in the loop.
  Register SrcRegs[] = {0, 0, 0};

  // Then we put the operands in the SystemV registers. We spill the values in
  // the registers before we clobber them, and mark them as used in UsedMask.
  // In case the arguments are already in the correct register, we emit nops
  // appropriately sized to keep the sled the same size in every situation.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I));
        Op.isValid()) {
      // TODO: Is register only support adequate?
      assert(Op.isReg() && "Only supports arguments in registers");
      SrcRegs[I] = getX86SubSuperRegister(Op.getReg(), 64);
      assert(SrcRegs[I].isValid() && "Invalid operand");
      if (SrcRegs[I] != DestRegs[I]) {
        UsedMask[I] = true;
        EmitAndCountInstruction(
            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
      } else {
        emitX86Nops(*OutStreamer, 4, Subtarget);
      }
    }

  // In the above loop we only stash all of the destination registers or emit
  // nops if the arguments are already in the right place. Doing the actual
  // moving is postponed until after all the registers are stashed so nothing
  // is clobbered. We've already added nops to account for the size of mov and
  // push if the register is in the right place, so we only have to worry about
  // emitting movs.
  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
  // earlier DestReg. We will have already overwritten over the register before
  // we can copy from it.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (UsedMask[I])
      EmitAndCountInstruction(
          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));

  // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
  // name of the trampoline to be implemented by the XRay runtime.
  auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
  if (isPositionIndependent())
    TOp.setTargetFlags(X86II::MO_PLT);

  // Emit the call instruction.
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));

  // Restore caller-saved and used registers.
  for (unsigned I = sizeof UsedMask; I-- > 0;)
    if (UsedMask[I])
      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
    else
      emitX86Nops(*OutStreamer, 1, Subtarget);

  OutStreamer->AddComment("xray typed event end.");

  // Record the sled version.
  recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);
}

void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                  X86MCInstLower &MCIL) {

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  const Function &F = MF->getFunction();
  if (F.hasFnAttribute("patchable-function-entry")) {
    unsigned Num;
    if (F.getFnAttribute("patchable-function-entry")
            .getValueAsString()
            .getAsInteger(10, Num))
      return;
    emitX86Nops(*OutStreamer, Num, Subtarget);
    return;
  }
  // We want to emit the following pattern:
  //
  //   .p2align 1, ...
  // .Lxray_sled_N:
  //   jmp .tmpN
  //   # 9 bytes worth of noops
  //
  // We need the 9 bytes because at runtime, we'd be patching over the full 11
  // bytes with the following pattern:
  //
  //   mov %r10, <function id, 32-bit>   // 6 bytes
  //   call <relative offset, 32-bits>   // 5 bytes
  //
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBytes("\xeb\x09");
  emitX86Nops(*OutStreamer, 9, Subtarget);
  recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);
}

void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
                                       X86MCInstLower &MCIL) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // Since PATCHABLE_RET takes the opcode of the return statement as an
  // argument, we use that to emit the correct form of the RET that we want.
  // i.e. when we see this:
  //
  //   PATCHABLE_RET X86::RET ...
  //
  // We should emit the RET followed by sleds.
  //
  //   .p2align 1, ...
  // .Lxray_sled_N:
  //   ret  # or equivalent instruction
  //   # 10 bytes worth of noops
  //
  // This just makes sure that the alignment for the next instruction is 2.
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);
  unsigned OpCode = MI.getOperand(0).getImm();
  MCInst Ret;
  Ret.setOpcode(OpCode);
  for (auto &MO : drop_begin(MI.operands()))
    if (auto Op = MCIL.LowerMachineOperand(&MI, MO); Op.isValid())
      Ret.addOperand(Op);
  OutStreamer->emitInstruction(Ret, getSubtargetInfo());
  emitX86Nops(*OutStreamer, 10, Subtarget);
  recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
}

void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
                                             X86MCInstLower &MCIL) {
  MCInst TC;
  TC.setOpcode(convertTailJumpOpcode(MI.getOperand(0).getImm()));
  // Drop the tail jump opcode.
  auto TCOperands = drop_begin(MI.operands());
  bool IsConditional = TC.getOpcode() == X86::JCC_1;
  MCSymbol *FallthroughLabel;
  if (IsConditional) {
    // Rewrite:
    //   je target
    //
    // To:
    //   jne .fallthrough
    //   .p2align 1, ...
    // .Lxray_sled_N:
    //   SLED_CODE
    //   jmp target
    // .fallthrough:
    FallthroughLabel = OutContext.createTempSymbol();
    EmitToStreamer(
        *OutStreamer,
        MCInstBuilder(X86::JCC_1)
            .addExpr(MCSymbolRefExpr::create(FallthroughLabel, OutContext))
            .addImm(X86::GetOppositeBranchCondition(
                static_cast<X86::CondCode>(MI.getOperand(2).getImm()))));
    TC.setOpcode(X86::JMP_1);
    // Drop the condition code.
    TCOperands = drop_end(TCOperands);
  }

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  // Like PATCHABLE_RET, we have the actual instruction in the operands to this
  // instruction so we lower that particular instruction and its operands.
  // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
  // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
  // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
  // tail call much like how we have it in PATCHABLE_RET.
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
  OutStreamer->emitLabel(CurSled);
  auto Target = OutContext.createTempSymbol();

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->emitBytes("\xeb\x09");
  emitX86Nops(*OutStreamer, 9, Subtarget);
  OutStreamer->emitLabel(Target);
  recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);

  // Before emitting the instruction, add a comment to indicate that this is
  // indeed a tail call.
  OutStreamer->AddComment("TAILCALL");
  for (auto &MO : TCOperands)
    if (auto Op = MCIL.LowerMachineOperand(&MI, MO); Op.isValid())
      TC.addOperand(Op);
  OutStreamer->emitInstruction(TC, getSubtargetInfo());

  if (IsConditional)
    OutStreamer->emitLabel(FallthroughLabel);
}

static unsigned getSrcIdx(const MachineInstr *MI, unsigned SrcIdx) {
  if (X86II::isKMasked(MI->getDesc().TSFlags)) {
    // Skip mask operand.
    ++SrcIdx;
    if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
      // Skip passthru operand.
      ++SrcIdx;
    }
  }
  return SrcIdx;
}

static void printDstRegisterName(raw_ostream &CS, const MachineInstr *MI,
                                 unsigned SrcOpIdx) {
  const MachineOperand &DstOp = MI->getOperand(0);
  CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg());

  // Handle AVX512 MASK/MASKZ write mask comments.
  // MASK: zmmX {%kY}
  // MASKZ: zmmX {%kY} {z}
  if (X86II::isKMasked(MI->getDesc().TSFlags)) {
    const MachineOperand &WriteMaskOp = MI->getOperand(SrcOpIdx - 1);
    StringRef Mask = X86ATTInstPrinter::getRegisterName(WriteMaskOp.getReg());
    CS << " {%" << Mask << "}";
    if (!X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
      CS << " {z}";
    }
  }
}

static void printShuffleMask(raw_ostream &CS, StringRef Src1Name,
                             StringRef Src2Name, ArrayRef<int> Mask) {
  // One source operand, fix the mask to print all elements in one span.
  SmallVector<int, 8> ShuffleMask(Mask);
  if (Src1Name == Src2Name)
    for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
      if (ShuffleMask[i] >= e)
        ShuffleMask[i] -= e;

  for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
    if (i != 0)
      CS << ",";
    if (ShuffleMask[i] == SM_SentinelZero) {
      CS << "zero";
      continue;
    }

    // Otherwise, it must come from src1 or src2. Print the span of elements
    // that comes from this src.
    bool isSrc1 = ShuffleMask[i] < (int)e;
    CS << (isSrc1 ? Src1Name : Src2Name) << '[';

    bool IsFirst = true;
    while (i != e && ShuffleMask[i] != SM_SentinelZero &&
           (ShuffleMask[i] < (int)e) == isSrc1) {
      if (!IsFirst)
        CS << ',';
      else
        IsFirst = false;
      if (ShuffleMask[i] == SM_SentinelUndef)
        CS << "u";
      else
        CS << ShuffleMask[i] % (int)e;
      ++i;
    }
    CS << ']';
    --i; // For loop increments element #.
  }
}
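
// For example (illustrative): with Src1Name = "xmm0", Src2Name = "xmm1" and
// Mask = {0, 1, SM_SentinelZero, 7}, this prints: xmm0[0,1],zero,xmm1[3]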

static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
                                     unsigned SrcOp2Idx, ArrayRef<int> Mask) {
  std::string Comment;

  const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
  const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
  StringRef Src1Name = SrcOp1.isReg()
                           ? X86ATTInstPrinter::getRegisterName(SrcOp1.getReg())
                           : "mem";
  StringRef Src2Name = SrcOp2.isReg()
                           ? X86ATTInstPrinter::getRegisterName(SrcOp2.getReg())
                           : "mem";

  raw_string_ostream CS(Comment);
  printDstRegisterName(CS, MI, SrcOp1Idx);
  CS << " = ";
  printShuffleMask(CS, Src1Name, Src2Name, Mask);

  return Comment;
}

static void printConstant(const APInt &Val, raw_ostream &CS,
                          bool PrintZero = false) {
  if (Val.getBitWidth() <= 64) {
    CS << (PrintZero ? 0ULL : Val.getZExtValue());
  } else {
    // print multi-word constant as (w0,w1)
    CS << "(";
    for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
      if (i > 0)
        CS << ",";
      CS << (PrintZero ? 0ULL : Val.getRawData()[i]);
    }
    CS << ")";
  }
}

static void printConstant(const APFloat &Flt, raw_ostream &CS,
                          bool PrintZero = false) {
  SmallString<32> Str;
  // Force scientific notation to distinguish from integers.
  if (PrintZero)
    APFloat::getZero(Flt.getSemantics()).toString(Str, 0, 0);
  else
    Flt.toString(Str, 0, 0);
  CS << Str;
}

static void printConstant(const Constant *COp, unsigned BitWidth,
                          raw_ostream &CS, bool PrintZero = false) {
  if (isa<UndefValue>(COp)) {
    CS << "u";
  } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
    if (auto VTy = dyn_cast<FixedVectorType>(CI->getType())) {
      for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) {
        if (I != 0)
          CS << ',';
        printConstant(CI->getValue(), CS, PrintZero);
      }
    } else
      printConstant(CI->getValue(), CS, PrintZero);
  } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
    if (auto VTy = dyn_cast<FixedVectorType>(CF->getType())) {
      for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) {
        if (I != 0)
          CS << ',';
        printConstant(CF->getValueAPF(), CS, PrintZero);
      }
    } else
      printConstant(CF->getValueAPF(), CS, PrintZero);
  } else if (auto *CDS = dyn_cast<ConstantDataSequential>(COp)) {
    Type *EltTy = CDS->getElementType();
    bool IsInteger = EltTy->isIntegerTy();
    bool IsFP = EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy();
    unsigned EltBits = EltTy->getPrimitiveSizeInBits();
    unsigned E = std::min(BitWidth / EltBits, (unsigned)CDS->getNumElements());
    if ((BitWidth % EltBits) == 0) {
      for (unsigned I = 0; I != E; ++I) {
        if (I != 0)
          CS << ",";
        if (IsInteger)
          printConstant(CDS->getElementAsAPInt(I), CS, PrintZero);
        else if (IsFP)
          printConstant(CDS->getElementAsAPFloat(I), CS, PrintZero);
        else
          CS << "?";
      }
    } else {
      CS << "?";
    }
  } else if (auto *CV = dyn_cast<ConstantVector>(COp)) {
    unsigned EltBits = CV->getType()->getScalarSizeInBits();
    unsigned E = std::min(BitWidth / EltBits, CV->getNumOperands());
    if ((BitWidth % EltBits) == 0) {
      for (unsigned I = 0; I != E; ++I) {
        if (I != 0)
          CS << ",";
        printConstant(CV->getOperand(I), EltBits, CS, PrintZero);
      }
    } else {
      CS << "?";
    }
  } else {
    CS << "?";
  }
}
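
// For example (illustrative): a <4 x i32> constant-pool entry <1, 2, 3, 4>
// prints as "1,2,3,4", while an unsupported constant kind prints as "?".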

static void printZeroUpperMove(const MachineInstr *MI, MCStreamer &OutStreamer,
                               int SclWidth, int VecWidth,
                               const char *ShuffleComment) {
  unsigned SrcIdx = getSrcIdx(MI, 1);

  std::string Comment;
  raw_string_ostream CS(Comment);
  printDstRegisterName(CS, MI, SrcIdx);
  CS << " = ";

  if (auto *C = X86::getConstantFromPool(*MI, SrcIdx)) {
    CS << "[";
    printConstant(C, SclWidth, CS);
    for (int I = 1, E = VecWidth / SclWidth; I < E; ++I) {
      CS << ",";
      printConstant(C, SclWidth, CS, true);
    }
    CS << "]";
    OutStreamer.AddComment(CS.str());
    return; // early-out
  }

  // We didn't find a constant load, fall back to a shuffle mask decode.
  CS << ShuffleComment;
  OutStreamer.AddComment(CS.str());
}

static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer,
                           int Repeats, int BitWidth) {
  unsigned SrcIdx = getSrcIdx(MI, 1);
  if (auto *C = X86::getConstantFromPool(*MI, SrcIdx)) {
    std::string Comment;
    raw_string_ostream CS(Comment);
    printDstRegisterName(CS, MI, SrcIdx);
    CS << " = [";
    for (int l = 0; l != Repeats; ++l) {
      if (l != 0)
        CS << ",";
      printConstant(C, BitWidth, CS);
    }
    CS << "]";
    OutStreamer.AddComment(CS.str());
  }
}

static bool printExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
                        int SrcEltBits, int DstEltBits, bool IsSext) {
  unsigned SrcIdx = getSrcIdx(MI, 1);
  auto *C = X86::getConstantFromPool(*MI, SrcIdx);
  if (C && C->getType()->getScalarSizeInBits() == unsigned(SrcEltBits)) {
    if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
      int NumElts = CDS->getNumElements();
      std::string Comment;
      raw_string_ostream CS(Comment);
      printDstRegisterName(CS, MI, SrcIdx);
      CS << " = [";
      for (int i = 0; i != NumElts; ++i) {
        if (i != 0)
          CS << ",";
        if (CDS->getElementType()->isIntegerTy()) {
          APInt Elt = CDS->getElementAsAPInt(i);
          Elt = IsSext ? Elt.sext(DstEltBits) : Elt.zext(DstEltBits);
          printConstant(Elt, CS);
        } else
          CS << "?";
      }
      CS << "]";
      OutStreamer.AddComment(CS.str());
      return true;
    }
  }

  return false;
}

static void printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
                            int SrcEltBits, int DstEltBits) {
  printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, true);
}

static void printZeroExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
                            int SrcEltBits, int DstEltBits) {
  if (printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, false))
    return;

  // We didn't find a constant load, fall back to a shuffle mask decode.
  std::string Comment;
  raw_string_ostream CS(Comment);
  printDstRegisterName(CS, MI, getSrcIdx(MI, 1));
  CS << " = ";

  SmallVector<int> Mask;
  unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
  assert((Width % DstEltBits) == 0 && (DstEltBits % SrcEltBits) == 0 &&
         "Illegal extension ratio");
  DecodeZeroExtendMask(SrcEltBits, DstEltBits, Width / DstEltBits, false, Mask);
  printShuffleMask(CS, "mem", "", Mask);

  OutStreamer.AddComment(CS.str());
}

void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
  assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
  assert((getSubtarget().isOSWindows() || getSubtarget().isUEFI()) &&
         "SEH_ instruction Windows and UEFI only");

  // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
  if (EmitFPOData) {
    X86TargetStreamer *XTS =
        static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
    switch (MI->getOpcode()) {
    case X86::SEH_PushReg:
      XTS->emitFPOPushReg(MI->getOperand(0).getImm());
      break;
    case X86::SEH_StackAlloc:
      XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
      break;
    case X86::SEH_StackAlign:
      XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
      break;
    case X86::SEH_SetFrame:
      assert(MI->getOperand(1).getImm() == 0 &&
             ".cv_fpo_setframe takes no offset");
      XTS->emitFPOSetFrame(MI->getOperand(0).getImm());
      break;
    case X86::SEH_EndPrologue:
      XTS->emitFPOEndPrologue();
      break;
    case X86::SEH_SaveReg:
    case X86::SEH_SaveXMM:
    case X86::SEH_PushFrame:
      llvm_unreachable("SEH_ directive incompatible with FPO");
      break;
    default:
      llvm_unreachable("expected SEH_ instruction");
    }
    return;
  }

  // Otherwise, use the .seh_ directives for all other Windows platforms.
  switch (MI->getOpcode()) {
  case X86::SEH_PushReg:
    OutStreamer->emitWinCFIPushReg(MI->getOperand(0).getImm());
    break;

  case X86::SEH_SaveReg:
    OutStreamer->emitWinCFISaveReg(MI->getOperand(0).getImm(),
                                   MI->getOperand(1).getImm());
    break;

  case X86::SEH_SaveXMM:
    OutStreamer->emitWinCFISaveXMM(MI->getOperand(0).getImm(),
                                   MI->getOperand(1).getImm());
    break;

  case X86::SEH_StackAlloc:
    OutStreamer->emitWinCFIAllocStack(MI->getOperand(0).getImm());
    break;

  case X86::SEH_SetFrame:
    OutStreamer->emitWinCFISetFrame(MI->getOperand(0).getImm(),
                                    MI->getOperand(1).getImm());
    break;

  case X86::SEH_PushFrame:
    OutStreamer->emitWinCFIPushFrame(MI->getOperand(0).getImm());
    break;

  case X86::SEH_EndPrologue:
    OutStreamer->emitWinCFIEndProlog();
    break;

  case X86::SEH_BeginEpilogue:
    OutStreamer->emitWinCFIBeginEpilogue();
    break;

  case X86::SEH_EndEpilogue:
    OutStreamer->emitWinCFIEndEpilogue();
    break;

  case X86::SEH_UnwindV2Start:
    OutStreamer->emitWinCFIUnwindV2Start();
    break;

  case X86::SEH_UnwindVersion:
    OutStreamer->emitWinCFIUnwindVersion(MI->getOperand(0).getImm());
    break;

  default:
    llvm_unreachable("expected SEH_ instruction");
  }
}
1802
1804 MCStreamer &OutStreamer) {
1805 switch (MI->getOpcode()) {
1806 // Lower PSHUFB and VPERMILP normally but add a comment if we can find
1807 // a constant shuffle mask. We won't be able to do this at the MC layer
1808 // because the mask isn't an immediate.
1809 case X86::PSHUFBrm:
1810 case X86::VPSHUFBrm:
1811 case X86::VPSHUFBYrm:
1812 case X86::VPSHUFBZ128rm:
1813 case X86::VPSHUFBZ128rmk:
1814 case X86::VPSHUFBZ128rmkz:
1815 case X86::VPSHUFBZ256rm:
1816 case X86::VPSHUFBZ256rmk:
1817 case X86::VPSHUFBZ256rmkz:
1818 case X86::VPSHUFBZrm:
1819 case X86::VPSHUFBZrmk:
1820 case X86::VPSHUFBZrmkz: {
1821 unsigned SrcIdx = getSrcIdx(MI, 1);
1822 if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
1823 unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
1824 SmallVector<int, 64> Mask;
1825 DecodePSHUFBMask(C, Width, Mask);
1826 if (!Mask.empty())
1827 OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
1828 }
1829 break;
1830 }
1831
1832 case X86::VPERMILPSrm:
1833 case X86::VPERMILPSYrm:
1834 case X86::VPERMILPSZ128rm:
1835 case X86::VPERMILPSZ128rmk:
1836 case X86::VPERMILPSZ128rmkz:
1837 case X86::VPERMILPSZ256rm:
1838 case X86::VPERMILPSZ256rmk:
1839 case X86::VPERMILPSZ256rmkz:
1840 case X86::VPERMILPSZrm:
1841 case X86::VPERMILPSZrmk:
1842 case X86::VPERMILPSZrmkz: {
1843 unsigned SrcIdx = getSrcIdx(MI, 1);
1844 if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
1845 unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
1846 SmallVector<int, 16> Mask;
1847 DecodeVPERMILPMask(C, 32, Width, Mask);
1848 if (!Mask.empty())
1849 OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
1850 }
1851 break;
1852 }
1853 case X86::VPERMILPDrm:
1854 case X86::VPERMILPDYrm:
1855 case X86::VPERMILPDZ128rm:
1856 case X86::VPERMILPDZ128rmk:
1857 case X86::VPERMILPDZ128rmkz:
1858 case X86::VPERMILPDZ256rm:
1859 case X86::VPERMILPDZ256rmk:
1860 case X86::VPERMILPDZ256rmkz:
1861 case X86::VPERMILPDZrm:
1862 case X86::VPERMILPDZrmk:
1863 case X86::VPERMILPDZrmkz: {
1864 unsigned SrcIdx = getSrcIdx(MI, 1);
1865 if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
1866 unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
1867 SmallVector<int, 16> Mask;
1868 DecodeVPERMILPMask(C, 64, Width, Mask);
1869 if (!Mask.empty())
1870 OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
1871 }
1872 break;
1873 }
1874
1875 case X86::VPERMIL2PDrm:
1876 case X86::VPERMIL2PSrm:
1877 case X86::VPERMIL2PDYrm:
1878 case X86::VPERMIL2PSYrm: {
1879 assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) &&
1880 "Unexpected number of operands!");
1881
1882 const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
1883 if (!CtrlOp.isImm())
1884 break;
1885
1886 unsigned ElSize;
1887 switch (MI->getOpcode()) {
1888 default: llvm_unreachable("Invalid opcode");
1889 case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
1890 case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
1891 }
1892
1893 if (auto *C = X86::getConstantFromPool(*MI, 3)) {
1894 unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
1895 SmallVector<int, 16> Mask;
1896 DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
1897 if (!Mask.empty())
1898 OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
1899 }
1900 break;
1901 }
1902
1903 case X86::VPPERMrrm: {
1904 if (auto *C = X86::getConstantFromPool(*MI, 3)) {
1905 unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
1906 SmallVector<int, 16> Mask;
1907 DecodeVPPERMMask(C, Width, Mask);
1908 if (!Mask.empty())
1909 OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
1910 }
1911 break;
1912 }
1913
1914 case X86::MMX_MOVQ64rm: {
1915 if (auto *C = X86::getConstantFromPool(*MI, 1)) {
1916 std::string Comment;
1917 raw_string_ostream CS(Comment);
1918 const MachineOperand &DstOp = MI->getOperand(0);
1919 CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
1920 if (auto *CF = dyn_cast<ConstantFP>(C)) {
1921 CS << "0x" << toString(CF->getValueAPF().bitcastToAPInt(), 16, false);
1922 OutStreamer.AddComment(CS.str());
1923 }
1924 }
1925 break;
1926 }
1927
1928#define INSTR_CASE(Prefix, Instr, Suffix, Postfix) \
1929 case X86::Prefix##Instr##Suffix##rm##Postfix:
1930
1931#define CASE_ARITH_RM(Instr) \
1932 INSTR_CASE(, Instr, , ) /* SSE */ \
1933 INSTR_CASE(V, Instr, , ) /* AVX-128 */ \
1934 INSTR_CASE(V, Instr, Y, ) /* AVX-256 */ \
1935 INSTR_CASE(V, Instr, Z128, ) \
1936 INSTR_CASE(V, Instr, Z128, k) \
1937 INSTR_CASE(V, Instr, Z128, kz) \
1938 INSTR_CASE(V, Instr, Z256, ) \
1939 INSTR_CASE(V, Instr, Z256, k) \
1940 INSTR_CASE(V, Instr, Z256, kz) \
1941 INSTR_CASE(V, Instr, Z, ) \
1942 INSTR_CASE(V, Instr, Z, k) \
1943 INSTR_CASE(V, Instr, Z, kz)
1944
1945 // TODO: Add additional instructions when useful.
1946 CASE_ARITH_RM(PMADDUBSW) {
1947 unsigned SrcIdx = getSrcIdx(MI, 1);
1948 if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
1949 if (C->getType()->getScalarSizeInBits() == 8) {
1950 std::string Comment;
1951 raw_string_ostream CS(Comment);
1952 unsigned VectorWidth =
1953 X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
1954 CS << "[";
1955 printConstant(C, VectorWidth, CS);
1956 CS << "]";
1957 OutStreamer.AddComment(CS.str());
1958 }
1959 }
1960 break;
1961 }
1962
1963 CASE_ARITH_RM(PMADDWD)
1964 CASE_ARITH_RM(PMULLW)
1965 CASE_ARITH_RM(PMULHW)
1966 CASE_ARITH_RM(PMULHUW)
1967 CASE_ARITH_RM(PMULHRSW) {
1968 unsigned SrcIdx = getSrcIdx(MI, 1);
1969 if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
1970 if (C->getType()->getScalarSizeInBits() == 16) {
1971 std::string Comment;
1972 raw_string_ostream CS(Comment);
1973 unsigned VectorWidth =
1974 X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
1975 CS << "[";
1976 printConstant(C, VectorWidth, CS);
1977 CS << "]";
1978 OutStreamer.AddComment(CS.str());
1979 }
1980 }
1981 break;
1982 }
1983
1984#define MASK_AVX512_CASE(Instr) \
1985 case Instr: \
1986 case Instr##k: \
1987 case Instr##kz:
1988
1989 case X86::MOVSDrm:
1990 case X86::VMOVSDrm:
1991 MASK_AVX512_CASE(X86::VMOVSDZrm)
1992 case X86::MOVSDrm_alt:
1993 case X86::VMOVSDrm_alt:
1994 case X86::VMOVSDZrm_alt:
1995 case X86::MOVQI2PQIrm:
1996 case X86::VMOVQI2PQIrm:
1997 case X86::VMOVQI2PQIZrm:
1998 printZeroUpperMove(MI, OutStreamer, 64, 128, "mem[0],zero");
1999 break;
2000
2001 MASK_AVX512_CASE(X86::VMOVSHZrm)
2002 case X86::VMOVSHZrm_alt:
2003 printZeroUpperMove(MI, OutStreamer, 16, 128,
2004 "mem[0],zero,zero,zero,zero,zero,zero,zero");
2005 break;
2006
2007 case X86::MOVSSrm:
2008 case X86::VMOVSSrm:
2009 MASK_AVX512_CASE(X86::VMOVSSZrm)
2010 case X86::MOVSSrm_alt:
2011 case X86::VMOVSSrm_alt:
2012 case X86::VMOVSSZrm_alt:
2013 case X86::MOVDI2PDIrm:
2014 case X86::VMOVDI2PDIrm:
2015 case X86::VMOVDI2PDIZrm:
2016 printZeroUpperMove(MI, OutStreamer, 32, 128, "mem[0],zero,zero,zero");
2017 break;
2018
2019#define MOV_CASE(Prefix, Suffix) \
2020 case X86::Prefix##MOVAPD##Suffix##rm: \
2021 case X86::Prefix##MOVAPS##Suffix##rm: \
2022 case X86::Prefix##MOVUPD##Suffix##rm: \
2023 case X86::Prefix##MOVUPS##Suffix##rm: \
2024 case X86::Prefix##MOVDQA##Suffix##rm: \
2025 case X86::Prefix##MOVDQU##Suffix##rm:
2026
2027#define MOV_AVX512_CASE(Suffix, Postfix) \
2028 case X86::VMOVDQA64##Suffix##rm##Postfix: \
2029 case X86::VMOVDQA32##Suffix##rm##Postfix: \
2030 case X86::VMOVDQU64##Suffix##rm##Postfix: \
2031 case X86::VMOVDQU32##Suffix##rm##Postfix: \
2032 case X86::VMOVDQU16##Suffix##rm##Postfix: \
2033 case X86::VMOVDQU8##Suffix##rm##Postfix: \
2034 case X86::VMOVAPS##Suffix##rm##Postfix: \
2035 case X86::VMOVAPD##Suffix##rm##Postfix: \
2036 case X86::VMOVUPS##Suffix##rm##Postfix: \
2037 case X86::VMOVUPD##Suffix##rm##Postfix:
2038
2039#define CASE_128_MOV_RM() \
2040 MOV_CASE(, ) /* SSE */ \
2041 MOV_CASE(V, ) /* AVX-128 */ \
2042 MOV_AVX512_CASE(Z128, ) \
2043 MOV_AVX512_CASE(Z128, k) \
2044 MOV_AVX512_CASE(Z128, kz)
2045
2046#define CASE_256_MOV_RM() \
2047 MOV_CASE(V, Y) /* AVX-256 */ \
2048 MOV_AVX512_CASE(Z256, ) \
2049 MOV_AVX512_CASE(Z256, k) \
2050 MOV_AVX512_CASE(Z256, kz) \
2051
2052#define CASE_512_MOV_RM() \
2053 MOV_AVX512_CASE(Z, ) \
2054 MOV_AVX512_CASE(Z, k) \
2055 MOV_AVX512_CASE(Z, kz) \
2056
2057 // For loads from a constant pool to a vector register, print the constant
2058 // loaded.
2059 CASE_128_MOV_RM()
2060 printBroadcast(MI, OutStreamer, 1, 128);
2061 break;
2062 CASE_256_MOV_RM()
2063 printBroadcast(MI, OutStreamer, 1, 256);
2064 break;
2065 CASE_512_MOV_RM()
2066 printBroadcast(MI, OutStreamer, 1, 512);
2067 break;
2068 case X86::VBROADCASTF128rm:
2069 case X86::VBROADCASTI128rm:
2070 MASK_AVX512_CASE(X86::VBROADCASTF32X4Z256rm)
2071 MASK_AVX512_CASE(X86::VBROADCASTF64X2Z256rm)
2072 MASK_AVX512_CASE(X86::VBROADCASTI32X4Z256rm)
2073 MASK_AVX512_CASE(X86::VBROADCASTI64X2Z256rm)
2074 printBroadcast(MI, OutStreamer, 2, 128);
2075 break;
2076 MASK_AVX512_CASE(X86::VBROADCASTF32X4Zrm)
2077 MASK_AVX512_CASE(X86::VBROADCASTF64X2Zrm)
2078 MASK_AVX512_CASE(X86::VBROADCASTI32X4Zrm)
2079 MASK_AVX512_CASE(X86::VBROADCASTI64X2Zrm)
2080 printBroadcast(MI, OutStreamer, 4, 128);
2081 break;
2082 MASK_AVX512_CASE(X86::VBROADCASTF32X8Zrm)
2083 MASK_AVX512_CASE(X86::VBROADCASTF64X4Zrm)
2084 MASK_AVX512_CASE(X86::VBROADCASTI32X8Zrm)
2085 MASK_AVX512_CASE(X86::VBROADCASTI64X4Zrm)
2086 printBroadcast(MI, OutStreamer, 2, 256);
2087 break;
2088
2089 // For broadcast loads from a constant pool to a vector register, repeatedly
2090 // print the constant loaded.
2091 case X86::MOVDDUPrm:
2092 case X86::VMOVDDUPrm:
2093 MASK_AVX512_CASE(X86::VMOVDDUPZ128rm)
2094 case X86::VPBROADCASTQrm:
2095 MASK_AVX512_CASE(X86::VPBROADCASTQZ128rm)
2096 printBroadcast(MI, OutStreamer, 2, 64);
2097 break;
2098 case X86::VBROADCASTSDYrm:
2099 MASK_AVX512_CASE(X86::VBROADCASTSDZ256rm)
2100 case X86::VPBROADCASTQYrm:
2101 MASK_AVX512_CASE(X86::VPBROADCASTQZ256rm)
2102 printBroadcast(MI, OutStreamer, 4, 64);
2103 break;
2104 MASK_AVX512_CASE(X86::VBROADCASTSDZrm)
2105 MASK_AVX512_CASE(X86::VPBROADCASTQZrm)
2106 printBroadcast(MI, OutStreamer, 8, 64);
2107 break;
2108 case X86::VBROADCASTSSrm:
2109 MASK_AVX512_CASE(X86::VBROADCASTSSZ128rm)
2110 case X86::VPBROADCASTDrm:
2111 MASK_AVX512_CASE(X86::VPBROADCASTDZ128rm)
2112 printBroadcast(MI, OutStreamer, 4, 32);
2113 break;
2114 case X86::VBROADCASTSSYrm:
2115 MASK_AVX512_CASE(X86::VBROADCASTSSZ256rm)
2116 case X86::VPBROADCASTDYrm:
2117 MASK_AVX512_CASE(X86::VPBROADCASTDZ256rm)
2118 printBroadcast(MI, OutStreamer, 8, 32);
2119 break;
2120 MASK_AVX512_CASE(X86::VBROADCASTSSZrm)
2121 MASK_AVX512_CASE(X86::VPBROADCASTDZrm)
2122 printBroadcast(MI, OutStreamer, 16, 32);
2123 break;
2124 case X86::VPBROADCASTWrm:
2125 MASK_AVX512_CASE(X86::VPBROADCASTWZ128rm)
2126 printBroadcast(MI, OutStreamer, 8, 16);
2127 break;
2128 case X86::VPBROADCASTWYrm:
2129 MASK_AVX512_CASE(X86::VPBROADCASTWZ256rm)
2130 printBroadcast(MI, OutStreamer, 16, 16);
2131 break;
2132 MASK_AVX512_CASE(X86::VPBROADCASTWZrm)
2133 printBroadcast(MI, OutStreamer, 32, 16);
2134 break;
2135 case X86::VPBROADCASTBrm:
2136 MASK_AVX512_CASE(X86::VPBROADCASTBZ128rm)
2137 printBroadcast(MI, OutStreamer, 16, 8);
2138 break;
2139 case X86::VPBROADCASTBYrm:
2140 MASK_AVX512_CASE(X86::VPBROADCASTBZ256rm)
2141 printBroadcast(MI, OutStreamer, 32, 8);
2142 break;
2143 MASK_AVX512_CASE(X86::VPBROADCASTBZrm)
2144 printBroadcast(MI, OutStreamer, 64, 8);
2145 break;
2146
2147#define MOVX_CASE(Prefix, Ext, Type, Suffix, Postfix) \
2148 case X86::Prefix##PMOV##Ext##Type##Suffix##rm##Postfix:
2149
2150#define CASE_MOVX_RM(Ext, Type) \
2151 MOVX_CASE(, Ext, Type, , ) \
2152 MOVX_CASE(V, Ext, Type, , ) \
2153 MOVX_CASE(V, Ext, Type, Y, ) \
2154 MOVX_CASE(V, Ext, Type, Z128, ) \
2155 MOVX_CASE(V, Ext, Type, Z128, k ) \
2156 MOVX_CASE(V, Ext, Type, Z128, kz ) \
2157 MOVX_CASE(V, Ext, Type, Z256, ) \
2158 MOVX_CASE(V, Ext, Type, Z256, k ) \
2159 MOVX_CASE(V, Ext, Type, Z256, kz ) \
2160 MOVX_CASE(V, Ext, Type, Z, ) \
2161 MOVX_CASE(V, Ext, Type, Z, k ) \
2162 MOVX_CASE(V, Ext, Type, Z, kz )
2163
2164 CASE_MOVX_RM(SX, BD)
2165 printSignExtend(MI, OutStreamer, 8, 32);
2166 break;
2167 CASE_MOVX_RM(SX, BQ)
2168 printSignExtend(MI, OutStreamer, 8, 64);
2169 break;
2170 CASE_MOVX_RM(SX, BW)
2171 printSignExtend(MI, OutStreamer, 8, 16);
2172 break;
2173 CASE_MOVX_RM(SX, DQ)
2174 printSignExtend(MI, OutStreamer, 32, 64);
2175 break;
2176 CASE_MOVX_RM(SX, WD)
2177 printSignExtend(MI, OutStreamer, 16, 32);
2178 break;
2179 CASE_MOVX_RM(SX, WQ)
2180 printSignExtend(MI, OutStreamer, 16, 64);
2181 break;
2182
2183 CASE_MOVX_RM(ZX, BD)
2184 printZeroExtend(MI, OutStreamer, 8, 32);
2185 break;
2186 CASE_MOVX_RM(ZX, BQ)
2187 printZeroExtend(MI, OutStreamer, 8, 64);
2188 break;
2189 CASE_MOVX_RM(ZX, BW)
2190 printZeroExtend(MI, OutStreamer, 8, 16);
2191 break;
2192 CASE_MOVX_RM(ZX, DQ)
2193 printZeroExtend(MI, OutStreamer, 32, 64);
2194 break;
2195 CASE_MOVX_RM(ZX, WD)
2196 printZeroExtend(MI, OutStreamer, 16, 32);
2197 break;
2198 CASE_MOVX_RM(ZX, WQ)
2199 printZeroExtend(MI, OutStreamer, 16, 64);
2200 break;
2201 }
2202}
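// For a sense of what the cases above produce, here is a sketch of
// verbose-asm output with constant-pool comments attached. The registers,
// shuffle indices, and constant values are invented for illustration:
//
//   vpshufb (%rip), %xmm0, %xmm1  # xmm1 = xmm0[3,2,1,0,7,6,5,4,...]
//   vmovaps (%rip), %xmm2         # xmm2 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
//   vbroadcastss (%rip), %ymm3    # ymm3 = [1.0E+0,1.0E+0,1.0E+0,...]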
2203
2204// Does the given operand refer to a DLLIMPORT function?
2205bool isImportedFunction(const MachineOperand &MO) {
2206 return MO.isGlobal() && (MO.getTargetFlags() == X86II::MO_DLLIMPORT);
2207}
2208
2209// Is the given instruction a call to a CFGuard function?
2210bool isCallToCFGuardFunction(const MachineInstr *MI) {
2211 assert(MI->getOpcode() == X86::TAILJMPm64_REX ||
2212 MI->getOpcode() == X86::CALL64m);
2213 const MachineOperand &MO = MI->getOperand(3);
2214 return MO.isGlobal() && (MO.getTargetFlags() == X86II::MO_NO_FLAG) &&
2215 isCFGuardFunction(MO.getGlobal());
2216}
2217
2218// Does the containing block for the given instruction contain any jump table
2219// info (indicating that the block is a dispatch for a jump table)?
2220bool hasJumpTableInfoInBlock(const llvm::MachineInstr *MI) {
2221 const MachineBasicBlock &MBB = *MI->getParent();
2222 for (auto I = MBB.instr_rbegin(), E = MBB.instr_rend(); I != E; ++I)
2223 if (I->isJumpTableDebugInfo())
2224 return true;
2225
2226 return false;
2227}
2228
2229void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2230 // FIXME: Enable feature predicate checks once all the tests pass.
2231 // X86_MC::verifyInstructionPredicates(MI->getOpcode(),
2232 // Subtarget->getFeatureBits());
2233
2234 X86MCInstLower MCInstLowering(*MF, *this);
2235 const X86RegisterInfo *RI =
2236 MF->getSubtarget<X86Subtarget>().getRegisterInfo();
2237
2238 if (MI->getOpcode() == X86::OR64rm) {
2239 for (auto &Opd : MI->operands()) {
2240 if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) ==
2241 "swift_async_extendedFramePointerFlags") {
2242 ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true;
2243 }
2244 }
2245 }
2246
2247 // Add comments for values loaded from constant pool.
2248 if (OutStreamer->isVerboseAsm())
2249 addConstantComments(MI, *OutStreamer);
2250
2251 // Add a comment about EVEX compression
2252 if (OutStreamer->isVerboseAsm()) {
2253 if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY)
2254 OutStreamer->AddComment("EVEX TO LEGACY Compression ", false);
2255 else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
2256 OutStreamer->AddComment("EVEX TO VEX Compression ", false);
2257 else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_EVEX)
2258 OutStreamer->AddComment("EVEX TO EVEX Compression ", false);
2259 }
2260
2261 // We use this to suppress NOP padding for Windows EH.
2262 bool IsTailJump = false;
2263
2264 switch (MI->getOpcode()) {
2265 case TargetOpcode::DBG_VALUE:
2266 llvm_unreachable("Should be handled target independently");
2267
2268 case X86::EH_RETURN:
2269 case X86::EH_RETURN64: {
2270 // Lower these as normal, but add some comments.
2271 Register Reg = MI->getOperand(0).getReg();
2272 OutStreamer->AddComment(StringRef("eh_return, addr: %") +
2273 X86ATTInstPrinter::getRegisterName(Reg));
2274 break;
2275 }
2276 case X86::CLEANUPRET: {
2277 // Lower these as normal, but add some comments.
2278 OutStreamer->AddComment("CLEANUPRET");
2279 break;
2280 }
2281
2282 case X86::CATCHRET: {
2283 // Lower these as normal, but add some comments.
2284 OutStreamer->AddComment("CATCHRET");
2285 break;
2286 }
2287
2288 case X86::ENDBR32:
2289 case X86::ENDBR64: {
2290 // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
2291 // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
2292 // non-empty. If MI is the initial ENDBR, place the
2293 // __patchable_function_entries label after ENDBR.
2294 if (CurrentPatchableFunctionEntrySym &&
2295 CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
2296 MI == &MF->front().front()) {
2297 MCInst Inst;
2298 MCInstLowering.Lower(MI, Inst);
2299 EmitAndCountInstruction(Inst);
2300 CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
2301 OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
2302 return;
2303 }
2304 break;
2305 }
2306
2307 case X86::TAILJMPd64:
2308 if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
2309 EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
2310
2311 if (EnableImportCallOptimization && isImportedFunction(MI->getOperand(0))) {
2312 emitLabelAndRecordForImportCallOptimization(
2313 IMAGE_RETPOLINE_AMD64_IMPORT_BR);
2314 }
2315
2316 // Lower this as normal, but add a comment.
2317 OutStreamer->AddComment("TAILCALL");
2318 IsTailJump = true;
2319 break;
2320
2321 case X86::TAILJMPr:
2322 case X86::TAILJMPm:
2323 case X86::TAILJMPd:
2324 case X86::TAILJMPd_CC:
2325 case X86::TAILJMPr64:
2326 case X86::TAILJMPm64:
2327 case X86::TAILJMPd64_CC:
2328 if (EnableImportCallOptimization)
2329 report_fatal_error("Unexpected TAILJMP instruction was emitted when "
2330 "import call optimization was enabled");
2331
2332 // Lower these as normal, but add some comments.
2333 OutStreamer->AddComment("TAILCALL");
2334 IsTailJump = true;
2335 break;
2336
2337 case X86::TAILJMPm64_REX:
2338 if (EnableImportCallOptimization && isCallToCFGuardFunction(MI)) {
2339 emitLabelAndRecordForImportCallOptimization(
2340 IMAGE_RETPOLINE_AMD64_CFG_BR_REX);
2341 }
2342
2343 OutStreamer->AddComment("TAILCALL");
2344 IsTailJump = true;
2345 break;
2346
2347 case X86::TAILJMPr64_REX: {
2348 if (EnableImportCallOptimization) {
2349 assert(MI->getOperand(0).getReg() == X86::RAX &&
2350 "Indirect tail calls with impcall enabled must go through RAX (as "
2351 "enforced by TCRETURNImpCallri64)");
2352 emitLabelAndRecordForImportCallOptimization(
2353 IMAGE_RETPOLINE_AMD64_INDIR_BR);
2354 }
2355
2356 OutStreamer->AddComment("TAILCALL");
2357 IsTailJump = true;
2358 break;
2359 }
2360
2361 case X86::JMP64r:
2362 if (EnableImportCallOptimization && hasJumpTableInfoInBlock(MI)) {
2363 uint16_t EncodedReg =
2364 this->getSubtarget().getRegisterInfo()->getEncodingValue(
2365 MI->getOperand(0).getReg().asMCReg());
2366 emitLabelAndRecordForImportCallOptimization(
2367 (ImportCallKind)(IMAGE_RETPOLINE_AMD64_SWITCHTABLE_FIRST +
2368 EncodedReg));
2369 }
2370 break;
2371
2372 case X86::JMP16r:
2373 case X86::JMP16m:
2374 case X86::JMP32r:
2375 case X86::JMP32m:
2376 case X86::JMP64m:
2377 if (EnableImportCallOptimization && hasJumpTableInfoInBlock(MI))
2379 "Unexpected JMP instruction was emitted for a jump-table when import "
2380 "call optimization was enabled");
2381 break;
2382
2383 case X86::TLS_addr32:
2384 case X86::TLS_addr64:
2385 case X86::TLS_addrX32:
2386 case X86::TLS_base_addr32:
2387 case X86::TLS_base_addr64:
2388 case X86::TLS_base_addrX32:
2389 case X86::TLS_desc32:
2390 case X86::TLS_desc64:
2391 return LowerTlsAddr(MCInstLowering, *MI);
2392
2393 case X86::MOVPC32r: {
2394 // This is a pseudo op for a two instruction sequence with a label, which
2395 // looks like:
2396 // call "L1$pb"
2397 // "L1$pb":
2398 // popl %esi
2399
2400 // Emit the call.
2401 MCSymbol *PICBase = MF->getPICBaseSymbol();
2402 // FIXME: We would like an efficient form for this, so we don't have to do a
2403 // lot of extra uniquing.
2404 EmitAndCountInstruction(
2405 MCInstBuilder(X86::CALLpcrel32)
2406 .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
2407
2408 const X86FrameLowering *FrameLowering =
2409 MF->getSubtarget<X86Subtarget>().getFrameLowering();
2410 bool hasFP = FrameLowering->hasFP(*MF);
2411
2412 // TODO: This is needed only if we require precise CFA.
2413 bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
2414 !OutStreamer->getDwarfFrameInfos().back().End;
2415
2416 int stackGrowth = -RI->getSlotSize();
2417
2418 if (HasActiveDwarfFrame && !hasFP) {
2419 OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
2420 MF->getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);
2421 }
2422
2423 // Emit the label.
2424 OutStreamer->emitLabel(PICBase);
2425
2426 // popl $reg
2427 EmitAndCountInstruction(
2428 MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));
2429
2430 if (HasActiveDwarfFrame && !hasFP) {
2431 OutStreamer->emitCFIAdjustCfaOffset(stackGrowth);
2432 }
2433 return;
2434 }
2435
2436 case X86::ADD32ri: {
2437 // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
2438 if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
2439 break;
2440
2441 // Okay, we have something like:
2442 // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
2443
2444 // For this, we want to print something like:
2445 // MYGLOBAL + (. - PICBASE)
2446 // However, we can't generate a ".", so just emit a new label here and refer
2447 // to it.
2448 MCSymbol *DotSym = OutContext.createTempSymbol();
2449 OutStreamer->emitLabel(DotSym);
2450
2451 // Now that we have emitted the label, lower the complex operand expression.
2452 MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
2453
2454 const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
2455 const MCExpr *PICBase =
2456 MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
2457 DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);
2458
2459 DotExpr = MCBinaryExpr::createAdd(
2460 MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);
2461
2462 EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
2463 .addReg(MI->getOperand(0).getReg())
2464 .addReg(MI->getOperand(1).getReg())
2465 .addExpr(DotExpr));
2466 return;
2467 }
2468 case TargetOpcode::STATEPOINT:
2469 return LowerSTATEPOINT(*MI, MCInstLowering);
2470
2471 case TargetOpcode::FAULTING_OP:
2472 return LowerFAULTING_OP(*MI, MCInstLowering);
2473
2474 case TargetOpcode::FENTRY_CALL:
2475 return LowerFENTRY_CALL(*MI, MCInstLowering);
2476
2477 case TargetOpcode::PATCHABLE_OP:
2478 return LowerPATCHABLE_OP(*MI, MCInstLowering);
2479
2480 case TargetOpcode::STACKMAP:
2481 return LowerSTACKMAP(*MI);
2482
2483 case TargetOpcode::PATCHPOINT:
2484 return LowerPATCHPOINT(*MI, MCInstLowering);
2485
2486 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
2487 return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);
2488
2489 case TargetOpcode::PATCHABLE_RET:
2490 return LowerPATCHABLE_RET(*MI, MCInstLowering);
2491
2492 case TargetOpcode::PATCHABLE_TAIL_CALL:
2493 return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
2494
2495 case TargetOpcode::PATCHABLE_EVENT_CALL:
2496 return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
2497
2498 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
2499 return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);
2500
2501 case X86::MORESTACK_RET:
2502 EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2503 return;
2504
2505 case X86::KCFI_CHECK:
2506 return LowerKCFI_CHECK(*MI);
2507
2508 case X86::ASAN_CHECK_MEMACCESS:
2509 return LowerASAN_CHECK_MEMACCESS(*MI);
2510
2511 case X86::MORESTACK_RET_RESTORE_R10:
2512 // Return, then restore R10.
2513 EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2514 EmitAndCountInstruction(
2515 MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
2516 return;
2517
2518 case X86::SEH_PushReg:
2519 case X86::SEH_SaveReg:
2520 case X86::SEH_SaveXMM:
2521 case X86::SEH_StackAlloc:
2522 case X86::SEH_StackAlign:
2523 case X86::SEH_SetFrame:
2524 case X86::SEH_PushFrame:
2525 case X86::SEH_EndPrologue:
2526 case X86::SEH_EndEpilogue:
2527 case X86::SEH_UnwindV2Start:
2528 case X86::SEH_UnwindVersion:
2529 EmitSEHInstruction(MI);
2530 return;
2531
2532 case X86::SEH_BeginEpilogue: {
2533 assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
2534 EmitSEHInstruction(MI);
2535 return;
2536 }
2537 case X86::UBSAN_UD1:
2538 EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
2539 .addReg(X86::EAX)
2540 .addReg(X86::EAX)
2541 .addImm(1)
2542 .addReg(X86::NoRegister)
2543 .addImm(MI->getOperand(0).getImm())
2544 .addReg(X86::NoRegister));
2545 return;
2546 case X86::CALL64pcrel32:
2547 if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
2548 EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
2549
2550 if (EnableImportCallOptimization && isImportedFunction(MI->getOperand(0))) {
2551 emitLabelAndRecordForImportCallOptimization(
2552 IMAGE_RETPOLINE_AMD64_IMPORT_CALL);
2553
2554 MCInst TmpInst;
2555 MCInstLowering.Lower(MI, TmpInst);
2556
2557 // For Import Call Optimization to work, we need the call instruction
2558 // with a rex prefix, and a 5-byte nop after the call instruction.
2559 EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
2560 emitCallInstruction(TmpInst);
2561 emitNop(*OutStreamer, 5, Subtarget);
2562 maybeEmitNopAfterCallForWindowsEH(MI);
2563 return;
2564 }
2565
2566 break;
2567
2568 case X86::CALL64r:
2569 if (EnableImportCallOptimization) {
2570 assert(MI->getOperand(0).getReg() == X86::RAX &&
2571 "Indirect calls with impcall enabled must go through RAX (as "
2572 "enforced by CALL64r_ImpCall)");
2573
2574 emitLabelAndRecordForImportCallOptimization(
2575 IMAGE_RETPOLINE_AMD64_INDIR_CALL);
2576 MCInst TmpInst;
2577 MCInstLowering.Lower(MI, TmpInst);
2578 emitCallInstruction(TmpInst);
2579
2580 // For Import Call Optimization to work, we need a 3-byte nop after the
2581 // call instruction.
2582 emitNop(*OutStreamer, 3, Subtarget);
2583 maybeEmitNopAfterCallForWindowsEH(MI);
2584 return;
2585 }
2586 break;
2587
2588 case X86::CALL64m:
2589 if (EnableImportCallOptimization && isCallToCFGuardFunction(MI)) {
2590 emitLabelAndRecordForImportCallOptimization(
2591 IMAGE_RETPOLINE_AMD64_CFG_CALL);
2592 }
2593 break;
2594
2595 case X86::JCC_1:
2596 // Two instruction prefixes (2EH for branch not-taken and 3EH for branch
2597 // taken) are used as branch hints; a sketch follows this function. Here we
2598 // add the branch-taken prefix to a jump whose probability exceeds the threshold.
2599 if (getSubtarget().hasBranchHint() && EnableBranchHint) {
2600 const MachineBranchProbabilityInfo *MBPI =
2601 &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
2602 MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
2603 BranchProbability EdgeProb =
2604 MBPI->getEdgeProbability(MI->getParent(), DestBB);
2605 BranchProbability Threshold(BranchHintProbabilityThreshold, 100);
2606 if (EdgeProb > Threshold)
2607 EmitAndCountInstruction(MCInstBuilder(X86::DS_PREFIX));
2608 }
2609 break;
2610 }
2611
2612 MCInst TmpInst;
2613 MCInstLowering.Lower(MI, TmpInst);
2614
2615 if (MI->isCall()) {
2616 emitCallInstruction(TmpInst);
2617 // Since tail calls transfer control without leaving a stack frame, there is
2618 // never a need for NOP padding after tail calls.
2619 if (!IsTailJump)
2620 maybeEmitNopAfterCallForWindowsEH(MI);
2621 return;
2622 }
2623
2624 EmitAndCountInstruction(TmpInst);
2625}
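// A sketch of the JCC_1 branch-hint path above; the label and probability
// are invented. When -enable-branch-hint is set on a subtarget with the
// branch-hint feature and the taken edge is more probable than
// -branch-hint-probability-threshold, the emitted sequence is effectively:
//
//   ds           # the 3EH segment-override prefix, consumed as a taken hint
//   jne .LBB0_2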
2626
2627void X86AsmPrinter::emitCallInstruction(const llvm::MCInst &MCI) {
2628 // Stackmap shadows cannot include branch targets, so we can count the bytes
2629 // in a call towards the shadow, but must ensure that no thread returns
2630 // into the stackmap shadow. The only way to achieve this is if the call
2631 // is at the end of the shadow (a worked sketch follows this function).
2632
2633 // Count the size of the call towards the shadow.
2634 SMShadowTracker.count(MCI, getSubtargetInfo(), CodeEmitter.get());
2635 // Then flush the shadow so that we fill with nops before the call, not
2636 // after it.
2637 SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
2638 // Then emit the call
2639 OutStreamer->emitInstruction(MCI, getSubtargetInfo());
2640}
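// A worked sketch of the ordering above, with invented byte counts: suppose
// a STACKMAP opened an 8-byte shadow and this call encodes to 5 bytes.
// count() raises the shadow to 5 of the required 8 bytes, so
// emitShadowPadding() emits the remaining 3 bytes of nops, and only then is
// the call emitted:
//
//   <stackmap label>
//   nop (3 bytes)     ; padding placed before the call
//   call foo          ; its 5 bytes complete the 8-byte shadow
//
// The return address is the first byte past the shadow, so no thread can
// return into the shadow region.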
2641
2642// Determines whether a NOP is required after a CALL, so that Windows EH
2643// IP2State tables have the correct information.
2644//
2645// On most Windows platforms (AMD64, ARM64, ARM32, IA64, but *not* x86-32),
2646// exception handling works by looking up instruction pointers in lookup
2647// tables. These lookup tables are stored in .xdata sections in executables.
2648 // One element of the lookup tables is the "IP2State" table (Instruction
2649 // Pointer to State).
2650//
2651// If a function has any instructions that require cleanup during exception
2652// unwinding, then it will have an IP2State table. Each entry in the IP2State
2653// table describes a range of bytes in the function's instruction stream, and
2654// associates an "EH state number" with that range of instructions. A value of
2655// -1 means "the null state", which does not require any code to execute.
2656// A value other than -1 is an index into the State table.
2657//
2658// The entries in the IP2State table contain byte offsets within the instruction
2659// stream of the function. The Windows ABI requires that these offsets are
2660// aligned to instruction boundaries; they are not permitted to point to a byte
2661// that is not the first byte of an instruction.
2662//
2663// Unfortunately, CALL instructions present a problem during unwinding. CALL
2664// instructions push the address of the instruction after the CALL instruction,
2665// so that execution can resume after the CALL. If the CALL is the last
2666// instruction within an IP2State region, then the return address (on the stack)
2667// points to the *next* IP2State region. This means that the unwinder will
2668// use the wrong cleanup funclet during unwinding.
2669//
2670// To fix this problem, the Windows AMD64 ABI requires that CALL instructions
2671// are never placed at the end of an IP2State region. Stated equivalently, the
2672// end of a CALL instruction cannot be aligned to an IP2State boundary. If a
2673// CALL instruction would occur at the end of an IP2State region, then the
2674// compiler must insert a NOP instruction after the CALL. The NOP instruction
2675// is placed in the same EH region as the CALL instruction, so that the return
2676// address points to the NOP and the unwinder will locate the correct region.
2677//
2678// NOP padding is only necessary on Windows AMD64 targets. On ARM64 and ARM32,
2679// instructions have a fixed size so the unwinder knows how to "back up" by
2680// one instruction.
2681//
2682// Interaction with Import Call Optimization (ICO):
2683//
2684// Import Call Optimization (ICO) is a compiler + OS feature on Windows which
2685// improves the performance and security of DLL imports. ICO relies on using a
2686// specific CALL idiom that can be replaced by the OS DLL loader. This removes
2687// a load and indirect CALL and replaces it with a single direct CALL.
2688//
2689// To achieve this, ICO also inserts NOPs after the CALL instruction. If the
2690// end of the CALL is aligned with an EH state transition, we *also* insert
2691// a single-byte NOP. **Both forms of NOPs must be preserved.** They cannot
2692// be combined into a single larger NOP; nor can the second NOP be removed.
2693//
2694// This is necessary because, if ICO is active and the call site is modified
2695// by the loader, the loader will end up overwriting the NOPs that were inserted
2696// for ICO. That means that those NOPs cannot be used for the correct
2697// termination of the exception handling region (the IP2State transition),
2698// so we still need an additional NOP instruction. The NOPs cannot be combined
2699// into a longer NOP (which is ordinarily desirable) because then ICO would
2700// split one instruction, producing a malformed instruction after the ICO call.
2701void X86AsmPrinter::maybeEmitNopAfterCallForWindowsEH(const MachineInstr *MI) {
2702 // We only need to insert NOPs after CALLs when targeting Windows on AMD64.
2703 // (Don't let the name fool you: Itanium refers to table-based exception
2704 // handling, not the Itanium architecture.)
2705 if (MAI->getExceptionHandlingType() != ExceptionHandling::WinEH ||
2706 MAI->getWinEHEncodingType() != WinEH::EncodingType::Itanium) {
2707 return;
2708 }
2709
2710 bool HasEHPersonality = MF->getWinEHFuncInfo() != nullptr;
2711
2712 // Set up MBB iterator, initially positioned on the same MBB as MI.
2713 MachineFunction::const_iterator MFI(MI->getParent());
2714 MachineFunction::const_iterator MFE = MF->end();
2715
2716 // Set up instruction iterator, positioned immediately *after* MI.
2717 MachineBasicBlock::const_iterator MBBI(MI);
2718 MachineBasicBlock::const_iterator MBBE = MI->getParent()->end();
2719 ++MBBI; // Step over MI
2720
2721 // This loop iterates MBBs
2722 for (;;) {
2723 // This loop iterates instructions
2724 for (; MBBI != MBBE; ++MBBI) {
2725 // Check the instruction that follows this CALL.
2726 const MachineInstr &NextMI = *MBBI;
2727
2728 // If there is an EH_LABEL after this CALL, then there is an EH state
2729 // transition after this CALL. This is exactly the situation which
2730 // requires NOP padding.
2731 if (NextMI.isEHLabel()) {
2732 if (HasEHPersonality) {
2733 EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
2734 return;
2735 }
2736 // We actually want to continue, in case there is an SEH_BeginEpilogue
2737 // instruction after the EH_LABEL. In some situations, IR is produced
2738 // that contains EH_LABEL pseudo-instructions, even when we are not
2739 // generating IP2State tables. We still need to insert a NOP before
2740 // SEH_BeginEpilogue in that case.
2741 continue;
2742 }
2743
2744 // Somewhat similarly, if the CALL is the last instruction before the
2745 // SEH epilogue, then we also need a NOP. This is necessary because the
2746 // Windows stack unwinder will not invoke a function's exception handler
2747 // if the instruction pointer is in the function prologue or epilogue.
2748 //
2749 // We always emit a NOP before SEH_BeginEpilogue, even if there is no
2750 // personality function (unwind info) for this frame. This is the same
2751 // behavior as MSVC.
2752 if (NextMI.getOpcode() == X86::SEH_BeginEpilogue) {
2753 EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
2754 return;
2755 }
2756
2757 if (!NextMI.isPseudo() && !NextMI.isMetaInstruction()) {
2758 // We found a real instruction. During the CALL, the return IP will
2759 // point to this instruction. Since this instruction has the same EH
2760 // state as the call itself (because there is no intervening EH_LABEL),
2761 // the IP2State table will be accurate; there is no need to insert a
2762 // NOP.
2763 return;
2764 }
2765
2766 // The next instruction is a pseudo-op. Ignore it and keep searching.
2767 // Because these instructions do not generate any machine code, they
2768 // cannot prevent the IP2State table from pointing at the wrong
2769 // instruction during a CALL.
2770 }
2771
2772 // We've reached the end of this MBB. Find the next MBB in program order.
2773 // MBB order should be finalized by this point, so falling across MBBs is
2774 // expected.
2775 ++MFI;
2776 if (MFI == MFE) {
2777 // No more blocks; we've reached the end of the function. This should
2778 // only happen with no-return functions, but double-check to be sure.
2779 if (HasEHPersonality) {
2780 // If the CALL has no successors, then it is a noreturn function.
2781 // Insert an INT3 instead of a NOP. This accomplishes the same purpose,
2782 // but is clearer to read. Also, analysis tools will understand
2783 // that they should not continue disassembling after the CALL (unless
2784 // there are other branches to that label).
2785 if (MI->getParent()->succ_empty())
2786 EmitAndCountInstruction(MCInstBuilder(X86::INT3));
2787 else
2788 EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
2789 }
2790 return;
2791 }
2792
2793 // Set up iterator to scan the next basic block.
2794 const MachineBasicBlock *NextMBB = &*MFI;
2795 MBBI = NextMBB->instr_begin();
2796 MBBE = NextMBB->instr_end();
2797 }
2798}
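// A layout sketch of the case handled above; the labels and EH states are
// invented. Without padding, the return address of a CALL that ends an
// IP2State range is the first byte of the *next* range:
//
//   call fn             ; EH state 1
// .Ltmp_transition:     ; IP2State entry: state becomes -1 here
//
// With the NOP inserted by this function, the return address stays inside
// the state-1 range, so the unwinder selects the correct cleanup funclet:
//
//   call fn             ; EH state 1
//   nop                 ; still state 1; the return address lands here
// .Ltmp_transition:     ; state becomes -1 here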
2799
2800void X86AsmPrinter::emitLabelAndRecordForImportCallOptimization(
2801 ImportCallKind Kind) {
2802 assert(EnableImportCallOptimization);
2803
2804 MCSymbol *CallSiteSymbol = MMI->getContext().createNamedTempSymbol("impcall");
2805 OutStreamer->emitLabel(CallSiteSymbol);
2806
2807 SectionToImportedFunctionCalls[OutStreamer->getCurrentSectionOnly()]
2808 .push_back({CallSiteSymbol, Kind});
2809}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
std::string Name
Symbol * Sym
Definition: ELF_riscv.cpp:479
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
uint64_t IntrinsicInst * II
static cl::opt< bool > EnableBranchHint("ppc-use-branch-hint", cl::init(true), cl::desc("Enable static hinting of branches on ppc"), cl::Hidden)
static MCSymbol * GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallString class.
This file contains some functions that are useful when dealing with strings.
static MCOperand LowerSymbolOperand(const MachineInstr *MI, const MachineOperand &MO, const MCSymbol *Symbol, AsmPrinter &AP)
static void printShuffleMask(raw_ostream &CS, StringRef Src1Name, StringRef Src2Name, ArrayRef< int > Mask)
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, const X86Subtarget *Subtarget)
Emit the optimal amount of multi-byte nops on X86.
static unsigned getRetOpcode(const X86Subtarget &Subtarget)
static void printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer, int SrcEltBits, int DstEltBits)
static unsigned convertTailJumpOpcode(unsigned Opcode)
static unsigned getSrcIdx(const MachineInstr *MI, unsigned SrcIdx)
static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer, int Repeats, int BitWidth)
static bool printExtend(const MachineInstr *MI, MCStreamer &OutStreamer, int SrcEltBits, int DstEltBits, bool IsSext)
static void printZeroUpperMove(const MachineInstr *MI, MCStreamer &OutStreamer, int SclWidth, int VecWidth, const char *ShuffleComment)
#define MASK_AVX512_CASE(Instr)
#define CASE_ARITH_RM(Instr)
static void addConstantComments(const MachineInstr *MI, MCStreamer &OutStreamer)
#define CASE_256_MOV_RM()
bool hasJumpTableInfoInBlock(const llvm::MachineInstr *MI)
static unsigned emitNop(MCStreamer &OS, unsigned NumBytes, const X86Subtarget *Subtarget)
Emit the largest nop instruction smaller than or equal to NumBytes bytes.
static void printDstRegisterName(raw_ostream &CS, const MachineInstr *MI, unsigned SrcOpIdx)
#define CASE_MOVX_RM(Ext, Type)
bool isImportedFunction(const MachineOperand &MO)
static cl::opt< bool > EnableBranchHint("enable-branch-hint", cl::desc("Enable branch hint."), cl::init(false), cl::Hidden)
static void printConstant(const APInt &Val, raw_ostream &CS, bool PrintZero=false)
static void printZeroExtend(const MachineInstr *MI, MCStreamer &OutStreamer, int SrcEltBits, int DstEltBits)
static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx, unsigned SrcOp2Idx, ArrayRef< int > Mask)
bool isCallToCFGuardFunction(const MachineInstr *MI)
#define CASE_512_MOV_RM()
static cl::opt< unsigned > BranchHintProbabilityThreshold("branch-hint-probability-threshold", cl::desc("The probability threshold of enabling branch hint."), cl::init(50), cl::Hidden)
#define CASE_128_MOV_RM()
void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Definition: APFloat.h:1478
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition: APFloat.h:1079
Class for arbitrary precision integers.
Definition: APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:1012
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1540
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
unsigned getNumWords() const
Get the number of words.
Definition: APInt.h:1495
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:985
const uint64_t * getRawData() const
This function returns a pointer to the internal storage of the APInt.
Definition: APInt.h:569
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
This class is intended to be used as a driving class for all asm writers.
Definition: AsmPrinter.h:90
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:706
MCSymbol * CurrentFnBegin
Definition: AsmPrinter.h:232
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
Definition: AsmPrinter.cpp:433
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:93
virtual MCSymbol * GetCPISymbol(unsigned CPID) const
Return the symbol for the specified constant pool entry.
void emitKCFITrapEntry(const MachineFunction &MF, const MCSymbol *Symbol)
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:96
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:108
MCSymbol * GetJTISymbol(unsigned JTID, bool isLinkerPrivate=false) const
Return the symbol for the specified jump table entry.
void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind, uint8_t Version=0)
MCSymbol * getSymbolPreferLocal(const GlobalValue &GV) const
Similar to getSymbol() but preferred for references.
Definition: AsmPrinter.cpp:710
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition: AsmPrinter.h:111
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:100
MCSymbol * createTempSymbol(const Twine &Name) const
bool isPositionIndependent() const
Definition: AsmPrinter.cpp:404
MCSymbol * CurrentPatchableFunctionEntrySym
The symbol for the entry in __patchable_function_entires.
Definition: AsmPrinter.h:123
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:105
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
Definition: AsmPrinter.cpp:701
StackMaps SM
Definition: AsmPrinter.h:248
MCSymbol * GetBlockAddressSymbol(const BlockAddress *BA) const
Return the MCSymbol used to satisfy BlockAddress uses of the specified basic block.
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
Definition: AsmPrinter.cpp:428
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:400
This class represents a function call, abstracting a target machine's calling convention.
This is an important base class in LLVM.
Definition: Constant.h:43
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
Register getReg() const
void recordFaultingOp(FaultKind FaultTy, const MCSymbol *FaultingLabel, const MCSymbol *HandlerLabel)
Definition: FaultMaps.cpp:28
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:762
bool hasInternalLinkage() const
Definition: GlobalValue.h:528
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:64
WinEH::EncodingType getWinEHEncodingType() const
Definition: MCAsmInfo.h:634
ExceptionHandling getExceptionHandlingType() const
Definition: MCAsmInfo.h:633
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition: MCExpr.h:343
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:428
MCCodeEmitter - Generic instruction encoding interface.
Definition: MCCodeEmitter.h:23
virtual void encodeInstruction(const MCInst &Inst, SmallVectorImpl< char > &CB, SmallVectorImpl< MCFixup > &Fixups, const MCSubtargetInfo &STI) const =0
Encode the given Inst to bytes and append to CB.
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:212
Context object for machine code objects.
Definition: MCContext.h:83
LLVM_ABI MCSymbol * createTempSymbol()
Create a temporary symbol with a unique name.
Definition: MCContext.cpp:386
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:203
const MCTargetOptions * getTargetOptions() const
Definition: MCContext.h:420
LLVM_ABI MCSymbol * createNamedTempSymbol()
Create a temporary symbol with a unique name whose name cannot be omitted in the symbol table.
Definition: MCContext.cpp:388
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
MCInstBuilder & addReg(MCRegister Reg)
Add a new register operand.
Definition: MCInstBuilder.h:37
MCInstBuilder & addExpr(const MCExpr *Val)
Add a new MCExpr operand.
Definition: MCInstBuilder.h:61
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:188
unsigned getNumOperands() const
Definition: MCInst.h:212
unsigned getOpcode() const
Definition: MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition: MCInst.h:232
void setFlags(unsigned F)
Definition: MCInst.h:204
void addOperand(const MCOperand Op)
Definition: MCInst.h:215
iterator begin()
Definition: MCInst.h:227
void setOpcode(unsigned Op)
Definition: MCInst.h:201
const MCOperand & getOperand(unsigned i) const
Definition: MCInst.h:210
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:40
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:166
static MCOperand createReg(MCRegister Reg)
Definition: MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:145
MCRegister getReg() const
Returns the register number.
Definition: MCInst.h:73
const char * getName(MCRegister RegNo) const
Return the human-readable symbolic target-specific name for the specified physical register.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
Streaming machine code generation interface.
Definition: MCStreamer.h:220
virtual void AddComment(const Twine &T, bool EOL=true)
Add a textual comment.
Definition: MCStreamer.h:387
virtual void emitRawComment(const Twine &T, bool TabPrefix=true)
Print T and prefix it with the comment string (normally #) and optionally a tab.
Definition: MCStreamer.cpp:119
void setAllowAutoPadding(bool v)
Definition: MCStreamer.h:328
bool getAllowAutoPadding() const
Definition: MCStreamer.h:329
Generic base class for all target subtargets.
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:190
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition: MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:42
StringRef getName() const
getName - Get the symbol name.
Definition: MCSymbol.h:188
instr_iterator instr_begin()
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
reverse_instr_iterator instr_rbegin()
reverse_instr_iterator instr_rend()
instr_iterator instr_end()
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
BasicBlockListType::const_iterator const_iterator
Representation of each machine instruction.
Definition: MachineInstr.h:72
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:587
mop_range operands()
Definition: MachineInstr.h:693
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
Definition: MachineInstr.h:928
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:595
bool isEHLabel() const
bool isMetaInstruction(QueryType Type=IgnoreBundle) const
Return true if this instruction doesn't produce any output in the form of executable instructions.
Definition: MachineInstr.h:934
MachineModuleInfoCOFF - This is a MachineModuleInfoImpl implementation for COFF targets.
StubValueTy & getGVStubEntry(MCSymbol *Sym)
PointerIntPair< MCSymbol *, 1, bool > StubValueTy
MachineModuleInfoMachO - This is a MachineModuleInfoImpl implementation for MachO targets.
const MCContext & getContext() const
const Module * getModule() const
Ty & getObjFileInfo()
Keep track of various per-module pieces of information for backends that would like to do so.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateMCSymbol(MCSymbol *Sym, unsigned TargetFlags=0)
const GlobalValue * getGlobal() const
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
bool isSymbol() const
isSymbol - Tests if this is a MO_ExternalSymbol operand.
bool isJTI() const
isJTI - Tests if this is a MO_JumpTableIndex operand.
const BlockAddress * getBlockAddress() const
unsigned getTargetFlags() const
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
void setTargetFlags(unsigned F)
MCSymbol * getMCSymbol() const
@ MO_Immediate
Immediate operand.
@ MO_ConstantPoolIndex
Address of indexed Constant in Constant Pool.
@ MO_MCSymbol
MCSymbol reference (for debug/eh info)
@ MO_GlobalAddress
Address of a global value.
@ MO_RegisterMask
Mask of preserved registers.
@ MO_BlockAddress
Address of a basic block.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_JumpTableIndex
Address of indexed Jump Table for switch.
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
LLVM_ABI void getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV, bool CannotUsePrivateLabel) const
Print the appropriate prefix and the specified global variable's name.
Definition: Mangler.cpp:121
bool getRtLibUseGOT() const
Returns true if PLT should be avoided for RTLib calls.
Definition: Module.cpp:710
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:99
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:140
MI-level patchpoint operands.
Definition: StackMaps.h:77
PointerIntPair - This class implements a pair of a pointer and small integer.
PointerTy getPointer() const
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:102
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:79
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
LLVM_ABI void recordStatepoint(const MCSymbol &L, const MachineInstr &MI)
Generate a stackmap record for a statepoint instruction.
Definition: StackMaps.cpp:560
LLVM_ABI void recordPatchPoint(const MCSymbol &L, const MachineInstr &MI)
Generate a stackmap record for a patchpoint instruction.
Definition: StackMaps.cpp:539
LLVM_ABI void recordStackMap(const MCSymbol &L, const MachineInstr &MI)
Generate a stackmap record for a stackmap instruction.
Definition: StackMaps.cpp:529
MI-level Statepoint operands.
Definition: StackMaps.h:159
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:480
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:151
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:83
const Triple & getTargetTriple() const
TargetOptions Options
const MCRegisterInfo * getMCRegisterInfo() const
MCTargetOptions MCOptions
Machine level options.
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:47
bool isOSBinFormatELF() const
Tests whether the OS uses the ELF binary format.
Definition: Triple.h:766
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
static const char * getRegisterName(MCRegister Reg)
void emitInstruction(const MachineInstr *MI) override
Targets should implement this to emit instructions.
const X86Subtarget & getSubtarget() const
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
unsigned getSlotSize() const
bool isTargetWindowsMSVC() const
Definition: X86Subtarget.h:301
bool isTarget64BitLP64() const
Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
Definition: X86Subtarget.h:176
const X86RegisterInfo * getRegisterInfo() const override
Definition: X86Subtarget.h:132
bool useIndirectThunkCalls() const
Definition: X86Subtarget.h:214
X86 target streamer implementing x86-only assembly directives.
virtual bool emitFPOPushReg(MCRegister Reg, SMLoc L={})
virtual bool emitFPOEndPrologue(SMLoc L={})
virtual bool emitFPOStackAlign(unsigned Align, SMLoc L={})
virtual bool emitFPOSetFrame(MCRegister Reg, SMLoc L={})
virtual bool emitFPOStackAlloc(unsigned StackAlloc, SMLoc L={})
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:662
std::string & str()
Returns the string's reference.
Definition: raw_ostream.h:680
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Itanium
Windows CE ARM, PowerPC, SH3, SH4.
Reg
All possible values of the reg field in the ModR/M byte.
bool isKMergeMasked(uint64_t TSFlags)
Definition: X86BaseInfo.h:1319
@ MO_TLSLD
MO_TLSLD - On a symbol operand this indicates that the immediate is the offset of the GOT entry with ...
Definition: X86BaseInfo.h:411
@ MO_GOTPCREL_NORELAX
MO_GOTPCREL_NORELAX - Same as MO_GOTPCREL except that R_X86_64_GOTPCREL relocations are guaranteed to...
Definition: X86BaseInfo.h:391
@ MO_GOTOFF
MO_GOTOFF - On a symbol operand this indicates that the immediate is the offset to the location of th...
Definition: X86BaseInfo.h:381
@ MO_DARWIN_NONLAZY_PIC_BASE
MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates that the reference is actually...
Definition: X86BaseInfo.h:468
@ MO_GOT_ABSOLUTE_ADDRESS
MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a relocation of: SYMBOL_LABEL + [.
Definition: X86BaseInfo.h:367
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
Definition: X86BaseInfo.h:488
@ MO_NTPOFF
MO_NTPOFF - On a symbol operand this indicates that the immediate is the negative thread-pointer offs...
Definition: X86BaseInfo.h:450
@ MO_DARWIN_NONLAZY
MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the reference is actually to the "...
Definition: X86BaseInfo.h:464
@ MO_INDNTPOFF
MO_INDNTPOFF - On a symbol operand this indicates that the immediate is the absolute address of the G...
Definition: X86BaseInfo.h:432
@ MO_GOTNTPOFF
MO_GOTNTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry w...
Definition: X86BaseInfo.h:456
@ MO_TPOFF
MO_TPOFF - On a symbol operand this indicates that the immediate is the thread-pointer offset for the...
Definition: X86BaseInfo.h:438
@ MO_TLVP_PIC_BASE
MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate is some TLS offset from the ...
Definition: X86BaseInfo.h:476
@ MO_GOT
MO_GOT - On a symbol operand this indicates that the immediate is the offset to the GOT entry for the...
Definition: X86BaseInfo.h:376
@ MO_ABS8
MO_ABS8 - On a symbol operand this indicates that the symbol is known to be an absolute symbol in ran...
Definition: X86BaseInfo.h:484
@ MO_PLT
MO_PLT - On a symbol operand this indicates that the immediate is offset to the PLT entry of symbol n...
Definition: X86BaseInfo.h:396
@ MO_TLSGD
MO_TLSGD - On a symbol operand this indicates that the immediate is the offset of the GOT entry with ...
Definition: X86BaseInfo.h:403
@ MO_NO_FLAG
MO_NO_FLAG - No flag for the operand.
Definition: X86BaseInfo.h:363
@ MO_TLVP
MO_TLVP - On a symbol operand this indicates that the immediate is some TLS offset.
Definition: X86BaseInfo.h:472
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the reference is actually to the "__imp...
Definition: X86BaseInfo.h:460
@ MO_GOTTPOFF
MO_GOTTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry wi...
Definition: X86BaseInfo.h:425
@ MO_SECREL
MO_SECREL - On a symbol operand this indicates that the immediate is the offset from beginning of sec...
Definition: X86BaseInfo.h:480
@ MO_DTPOFF
MO_DTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry with...
Definition: X86BaseInfo.h:444
@ MO_PIC_BASE_OFFSET
MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the immediate should get the value of th...
Definition: X86BaseInfo.h:371
@ MO_TLSLDM
MO_TLSLDM - On a symbol operand this indicates that the immediate is the offset of the GOT entry with...
Definition: X86BaseInfo.h:419
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:387
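Taken together, the MO_* flags above pick the notation a symbol operand lowers to. A hypothetical, much-reduced mapping from flag to printed suffix (the in-tree lowering handles every flag and builds MCExprs rather than strings):

  static const char *flagSuffix(unsigned TF) {
    switch (TF) {
    case X86II::MO_NO_FLAG:  return "";          // plain symbol reference
    case X86II::MO_PLT:      return "@PLT";      // branch through the PLT entry
    case X86II::MO_GOTPCREL: return "@GOTPCREL"; // RIP-relative GOT load
    case X86II::MO_TPOFF:    return "@TPOFF";    // local-exec TLS offset
    case X86II::MO_SECREL:   return "@SECREL32"; // COFF section-relative
    default:                 return "<unhandled>";
    }
  }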
bool isKMasked(uint64_t TSFlags)
Definition: X86BaseInfo.h:1314
bool isX86_64ExtendedReg(MCRegister Reg)
Definition: X86BaseInfo.h:1193
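A sketch of the questions these predicates answer, assuming Desc is the MCInstrDesc of some AVX-512 opcode:

  bool Masked      = X86II::isKMasked(Desc.TSFlags);      // writes under a k-register mask
  bool MergeMasked = X86II::isKMergeMasked(Desc.TSFlags); // merges instead of zeroing
  bool NeedsREX    = X86II::isX86_64ExtendedReg(X86::R8); // true for R8-R15, XMM8-XMM15, ...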
bool optimizeToFixedRegisterOrShortImmediateForm(MCInst &MI)
bool optimizeMOV(MCInst &MI, bool In64BitMode)
Simplify things like MOV32rm to MOV32o32a.
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified cond, e.g. turning COND_E to COND_NE.
bool optimizeMOVSX(MCInst &MI)
@ S_GOTPCREL_NORELAX
Definition: X86MCAsmInfo.h:82
bool optimizeVPCMPWithImmediateOneOrSix(MCInst &MI)
bool optimizeShiftRotateWithImmediateOne(MCInst &MI)
bool optimizeInstFromVEX3ToVEX2(MCInst &MI, const MCInstrDesc &Desc)
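These rewrites shrink an already-lowered MCInst in place; each returns true when it fired. A sketch of chaining them the way the lowering code does (the order and selection here are illustrative):

  void shrinkEncoding(MCInst &OutMI, const MCInstrDesc &Desc, bool In64BitMode) {
    if (X86::optimizeInstFromVEX3ToVEX2(OutMI, Desc)) return;
    if (X86::optimizeShiftRotateWithImmediateOne(OutMI)) return;
    if (X86::optimizeVPCMPWithImmediateOneOrSix(OutMI)) return;
    if (X86::optimizeMOVSX(OutMI)) return;
    if (X86::optimizeINCDEC(OutMI, In64BitMode)) return;
    if (X86::optimizeMOV(OutMI, In64BitMode)) return;
    X86::optimizeToFixedRegisterOrShortImmediateForm(OutMI);
  }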
@ IP_HAS_AD_SIZE
Definition: X86BaseInfo.h:54
@ IP_HAS_REPEAT
Definition: X86BaseInfo.h:56
const Constant * getConstantFromPool(const MachineInstr &MI, unsigned OpNo)
Find any constant pool entry associated with a specific instruction operand.
@ AC_EVEX_2_EVEX
Definition: X86InstrInfo.h:43
@ AC_EVEX_2_LEGACY
Definition: X86InstrInfo.h:39
bool optimizeINCDEC(MCInst &MI, bool In64BitMode)
@ AddrSegmentReg
Definition: X86BaseInfo.h:34
@ AddrNumOperands
Definition: X86BaseInfo.h:36
unsigned getVectorRegisterWidth(const MCOperandInfo &Info)
Get the width of the vector register operand.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
NodeAddr< CodeNode * > Code
Definition: RDFGraph.h:388
void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits, unsigned NumDstElts, bool IsAnyExtend, SmallVectorImpl< int > &ShuffleMask)
Decode a zero extension instruction as a shuffle mask.
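For example, a pmovzxbw-style extension of eight 8-bit elements to 16 bits decodes so each source lane is followed by a zeroed lane (the parameter values are illustrative):

  SmallVector<int, 16> Mask;
  DecodeZeroExtendMask(/*SrcScalarBits=*/8, /*DstScalarBits=*/16,
                       /*NumDstElts=*/8, /*IsAnyExtend=*/false, Mask);
  // Mask == {0, SM_SentinelZero, 1, SM_SentinelZero, ..., 7, SM_SentinelZero}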
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits, ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants.
@ SM_SentinelUndef
@ SM_SentinelZero
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
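Examples of walking between register sizes; the trailing flag asks for the high 8-bit half where one exists:

  MCRegister AX  = getX86SubSuperRegister(X86::EAX, 16);               // X86::AX
  MCRegister AH  = getX86SubSuperRegister(X86::EAX, 8, /*High=*/true); // X86::AH
  MCRegister RAX = getX86SubSuperRegister(X86::EAX, 64);               // X86::RAX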
bool isCFGuardFunction(const GlobalValue *GV)
Definition: CFGuard.cpp:325
@ WinEH
Windows Exception Handling.
void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z, ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a VPERMIL2PD/VPERMIL2PS variable mask from a raw array of constants.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition: Error.cpp:167
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_fd_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:345
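Both helpers trim a range without index arithmetic, which is how operand lists are walked while skipping defs or trailing operands. A small self-contained example:

  std::vector<int> Ops = {10, 20, 30, 40};
  int Sum = 0;
  for (int Op : llvm::drop_begin(Ops)) Sum += Op;  // visits 20, 30, 40
  for (int Op : llvm::drop_end(Ops, 2)) Sum += Op; // visits 10, 20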
void DecodeVPPERMMask(ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a VPPERM mask from a raw array of constants such as from BUILD_VECTOR.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
const char * toString(DWARFSectionKind Kind)
void getAddressSanitizerParams(const Triple &TargetTriple, int LongSize, bool IsKasan, uint64_t *ShadowBase, int *MappingScale, bool *OrShadowOffset)
void DecodePSHUFBMask(ArrayRef< uint64_t > RawMask, const APInt &UndefElts, SmallVectorImpl< int > &ShuffleMask)
Decode a PSHUFB mask from a raw array of constants such as from BUILD_VECTOR.
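Each control byte selects a source byte, and a set high bit zeroes the lane. A sketch with an identity control whose last byte zeroes (the values are illustrative):

  uint64_t Raw[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0x80};
  APInt UndefElts(16, 0); // no undefined constant-pool elements
  SmallVector<int, 16> Mask;
  DecodePSHUFBMask(Raw, UndefElts, Mask);
  // Mask == {0, 1, ..., 14, SM_SentinelZero}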
#define N
A RAII helper which defines a region of instructions which can't have padding added between them for ...
void changeAndComment(bool b)
NoAutoPaddingScope(MCStreamer &OS)
const bool OldAllowAutoPadding
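Usage is the usual RAII pattern: while the scope object lives, the streamer refuses to insert alignment padding between the emitted instructions, and the previous setting comes back on destruction. A sketch (the MOV is only an example):

  {
    NoAutoPaddingScope NoPad(*OutStreamer);
    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64ri).addReg(X86::RAX).addImm(0));
  } // OldAllowAutoPadding is restored here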
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39