LLVM 22.0.0git
MCDisassembler.h
Go to the documentation of this file.
1//===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
10#define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
11
12#include "llvm/ADT/StringRef.h"
16#include "llvm/Support/Error.h"
17#include <cstdint>
18#include <memory>
19#include <vector>
20
21namespace llvm {
22
24 std::optional<XCOFF::StorageMappingClass> StorageMappingClass;
25 std::optional<uint32_t> Index;
26 bool IsLabel = false;
28};
29
33 // XCOFF uses XCOFFSymInfo. Other targets use Type.
36 // Used by ELF to describe a mapping symbol that is usually not displayed.
38
39private:
40 bool IsXCOFF;
41 bool HasType;
42
43public:
44 SymbolInfoTy(std::optional<XCOFF::StorageMappingClass> Smc, uint64_t Addr,
45 StringRef Name, std::optional<uint32_t> Idx, bool Label)
46 : Addr(Addr), Name(Name), XCOFFSymInfo{Smc, Idx, Label}, Type(0),
47 IsMappingSymbol(false), IsXCOFF(true), HasType(false) {}
49 bool IsMappingSymbol = false, bool IsXCOFF = false)
51 IsXCOFF(IsXCOFF), HasType(true) {}
52 bool isXCOFF() const { return IsXCOFF; }
53
54private:
55 friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) { // Less-than ordering; P1 and P2 must share IsXCOFF and HasType.
56 assert((P1.IsXCOFF == P2.IsXCOFF && P1.HasType == P2.HasType) &&
57 "The value of IsXCOFF and HasType in P1 and P2 should be the same "
58 "respectively.");
59 // XCOFF symbols that carry a Type: compare lexicographically by (Addr, Type, Name).
60 if (P1.IsXCOFF && P1.HasType)
61 return std::tie(P1.Addr, P1.Type, P1.Name) <
62 std::tie(P2.Addr, P2.Type, P2.Name);
63 // XCOFF symbols without a Type: compare lexicographically by (Addr, XCOFFSymInfo, Name).
64 if (P1.IsXCOFF)
65 return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) <
66 std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name);
67 // All remaining symbols: compare by (Addr, negated mapping-symbol flag, Name, Type).
68 // With the same address, place mapping symbols first: the flag is negated so a mapping symbol yields false, which orders before true.
69 bool MS1 = !P1.IsMappingSymbol, MS2 = !P2.IsMappingSymbol;
70 return std::tie(P1.Addr, MS1, P1.Name, P1.Type) <
71 std::tie(P2.Addr, MS2, P2.Name, P2.Type);
72 }
73};
74
75using SectionSymbolsTy = std::vector<SymbolInfoTy>;
76
77template <typename T> class ArrayRef;
78class MCContext;
79class MCInst;
80class MCSubtargetInfo;
81class raw_ostream;
82
83/// Superclass for all disassemblers. Consumes a memory region and provides an
84/// array of assembly instructions.
86public:
87 /// Ternary decode status. Most backends will just use Fail and
88 /// Success, however some have a concept of an instruction with
89 /// understandable semantics but which is architecturally
90 /// incorrect. An example of this is ARM UNPREDICTABLE instructions
91 /// which are disassemblable but cause undefined behaviour.
92 ///
93 /// Because it makes sense to disassemble these instructions, there
94 /// is a "soft fail" failure mode that indicates the MCInst& is
95 /// valid but architecturally incorrect.
96 ///
97 /// The enum numbers are deliberately chosen such that reduction
97 /// from Success -> SoftFail -> Fail can be done with a simple
99 /// bitwise-AND:
100 ///
101 /// LEFT & TOP = | Success Unpredictable Fail
102 /// --------------+-----------------------------------
103 /// Success | Success Unpredictable Fail
104 /// Unpredictable | Unpredictable Unpredictable Fail
105 /// Fail | Fail Fail Fail
106 ///
107 /// An easy way of encoding this is as 0b11, 0b01, 0b00 for
108 /// Success, SoftFail, Fail respectively.
110 Fail = 0,
112 Success = 3
113 };
114
116 : Ctx(Ctx), STI(STI) {}
117
119
120 /// Returns the disassembly of a single instruction.
121 ///
122 /// \param Instr - An MCInst to populate with the contents of the
123 /// instruction.
124 /// \param Size - A value to populate with the size of the instruction, or
125 /// the number of bytes consumed while attempting to decode
126 /// an invalid instruction.
127 /// \param Address - The address, in the memory space of region, of the first
128 /// byte of the instruction.
129 /// \param Bytes - A reference to the actual bytes of the instruction.
130 /// \param CStream - The stream to print comments and annotations on.
131 /// \return - MCDisassembler::Success if the instruction is valid,
132 /// MCDisassembler::SoftFail if the instruction was
133 /// disassemblable but invalid,
134 /// MCDisassembler::Fail if the instruction was invalid.
136 ArrayRef<uint8_t> Bytes, uint64_t Address,
137 raw_ostream &CStream) const = 0;
138
139 /// Returns the disassembly of an instruction bundle for VLIW architectures
140 /// like Hexagon.
141 ///
142 /// \param Instr - An MCInst to populate with the contents of
143 /// the Bundle with sub-instructions encoded as Inst operands.
145 ArrayRef<uint8_t> Bytes,
146 uint64_t Address,
147 raw_ostream &CStream) const {
148 return Fail;
149 }
150
151 /// Used to perform separate target specific disassembly for a particular
152 /// symbol. May parse any prelude that precedes instructions after the
153 /// start of a symbol, or the entire symbol.
154 /// This is used for example by WebAssembly to decode preludes.
155 ///
156 /// Base implementation returns false. So all targets by default decline to
157 /// treat symbols separately.
158 ///
159 /// \param Symbol - The symbol.
160 /// \param Size - The number of bytes consumed.
161 /// \param Address - The address, in the memory space of region, of the first
162 /// byte of the symbol.
163 /// \param Bytes - A reference to the actual bytes at the symbol location.
164 /// \return - True if this symbol triggered some target specific
165 /// disassembly for this symbol. Size must be set with the
166 /// number of bytes consumed.
167 /// - Error if this symbol triggered some target specific
168 /// disassembly for this symbol, but an error was found with
169 /// it. Size must be set with the number of bytes consumed.
170 /// - False if the target doesn't want to handle the symbol
171 /// separately. The value of Size is ignored in this case,
172 /// and Err must not be set.
173 virtual Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
174 ArrayRef<uint8_t> Bytes,
175 uint64_t Address) const;
176 // TODO:
177 // Implement similar hooks that can be used at other points during
178 // disassembly. Something along the following lines:
179 // - onBeforeInstructionDecode()
180 // - onAfterInstructionDecode()
181 // - onSymbolEnd()
182 // It should help move much of the target specific code from llvm-objdump to
183 // respective target disassemblers.
184
185 /// Suggest a distance to skip in a buffer of data to find the next
186 /// place to look for the start of an instruction. For example, if
187 /// all instructions have a fixed alignment, this might advance to
188 /// the next multiple of that alignment.
189 ///
190 /// If not overridden, the default is 1.
191 ///
192 /// \param Address - The address, in the memory space of region, of the
193 /// starting point (typically the first byte of something
194 /// that did not decode as a valid instruction at all).
195 /// \param Bytes - A reference to the actual bytes at Address. May be
196 /// needed in order to determine the width of an
197 /// unrecognized instruction (e.g. in Thumb this is a simple
198 /// consistent criterion that doesn't require knowing the
199 /// specific instruction). The caller can pass as much data
200 /// as they have available, and the function is required to
201 /// make a reasonable default choice if not enough data is
202 /// available to make a better one.
203 /// \return - A number of bytes to skip. Must always be greater than
204 /// zero. May be greater than the size of Bytes.
205 virtual uint64_t suggestBytesToSkip(ArrayRef<uint8_t> Bytes,
206 uint64_t Address) const;
207
208private:
209 MCContext &Ctx;
210
211protected:
212 // Subtarget information, for instruction decoding predicates if required.
214 std::unique_ptr<MCSymbolizer> Symbolizer;
215
216public:
217 // Helpers around MCSymbolizer
218 bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address,
219 bool IsBranch, uint64_t Offset, uint64_t OpSize,
220 uint64_t InstSize) const;
221
222 void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const;
223
224 /// Set \p Symzer as the current symbolizer.
225 /// This takes ownership of \p Symzer, and deletes the previously set one.
226 void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer);
227
228 MCContext& getContext() const { return Ctx; }
229
230 const MCSubtargetInfo& getSubtargetInfo() const { return STI; }
231
232 /// ELF-specific, set the ABI version from the object header.
233 virtual void setABIVersion(unsigned Version) {}
234
235 // Marked mutable because we cache it inside the disassembler, rather than
236 // having to pass it around as an argument through all the autogenerated code.
237 mutable raw_ostream *CommentStream = nullptr;
238};
239
240} // end namespace llvm
241
242#endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
#define SoftFail
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value, const MCDisassembler *Decoder)
tryAddingPcLoadReferenceComment - tries to add a comment as to what is being referenced by a load inst...
static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, bool isBranch, uint64_t InstSize, MCInst &MI, const MCDisassembler *Decoder)
tryAddingSymbolicOperand - tries to add a symbolic operand in place of the immediate Value in the MCIn...
#define LLVM_ABI
Definition: Compiler.h:213
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
uint64_t Size
uint64_t Offset
Definition: ELF_riscv.cpp:478
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Tagged union holding either a T or a Error.
Definition: Error.h:485
Context object for machine code objects.
Definition: MCContext.h:83
Superclass for all disassemblers.
MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
virtual DecodeStatus getInstructionBundle(MCInst &Instr, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CStream) const
Returns the disassembly of an instruction bundle for VLIW architectures like Hexagon.
virtual void setABIVersion(unsigned Version)
ELF-specific, set the ABI version from the object header.
MCContext & getContext() const
const MCSubtargetInfo & getSubtargetInfo() const
std::unique_ptr< MCSymbolizer > Symbolizer
const MCSubtargetInfo & STI
DecodeStatus
Ternary decode status.
virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CStream) const =0
Returns the disassembly of a single instruction.
virtual ~MCDisassembler()
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:188
Generic base class for all target subtargets.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:75
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
std::vector< SymbolInfoTy > SectionSymbolsTy
SymInfo contains information about symbol: its address and section index which is -1LL for absolute ...
SymbolInfoTy(std::optional< XCOFF::StorageMappingClass > Smc, uint64_t Addr, StringRef Name, std::optional< uint32_t > Idx, bool Label)
SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type, bool IsMappingSymbol=false, bool IsXCOFF=false)
XCOFFSymbolInfoTy XCOFFSymInfo
friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2)
bool isXCOFF() const
LLVM_ABI bool operator<(const XCOFFSymbolInfoTy &SymInfo) const
The function is for symbol sorting when symbols have the same address.
std::optional< uint32_t > Index
std::optional< XCOFF::StorageMappingClass > StorageMappingClass