LLVM 22.0.0git
MCSymbolizer.h
Go to the documentation of this file.
1//===- llvm/MC/MCSymbolizer.h - MCSymbolizer class --------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the declaration of the MCSymbolizer class, which is used
10// to symbolize instructions decoded from an object, that is, transform their
11// immediate operands to MCExprs.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H
16#define LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H
17
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
20#include "llvm/Support/Compiler.h"
21#include <cstdint>
22#include <memory>
23#include <utility>
24
25namespace llvm {
26
27class MCContext;
28class MCInst;
29class raw_ostream;
30
31/// Symbolize and annotate disassembled instructions.
32///
33/// For now this mimics the old symbolization logic (from both ARM and x86), that
34/// relied on user-provided (C API) callbacks to do the actual symbol lookup in
35/// the object file. This was moved to MCExternalSymbolizer.
36/// A better API would not rely on actually calling the two methods here from
37/// inside each disassembler, but would use the instr info to determine what
38/// operands are actually symbolizable, and in what way. I don't think this
39/// information exists right now.
41protected:
43 std::unique_ptr<MCRelocationInfo> RelInfo;
44
45public:
46 /// Construct an MCSymbolizer, taking ownership of \p RelInfo.
47 MCSymbolizer(MCContext &Ctx, std::unique_ptr<MCRelocationInfo> RelInfo)
48 : Ctx(Ctx), RelInfo(std::move(RelInfo)) {
49 }
50
51 MCSymbolizer(const MCSymbolizer &) = delete;
53 virtual ~MCSymbolizer();
54
55 /// Try to add a symbolic operand instead of \p Value to the MCInst.
56 ///
57 /// Instead of having a difficult to read immediate, a symbolic operand would
58 /// represent this immediate in a more understandable way, for instance as a
59 /// symbol or an offset from a symbol. Relocations can also be used to enrich
60 /// the symbolic expression.
61 /// \param Inst - The MCInst where to insert the symbolic operand.
62 /// \param cStream - Stream to print comments and annotations on.
63 /// \param Value - Operand value, pc-adjusted by the caller if necessary.
64 /// \param Address - Load address of the instruction.
65 /// \param IsBranch - Is the instruction a branch?
66 /// \param Offset - Byte offset of the operand inside the inst.
67 /// \param OpSize - Size of the operand in bytes.
68 /// \param InstSize - Size of the instruction in bytes.
69 /// \return Whether a symbolic operand was added.
70 virtual bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream,
71 int64_t Value, uint64_t Address,
72 bool IsBranch, uint64_t Offset,
73 uint64_t OpSize, uint64_t InstSize) = 0;
74
75 /// Try to add a comment on the PC-relative load.
76 /// For instance, in Mach-O, this is used to add annotations to instructions
77 /// that use C string literals, as found in __cstring.
79 int64_t Value,
80 uint64_t Address) = 0;
81
82 /// Get the MCSymbolizer's list of addresses that were referenced by
83 /// symbolizable operands but not resolved to a symbol. The caller (some
84 /// code that is disassembling a section or other chunk of code) would
85 /// typically create a synthetic label at each address and add them to its
86 /// list of symbols in the section, before creating a new MCSymbolizer with
87 /// the enhanced symbol list and retrying disassembling the section.
88 /// The returned array is unordered and may have duplicates.
89 /// The returned ArrayRef stops being valid on any call to or destruction of
90 /// the MCSymbolizer object.
91 virtual ArrayRef<uint64_t> getReferencedAddresses() const { return {}; }
92};
93
94} // end namespace llvm
95
96#endif // LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H
#define LLVM_ABI
Definition: Compiler.h:213
uint64_t Offset
Definition: ELF_riscv.cpp:478
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Context object for machine code objects.
Definition: MCContext.h:83
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:188
Symbolize and annotate disassembled instructions.
Definition: MCSymbolizer.h:40
MCSymbolizer(MCContext &Ctx, std::unique_ptr< MCRelocationInfo > RelInfo)
Construct an MCSymbolizer, taking ownership of RelInfo.
Definition: MCSymbolizer.h:47
std::unique_ptr< MCRelocationInfo > RelInfo
Definition: MCSymbolizer.h:43
MCSymbolizer & operator=(const MCSymbolizer &)=delete
virtual ~MCSymbolizer()
virtual void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address)=0
Try to add a comment on the PC-relative load.
MCSymbolizer(const MCSymbolizer &)=delete
virtual bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize)=0
Try to add a symbolic operand instead of Value to the MCInst.
MCContext & Ctx
Definition: MCSymbolizer.h:42
virtual ArrayRef< uint64_t > getReferencedAddresses() const
Get the MCSymbolizer's list of addresses that were referenced by symbolizable operands but not resolv...
Definition: MCSymbolizer.h:91
LLVM Value Representation.
Definition: Value.h:75
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1886
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:856