LLVM 22.0.0git
DataAccessProf.h
Go to the documentation of this file.
1//===- DataAccessProf.h - Data access profile format support ---------*- C++
2//-*-===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains support to construct and use data access profiles.
11//
12// For the original RFC of this pass please see
13// https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744
14//
15//===----------------------------------------------------------------------===//
16
17#ifndef LLVM_PROFILEDATA_DATAACCESSPROF_H_
18#define LLVM_PROFILEDATA_DATAACCESSPROF_H_
19
21#include "llvm/ADT/MapVector.h"
22#include "llvm/ADT/SetVector.h"
24#include "llvm/ADT/StringRef.h"
28#include "llvm/Support/Error.h"
30
31#include <cstdint>
32#include <optional>
33#include <variant>
34
35namespace llvm {
36
37namespace memprof {
38
39/// The location of data in the source code. Used by profile lookup API.
42 : FileName(FileNameRef.str()), Line(Line) {}
43
44 // Empty constructor is used in yaml conversion.
46 /// The filename where the data is located.
47 std::string FileName;
48 /// The line number in the source code.
50};
51
52namespace internal {
53
54// Conceptually similar to SourceLocation except that FileNames are StringRef of
55// which strings are owned by `DataAccessProfData`. Used by `DataAccessProfData`
56// to represent data locations internally.
59 : FileName(FileNameRef), Line(Line) {}
60 // The filename where the data is located.
62 // The line number in the source code.
64};
65
66// The data access profiles for a symbol. Used by `DataAccessProfData`
67// to represent records internally.
70 bool IsStringLiteral)
73
74 // Represents a data symbol. The semantics come in two forms: a symbol index
75 // for the symbol name if `IsStringLiteral` is false, or the hash of the
76 // string content if `IsStringLiteral` is true. For most of the symbolizable
77 // static data, the mangled symbol names remain stable relative to the source
78 // code and are therefore used to identify symbols across binary releases.
79 // String literals have unstable name patterns like `.str.N[.llvm.hash]`, so
80 // we use the content hash instead. This is a required field.
82
83 // The access count of symbol. Required.
85
86 // True iff this is a record for string literal (symbols with name pattern
87 // `.str.*` in the symbol table). Required.
89
90 // The locations of data in the source code. Optional.
92};
93} // namespace internal
94
95// SymbolID is either a string representing the symbol name if the symbol has
96// a stable mangled name relative to source code, or a uint64_t representing
97// the content hash of a string literal (with unstable name patterns like
98// `.str.N[.llvm.hash]`). The StringRef is owned by the class's saver object.
99using SymbolHandleRef = std::variant<StringRef, uint64_t>;
100
101// The semantics are the same as `SymbolHandleRef` above. The strings are owned.
102using SymbolHandle = std::variant<std::string, uint64_t>;
103
104/// The data access profiles for a symbol.
106public:
110 if (std::holds_alternative<StringRef>(SymHandleRef)) {
111 SymHandle = std::get<StringRef>(SymHandleRef).str();
112 } else
113 SymHandle = std::get<uint64_t>(SymHandleRef);
114
115 for (auto Loc : LocRefs)
116 Locations.emplace_back(Loc.FileName, Loc.Line);
117 }
118 // Empty constructor is used in yaml conversion.
122 // The locations of data in the source code. Optional.
124};
125
126/// Encapsulates the data access profile data and the methods to operate on
127/// it. This class provides profile look-up, serialization and
128/// deserialization.
130public:
131 // Use MapVector to keep input order of strings for serialization and
132 // deserialization.
134
135 DataAccessProfData() : Saver(Allocator) {}
136
137 /// Serialize profile data to the output stream.
138 /// Storage layout:
139 /// - Serialized strings.
140 /// - The encoded hashes.
141 /// - Records.
143
144 /// Deserialize this class from the given buffer.
145 LLVM_ABI Error deserialize(const unsigned char *&Ptr);
146
147 /// Returns a profile record for \p SymID, or std::nullopt if there
148 /// isn't a record. Internally, this function will canonicalize the symbol
149 /// name before the lookup.
150 LLVM_ABI std::optional<DataAccessProfRecord>
151 getProfileRecord(const SymbolHandleRef SymID) const;
152
153 /// Returns true if \p SymID is seen in profiled binaries and cold.
154 LLVM_ABI bool isKnownColdSymbol(const SymbolHandleRef SymID) const;
155
156 /// Methods to set symbolized data access profile. Returns an error if
157 /// duplicate symbol names or content hashes are seen. The user of this
158 /// class should aggregate counters that correspond to the same symbol name
159 /// or to the same string literal hash before calling 'set*' methods.
161 uint64_t AccessCount);
162 /// Similar to the method above, for records with \p Locations representing
163 /// the `filename:line` where this symbol shows up. Note that because the
164 /// linker merges identical symbols (e.g., unnamed_addr string literals), one
165 /// symbol is likely to have multiple locations.
167 uint64_t AccessCount,
168 ArrayRef<SourceLocation> Locations);
169 /// Add a symbol that's seen in the profiled binary without samples.
171
172 /// The following methods return array references to various internal data
173 /// structures.
175 return StrToIndexMap.getArrayRef();
176 }
177 ArrayRef<
179 getRecords() const {
180 return Records.getArrayRef();
181 }
183 return KnownColdSymbols.getArrayRef();
184 }
186 return KnownColdHashes.getArrayRef();
187 }
188 [[nodiscard]] bool empty() const {
189 return Records.empty() && KnownColdSymbols.empty() &&
190 KnownColdHashes.empty();
191 }
192
193private:
194 /// Serialize the symbol strings into the output stream.
195 Error serializeSymbolsAndFilenames(ProfOStream &OS) const;
196
197 /// Deserialize the symbol strings from \p Ptr and increment \p Ptr to the
198 /// start of the next payload.
199 Error deserializeSymbolsAndFilenames(const unsigned char *&Ptr,
200 const uint64_t NumSampledSymbols,
201 const uint64_t NumColdKnownSymbols);
202
203 /// Decode the records and increment \p Ptr to the start of the next
204 /// payload.
205 Error deserializeRecords(const unsigned char *&Ptr);
206
207 /// A helper function to compute a storage index for \p SymbolID.
208 uint64_t getEncodedIndex(const SymbolHandleRef SymbolID) const;
209
210 // Keeps owned copies of the input strings.
211 // NOTE: Keep `Saver` initialized before other class members that reference
212 // its string copies, so that it is destructed after they are destructed.
213 llvm::BumpPtrAllocator Allocator;
215
216 // `Records` stores the records.
218
219 StringToIndexMap StrToIndexMap;
220 llvm::SetVector<uint64_t> KnownColdHashes;
221 llvm::SetVector<StringRef> KnownColdSymbols;
222};
223
224} // namespace memprof
225} // namespace llvm
226
227#endif // LLVM_PROFILEDATA_DATAACCESSPROF_H_
This file defines the BumpPtrAllocator interface.
#define LLVM_ABI
Definition: Compiler.h:213
This file defines DenseMapInfo traits for DenseMap<std::variant<Ts...>>.
This file implements a map that provides insertion order iteration.
raw_pwrite_stream & OS
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:67
Lightweight error class with error context and mandatory checking.
Definition: Error.h:159
typename VectorType::value_type value_type
Definition: MapVector.h:39
ArrayRef< value_type > getArrayRef() const
Returns an array reference of the underlying vector.
Definition: MapVector.h:54
A vector that has set insertion semantics.
Definition: SetVector.h:59
ArrayRef< value_type > getArrayRef() const
Definition: SetVector.h:90
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:99
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
Saves strings in the provided stable storage and returns a StringRef with a stable character pointer.
Definition: StringSaver.h:45
Encapsulates the data access profile data and the methods to operate on it.
LLVM_ABI Error addKnownSymbolWithoutSamples(SymbolHandleRef SymbolID)
Add a symbol that's seen in the profiled binary without samples.
ArrayRef< StringToIndexMap::value_type > getStrToIndexMapRef() const
The following methods return array reference for various internal data structures.
LLVM_ABI Error serialize(ProfOStream &OS) const
Serialize profile data to the output stream.
LLVM_ABI std::optional< DataAccessProfRecord > getProfileRecord(const SymbolHandleRef SymID) const
Returns a profile record for SymbolID, or std::nullopt if there isn't a record.
ArrayRef< MapVector< SymbolHandleRef, internal::DataAccessProfRecordRef >::value_type > getRecords() const
LLVM_ABI Error deserialize(const unsigned char *&Ptr)
Deserialize this class from the given buffer.
ArrayRef< StringRef > getKnownColdSymbols() const
LLVM_ABI bool isKnownColdSymbol(const SymbolHandleRef SymID) const
Returns true if SymID is seen in profiled binaries and cold.
LLVM_ABI Error setDataAccessProfile(SymbolHandleRef SymbolID, uint64_t AccessCount)
Methods to set symbolized data access profile.
ArrayRef< uint64_t > getKnownColdHashes() const
llvm::MapVector< StringRef, uint64_t > StringToIndexMap
std::variant< StringRef, uint64_t > SymbolHandleRef
std::variant< std::string, uint64_t > SymbolHandle
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
The data access profiles for a symbol.
DataAccessProfRecord(SymbolHandleRef SymHandleRef, uint64_t AccessCount, ArrayRef< internal::SourceLocationRef > LocRefs)
SmallVector< SourceLocation > Locations
The location of data in the source code. Used by profile lookup API.
uint32_t Line
The line number in the source code.
SourceLocation(StringRef FileNameRef, uint32_t Line)
std::string FileName
The filename where the data is located.
llvm::SmallVector< SourceLocationRef, 0 > Locations
DataAccessProfRecordRef(uint64_t SymbolID, uint64_t AccessCount, bool IsStringLiteral)
SourceLocationRef(StringRef FileNameRef, uint32_t Line)