LLVM 22.0.0git
CodeGenData.h
Go to the documentation of this file.
1//===- CodeGenData.h --------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains support for codegen data that has a stable summary, which
10// can be used to optimize the code in the subsequent codegen.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CGDATA_CODEGENDATA_H
15#define LLVM_CGDATA_CODEGENDATA_H
16
23#include "llvm/IR/Module.h"
29#include <mutex>
30
31namespace llvm {
32
34#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind,
36};
37
40 bool AddSegmentInfo = true);
41
42enum class CGDataKind {
43 Unknown = 0x0,
44 // A function outlining info.
46 // A function merging info.
49};
50
51LLVM_ABI const std::error_category &cgdata_category();
52
53enum class cgdata_error {
54 success = 0,
55 eof,
61};
62
63inline std::error_code make_error_code(cgdata_error E) {
64 return std::error_code(static_cast<int>(E), cgdata_category());
65}
66
67class LLVM_ABI CGDataError : public ErrorInfo<CGDataError> {
68public:
69 CGDataError(cgdata_error Err, const Twine &ErrStr = Twine())
70 : Err(Err), Msg(ErrStr.str()) {
71 assert(Err != cgdata_error::success && "Not an error");
72 }
73
74 std::string message() const override;
75
76 void log(raw_ostream &OS) const override { OS << message(); }
77
78 std::error_code convertToErrorCode() const override {
79 return make_error_code(Err);
80 }
81
82 cgdata_error get() const { return Err; }
83 const std::string &getMessage() const { return Msg; }
84
85 /// Consume an Error and return the raw enum value contained within it, and
86 /// the optional error message. The Error must either be a success value, or
87 /// contain a single CGDataError.
88 static std::pair<cgdata_error, std::string> take(Error E) {
89 auto Err = cgdata_error::success;
90 std::string Msg;
91 handleAllErrors(std::move(E), [&Err, &Msg](const CGDataError &IPE) {
92 assert(Err == cgdata_error::success && "Multiple errors encountered");
93 Err = IPE.get();
94 Msg = IPE.getMessage();
95 });
96 return {Err, Msg};
97 }
98
99 static char ID;
100
101private:
102 cgdata_error Err;
103 std::string Msg;
104};
105
110};
111
113 /// Global outlined hash tree that has outlined hash sequences across modules.
114 std::unique_ptr<OutlinedHashTree> PublishedHashTree;
115 /// Global stable function map that has stable function info across modules.
116 std::unique_ptr<StableFunctionMap> PublishedStableFunctionMap;
117
118 /// This flag is set when -fcodegen-data-generate is passed.
119 /// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds.
120 bool EmitCGData;
121
122 /// This is a singleton instance which is thread-safe. Unlike profile data
123 /// which is largely function-based, codegen data describes the whole module.
124 /// Therefore, this can be initialized once, and can be used across modules
125 /// instead of constructing the same one for each codegen backend.
126 static std::unique_ptr<CodeGenData> Instance;
127 static std::once_flag OnceFlag;
128
129 CodeGenData() = default;
130
131public:
132 ~CodeGenData() = default;
133
135
136 /// Returns true if we have a valid outlined hash tree.
138 return PublishedHashTree && !PublishedHashTree->empty();
139 }
141 return PublishedStableFunctionMap && !PublishedStableFunctionMap->empty();
142 }
143
144 /// Returns the outlined hash tree. This can be globally used in a read-only
145 /// manner.
147 return PublishedHashTree.get();
148 }
150 return PublishedStableFunctionMap.get();
151 }
152
153 /// Returns true if we should write codegen data.
154 bool emitCGData() { return EmitCGData; }
155
156 /// Publish the (globally) merged or read outlined hash tree.
157 void publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
158 PublishedHashTree = std::move(HashTree);
159 // Ensure we disable emitCGData as we do not want to read and write both.
160 EmitCGData = false;
161 }
162 void
163 publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
164 PublishedStableFunctionMap = std::move(FunctionMap);
165 // Ensure we disable emitCGData as we do not want to read and write both.
166 EmitCGData = false;
167 }
168};
169
170namespace cgdata {
171
172inline bool hasOutlinedHashTree() {
174}
175
176inline bool hasStableFunctionMap() {
178}
179
182}
183
186}
187
188inline bool emitCGData() { return CodeGenData::getInstance().emitCGData(); }
189
190inline void
191publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
193}
194
195inline void
196publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
197 CodeGenData::getInstance().publishStableFunctionMap(std::move(FunctionMap));
198}
199
201 /// Backing buffer for serialized data stream.
203 /// Callback function to add serialized data to the stream.
205 /// Backing buffer for cached data.
207 /// Cache mechanism for storing data.
209
210 StreamCacheData(unsigned Size, const FileCache &OrigCache,
211 const Twine &CachePrefix)
212 : Outputs(Size), Files(Size) {
213 AddStream = [&](size_t Task, const Twine &ModuleName) {
214 return std::make_unique<CachedFileStream>(
215 std::make_unique<raw_svector_ostream>(Outputs[Task]));
216 };
217
218 if (OrigCache.isValid()) {
219 auto CGCacheOrErr =
220 localCache("ThinLTO", CachePrefix, OrigCache.getCacheDirectoryPath(),
221 [&](size_t Task, const Twine &ModuleName,
222 std::unique_ptr<MemoryBuffer> MB) {
223 Files[Task] = std::move(MB);
224 });
225 if (Error Err = CGCacheOrErr.takeError())
226 report_fatal_error(std::move(Err));
227 Cache = std::move(*CGCacheOrErr);
228 }
229 }
230 StreamCacheData() = delete;
231
232 /// Retrieve results from either the cache or the stream.
233 std::unique_ptr<SmallVector<StringRef>> getResult() {
234 unsigned NumOutputs = Outputs.size();
235 auto Result = std::make_unique<SmallVector<StringRef>>(NumOutputs);
236 for (unsigned I = 0; I < NumOutputs; ++I)
237 if (Files[I])
238 (*Result)[I] = Files[I]->getBuffer();
239 else
240 (*Result)[I] = Outputs[I];
241 return Result;
242 }
243};
244
245/// Save \p TheModule before the first codegen round.
246/// \p Task represents the partition number in the parallel code generation
247/// process. \p AddStream is the callback used to add the serialized module to
248/// the stream.
249LLVM_ABI void saveModuleForTwoRounds(const Module &TheModule, unsigned Task,
250 AddStreamFn AddStream);
251
252/// Load the optimized bitcode module for the second codegen round.
253/// \p OrigModule is the original bitcode module.
254/// \p Task identifies the partition number in the parallel code generation
255/// process. \p Context provides the environment settings for module operations.
256/// \p IRFiles contains optimized bitcode module files needed for loading.
257/// \return A unique_ptr to the loaded Module, or nullptr if loading fails.
258LLVM_ABI std::unique_ptr<Module>
259loadModuleForTwoRounds(BitcodeModule &OrigModule, unsigned Task,
260 LLVMContext &Context, ArrayRef<StringRef> IRFiles);
261
262/// Merge the codegen data from the scratch objects \p ObjectFiles from the
263/// first codegen round.
264/// \return the combined hash of the merged codegen data.
267
268LLVM_ABI void warn(Error E, StringRef Whence = "");
269LLVM_ABI void warn(Twine Message, StringRef Whence = "", StringRef Hint = "");
270
271} // end namespace cgdata
272
273namespace IndexedCGData {
274
275// A signature for data validation, representing "\xffcgdata\x81" in
276// little-endian order
277const uint64_t Magic = 0x81617461646763ff;
278
280 // Version 1 is the first version. This version supports the outlined
281 // hash tree.
283 // Version 2 supports the stable function merging map.
285 // Version 3 adds the total size of the Names in the stable function map so
286 // we can skip reading them into the memory for non-assertion builds.
288 // Version 4 adjusts the structure of stable function merging map for
289 // efficient lazy loading support.
291 CurrentVersion = CG_DATA_INDEX_VERSION
294
295struct Header {
301
302 // New fields should only be added at the end to ensure that the size
303 // computation is correct. The methods below need to be updated to ensure that
304 // the new field is read correctly.
305
306 // Reads a header struct from the buffer.
307 LLVM_ABI static Expected<Header> readFromBuffer(const unsigned char *Curr);
308};
309
310} // end namespace IndexedCGData
311
312} // end namespace llvm
313
314#endif // LLVM_CGDATA_CODEGENDATA_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ABI
Definition: Compiler.h:213
uint64_t Size
Module.h This file contains the declarations for the Module class.
#define I(x, y, z)
Definition: MD5.cpp:58
raw_pwrite_stream & OS
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Represents a module in a bitcode file.
const std::string & getMessage() const
Definition: CodeGenData.h:83
std::error_code convertToErrorCode() const override
Convert this error to a std::error_code.
Definition: CodeGenData.h:78
static std::pair< cgdata_error, std::string > take(Error E)
Consume an Error and return the raw enum value contained within it, and the optional error message.
Definition: CodeGenData.h:88
static char ID
Definition: CodeGenData.h:99
void log(raw_ostream &OS) const override
Print an error message to an output stream.
Definition: CodeGenData.h:76
CGDataError(cgdata_error Err, const Twine &ErrStr=Twine())
Definition: CodeGenData.h:69
cgdata_error get() const
Definition: CodeGenData.h:82
bool hasStableFunctionMap()
Definition: CodeGenData.h:140
const StableFunctionMap * getStableFunctionMap()
Definition: CodeGenData.h:149
bool emitCGData()
Returns true if we should write codegen data.
Definition: CodeGenData.h:154
void publishOutlinedHashTree(std::unique_ptr< OutlinedHashTree > HashTree)
Publish the (globally) merged or read outlined hash tree.
Definition: CodeGenData.h:157
bool hasOutlinedHashTree()
Returns true if we have a valid outlined hash tree.
Definition: CodeGenData.h:137
const OutlinedHashTree * getOutlinedHashTree()
Returns the outlined hash tree.
Definition: CodeGenData.h:146
~CodeGenData()=default
void publishStableFunctionMap(std::unique_ptr< StableFunctionMap > FunctionMap)
Definition: CodeGenData.h:163
static LLVM_ABI CodeGenData & getInstance()
Base class for user error types.
Definition: Error.h:354
Lightweight error class with error context and mandatory checking.
Definition: Error.h:159
Tagged union holding either a T or a Error.
Definition: Error.h:485
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
ObjectFormatType
Definition: Triple.h:314
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
const uint64_t Version
Definition: CodeGenData.h:293
const uint64_t Magic
Definition: CodeGenData.h:277
LLVM_ABI Expected< stable_hash > mergeCodeGenData(ArrayRef< StringRef > ObjectFiles)
Merge the codegen data from the scratch objects ObjectFiles from the first codegen round.
void publishOutlinedHashTree(std::unique_ptr< OutlinedHashTree > HashTree)
Definition: CodeGenData.h:191
bool hasOutlinedHashTree()
Definition: CodeGenData.h:172
bool hasStableFunctionMap()
Definition: CodeGenData.h:176
LLVM_ABI void warn(Error E, StringRef Whence="")
const OutlinedHashTree * getOutlinedHashTree()
Definition: CodeGenData.h:180
void publishStableFunctionMap(std::unique_ptr< StableFunctionMap > FunctionMap)
Definition: CodeGenData.h:196
LLVM_ABI void saveModuleForTwoRounds(const Module &TheModule, unsigned Task, AddStreamFn AddStream)
Save TheModule before the first codegen round.
bool emitCGData()
Definition: CodeGenData.h:188
const StableFunctionMap * getStableFunctionMap()
Definition: CodeGenData.h:184
LLVM_ABI std::unique_ptr< Module > loadModuleForTwoRounds(BitcodeModule &OrigModule, unsigned Task, LLVMContext &Context, ArrayRef< StringRef > IRFiles)
Load the optimized bitcode module for the second codegen round.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
CGDataKind
Definition: CodeGenData.h:42
CGDataMode
Definition: CodeGenData.h:106
@ Read
Definition: CodeGenData.h:108
@ Write
Definition: CodeGenData.h:109
std::error_code make_error_code(BitcodeError E)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:990
std::function< Expected< std::unique_ptr< CachedFileStream > >(unsigned Task, const Twine &ModuleName)> AddStreamFn
This type defines the callback to add a file that is generated on the fly.
Definition: Caching.h:60
cgdata_error
Definition: CodeGenData.h:53
@ None
Definition: CodeGenData.h:107
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition: Error.cpp:167
LLVM_ABI const std::error_category & cgdata_category()
Definition: CodeGenData.cpp:91
CGDataSectKind
Definition: CodeGenData.h:33
LLVM_ABI Expected< FileCache > localCache(const Twine &CacheNameRef, const Twine &TempFilePrefixRef, const Twine &CacheDirectoryPathRef, AddBufferFn AddBuffer=[](size_t Task, const Twine &ModuleName, std::unique_ptr< MemoryBuffer > MB) {})
Create a local file system cache which uses the given cache name, temporary file prefix,...
Definition: Caching.cpp:29
LLVM_ABI std::string getCodeGenDataSectionName(CGDataSectKind CGSK, Triple::ObjectFormatType OF, bool AddSegmentInfo=true)
This type represents a file cache system that manages caching of files.
Definition: Caching.h:85
const std::string & getCacheDirectoryPath() const
Definition: Caching.h:95
bool isValid() const
Definition: Caching.h:98
static LLVM_ABI Expected< Header > readFromBuffer(const unsigned char *Curr)
SmallVector< SmallString< 0 > > Outputs
Backing buffer for serialized data stream.
Definition: CodeGenData.h:202
FileCache Cache
Cache mechanism for storing data.
Definition: CodeGenData.h:208
SmallVector< std::unique_ptr< MemoryBuffer > > Files
Backing buffer for cached data.
Definition: CodeGenData.h:206
std::unique_ptr< SmallVector< StringRef > > getResult()
Retrieve results from either the cache or the stream.
Definition: CodeGenData.h:233
AddStreamFn AddStream
Callback function to add serialized data to the stream.
Definition: CodeGenData.h:204
StreamCacheData(unsigned Size, const FileCache &OrigCache, const Twine &CachePrefix)
Definition: CodeGenData.h:210