LLVM 22.0.0git
IR2Vec.h
Go to the documentation of this file.
1//===- IR2Vec.h - Implementation of IR2Vec ----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM
4// Exceptions. See the LICENSE file for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the IR2Vec vocabulary analysis(IR2VecVocabAnalysis),
11/// the core ir2vec::Embedder interface for generating IR embeddings,
12/// and related utilities like the IR2VecPrinterPass.
13///
14/// Program Embeddings typically are, or are derived from, a learned
15/// representation of the program. Such embeddings are used to represent the
16/// programs as input to machine learning algorithms. IR2Vec represents the
17/// LLVM IR as embeddings.
18///
19/// The IR2Vec algorithm is described in the following paper:
20///
21/// IR2Vec: LLVM IR Based Scalable Program Embeddings, S. VenkataKeerthy,
22/// Rohit Aggarwal, Shalini Jain, Maunendra Sankar Desarkar, Ramakrishna
23/// Upadrasta, and Y. N. Srikant, ACM Transactions on Architecture and
24/// Code Optimization (TACO), 2020. https://doi.org/10.1145/3418463.
25/// https://arxiv.org/abs/1909.06228
26///
27/// To obtain embeddings:
28/// First run IR2VecVocabAnalysis to populate the vocabulary.
29/// Then, use the Embedder interface to generate embeddings for the desired IR
30/// entities. See the documentation for more details -
31/// https://llvm.org/docs/MLGO.html#ir2vec-embeddings
32///
33//===----------------------------------------------------------------------===//
34
35#ifndef LLVM_ANALYSIS_IR2VEC_H
36#define LLVM_ANALYSIS_IR2VEC_H
37
38#include "llvm/ADT/DenseMap.h"
40#include "llvm/IR/PassManager.h"
41#include "llvm/IR/Type.h"
45#include "llvm/Support/JSON.h"
46#include <array>
47#include <map>
48#include <optional>
49
50namespace llvm {
51
52class Module;
53class BasicBlock;
54class Instruction;
55class Function;
56class Value;
57class raw_ostream;
58class LLVMContext;
60
61/// IR2Vec computes two kinds of embeddings: Symbolic and Flow-aware.
62/// Symbolic embeddings capture the "syntactic" and "statistical correlation"
63/// of the IR entities. Flow-aware embeddings build on top of symbolic
64/// embeddings and additionally capture the flow information in the IR.
65/// IR2VecKind is used to specify the type of embeddings to generate.
66/// Note: The implementation of FlowAware embeddings is not the same as the one
67/// described in the paper. The current implementation is a simplified version
68/// that captures the flow information (SSA-based use-defs) without tracing
69/// through memory level use-defs in the embedding computation described in the
70/// paper.
72
73namespace ir2vec {
74
80
81/// Embedding is a datatype that wraps std::vector<double>. It provides
82/// additional functionality for arithmetic and comparison operations.
83/// It is meant to be used *like* std::vector<double> but is more restrictive
84/// in the sense that it does not allow the user to change the size of the
85/// embedding vector. The dimension of the embedding is fixed at the time of
86/// construction of Embedding object. But the elements can be modified in-place.
87struct Embedding {
88private:
89 std::vector<double> Data;
90
91public:
/// Constructs an embedding whose underlying vector is default-constructed.
92 Embedding() = default;
/// Constructs an embedding holding a copy of V.
93 Embedding(const std::vector<double> &V) : Data(V) {}
/// Constructs an embedding by moving V into the underlying vector.
94 Embedding(std::vector<double> &&V) : Data(std::move(V)) {}
/// Constructs an embedding from the elements of IL.
95 Embedding(std::initializer_list<double> IL) : Data(IL) {}
96
/// Constructs an embedding of Size elements, each initialized to 0.0.
/// Marked explicit, so a size_t does not implicitly convert to an Embedding.
97 explicit Embedding(size_t Size) : Data(Size, 0.0) {}
/// Constructs an embedding of Size elements, each set to InitialValue.
98 Embedding(size_t Size, double InitialValue) : Data(Size, InitialValue) {}
99
100 size_t size() const { return Data.size(); }
101 bool empty() const { return Data.empty(); }
102
/// Returns a mutable reference to the element at position Itr.
/// Itr must be less than size(); this is checked with an assert.
103 double &operator[](size_t Itr) {
104 assert(Itr < Data.size() && "Index out of bounds");
105 return Data[Itr];
106 }
107
/// Returns a read-only reference to the element at position Itr.
/// Itr must be less than size(); this is checked with an assert.
108 const double &operator[](size_t Itr) const {
109 assert(Itr < Data.size() && "Index out of bounds");
110 return Data[Itr];
111 }
112
113 using iterator = typename std::vector<double>::iterator;
114 using const_iterator = typename std::vector<double>::const_iterator;
115
116 iterator begin() { return Data.begin(); }
117 iterator end() { return Data.end(); }
118 const_iterator begin() const { return Data.begin(); }
119 const_iterator end() const { return Data.end(); }
120 const_iterator cbegin() const { return Data.cbegin(); }
121 const_iterator cend() const { return Data.cend(); }
122
123 const std::vector<double> &getData() const { return Data; }
124
125 /// Arithmetic operators
130 LLVM_ABI Embedding &operator*=(double Factor);
131 LLVM_ABI Embedding operator*(double Factor) const;
132
133 /// Adds the Src Embedding, scaled by Factor, to this Embedding in place:
134 /// *this += Src * Factor
135 LLVM_ABI Embedding &scaleAndAdd(const Embedding &Src, float Factor);
136
137 /// Returns true if the embedding is approximately equal to the RHS embedding
138 /// within the specified tolerance.
140 double Tolerance = 1e-4) const;
141
142 LLVM_ABI void print(raw_ostream &OS) const;
143};
144
147
148/// Generic storage class for section-based vocabularies.
149/// VocabStorage provides a generic foundation for storing and accessing
150/// embeddings organized into sections.
152private:
153 /// Section-based storage
154 std::vector<std::vector<Embedding>> Sections;
155
156 const size_t TotalSize;
157 const unsigned Dimension;
158
159public:
160 /// Default constructor creates empty storage (invalid state)
161 VocabStorage() : Sections(), TotalSize(0), Dimension(0) {}
162
163 /// Create a VocabStorage with pre-organized section data
164 VocabStorage(std::vector<std::vector<Embedding>> &&SectionData);
165
168
169 VocabStorage(const VocabStorage &) = delete;
171
172 /// Get total number of entries across all sections
173 size_t size() const { return TotalSize; }
174
175 /// Get number of sections
176 unsigned getNumSections() const {
177 return static_cast<unsigned>(Sections.size());
178 }
179
180 /// Section-based access: Storage[sectionId][localIndex]
181 const std::vector<Embedding> &operator[](unsigned SectionId) const {
182 assert(SectionId < Sections.size() && "Invalid section ID");
183 return Sections[SectionId];
184 }
185
186 /// Get vocabulary dimension
187 unsigned getDimension() const { return Dimension; }
188
189 /// Check if vocabulary is valid (has data)
190 bool isValid() const { return TotalSize > 0; }
191
192 /// Iterator support for section-based access
194 const VocabStorage *Storage;
195 unsigned SectionId = 0;
196 size_t LocalIndex = 0;
197
198 public:
199 const_iterator(const VocabStorage *Storage, unsigned SectionId,
200 size_t LocalIndex)
201 : Storage(Storage), SectionId(SectionId), LocalIndex(LocalIndex) {}
202
203 LLVM_ABI const Embedding &operator*() const;
205 LLVM_ABI bool operator==(const const_iterator &Other) const;
206 LLVM_ABI bool operator!=(const const_iterator &Other) const;
207 };
208
209 const_iterator begin() const { return const_iterator(this, 0, 0); }
211 return const_iterator(this, getNumSections(), 0);
212 }
213
214 using VocabMap = std::map<std::string, Embedding>;
215 /// Parse a vocabulary section from JSON and populate the target vocabulary
216 /// map.
218 const json::Value &ParsedVocabValue,
219 VocabMap &TargetVocab, unsigned &Dim);
220};
221
222/// Class for storing and accessing the IR2Vec vocabulary.
223/// The Vocabulary class manages seed embeddings for LLVM IR entities. The
224/// seed embeddings are the initial learned representations of the entities
225/// of LLVM IR. The IR2Vec representation for a given IR is derived from these
226/// seed embeddings.
227///
228/// The vocabulary contains the seed embeddings for three types of entities:
229/// instruction opcodes, types, and operands. Types are grouped/canonicalized
230/// for better learning (e.g., all float variants map to FloatTy). The
231/// vocabulary abstracts away the canonicalization; the exposed APIs
232/// handle all the known LLVM IR opcodes, types and operands.
233///
234/// This class helps populate the seed embeddings in an internal vector-based
235/// ADT. It provides logic to map every IR entity to a specific slot index or
236/// position in this vector, enabling O(1) embedding lookup while avoiding
237/// unnecessary computations involving string based lookups while generating the
238/// embeddings.
241
242 // Vocabulary Layout:
243 // +----------------+------------------------------------------------------+
244 // | Entity Type | Index Range |
245 // +----------------+------------------------------------------------------+
246 // | Opcodes | [0 .. (MaxOpcodes-1)] |
247 // | Canonical Types| [MaxOpcodes .. (MaxOpcodes+MaxCanonicalTypeIDs-1)] |
248 // | Operands | [(MaxOpcodes+MaxCanonicalTypeIDs) .. (NumCanonicalEntries-1)] |
249 // +----------------+------------------------------------------------------+
250 // Note: MaxOpcodes is the number of unique opcodes supported by LLVM IR.
251 // MaxCanonicalTypeIDs is the number of canonicalized type IDs.
252 // "Similar" LLVM Types are grouped/canonicalized together. E.g., all
253 // float variants (FloatTy, DoubleTy, HalfTy, etc.) map to
254 // CanonicalTypeID::FloatTy. This helps reduce the vocabulary size
255 // and improves learning. Operands include Comparison predicates
256 // (ICmp/FCmp) along with other operand types. This can be extended to
257 // include other specializations in future.
258 enum class Section : unsigned {
259 Opcodes = 0,
260 CanonicalTypes = 1,
261 Operands = 2,
262 Predicates = 3,
263 MaxSections
264 };
265
266 // Use section-based storage for better organization and efficiency
267 VocabStorage Storage;
268
269 static constexpr unsigned NumICmpPredicates =
270 static_cast<unsigned>(CmpInst::LAST_ICMP_PREDICATE) -
271 static_cast<unsigned>(CmpInst::FIRST_ICMP_PREDICATE) + 1;
272 static constexpr unsigned NumFCmpPredicates =
273 static_cast<unsigned>(CmpInst::LAST_FCMP_PREDICATE) -
274 static_cast<unsigned>(CmpInst::FIRST_FCMP_PREDICATE) + 1;
275
276public:
277 /// Canonical type IDs supported by IR2Vec Vocabulary
293
294 /// Operand kinds supported by IR2Vec Vocabulary
302
303 /// Vocabulary layout constants
304#define LAST_OTHER_INST(NUM) static constexpr unsigned MaxOpcodes = NUM;
305#include "llvm/IR/Instruction.def"
306#undef LAST_OTHER_INST
307
308 static constexpr unsigned MaxTypeIDs = Type::TypeID::TargetExtTyID + 1;
309 static constexpr unsigned MaxCanonicalTypeIDs =
310 static_cast<unsigned>(CanonicalTypeID::MaxCanonicalType);
311 static constexpr unsigned MaxOperandKinds =
312 static_cast<unsigned>(OperandKind::MaxOperandKind);
313 // CmpInst::Predicate has gaps. We want the vocabulary to be dense without
314 // empty slots.
315 static constexpr unsigned MaxPredicateKinds =
316 NumICmpPredicates + NumFCmpPredicates;
317
318 Vocabulary() = default;
319 LLVM_ABI Vocabulary(VocabStorage &&Storage) : Storage(std::move(Storage)) {}
320
321 Vocabulary(const Vocabulary &) = delete;
322 Vocabulary &operator=(const Vocabulary &) = delete;
323
324 Vocabulary(Vocabulary &&) = default;
326
/// Returns true when the underlying storage holds exactly
/// NumCanonicalEntries entries, and false otherwise.
327 LLVM_ABI bool isValid() const {
328 return Storage.size() == NumCanonicalEntries;
329 }
330
/// Returns the embedding dimension reported by the underlying storage.
/// The vocabulary must be valid (see isValid()); this is checked with an
/// assert.
331 LLVM_ABI unsigned getDimension() const {
332 assert(isValid() && "IR2Vec Vocabulary is invalid");
333 return Storage.getDimension();
334 }
335
336 /// Total number of entries (opcodes + canonicalized types + operand kinds +
337 /// predicates)
338 static constexpr size_t getCanonicalSize() { return NumCanonicalEntries; }
339
340 /// Function to get vocabulary key for a given Opcode
341 LLVM_ABI static StringRef getVocabKeyForOpcode(unsigned Opcode);
342
343 /// Function to get vocabulary key for a given TypeID
345 return getVocabKeyForCanonicalTypeID(getCanonicalTypeID(TypeID));
346 }
347
348 /// Function to get vocabulary key for a given OperandKind
350 unsigned Index = static_cast<unsigned>(Kind);
351 assert(Index < MaxOperandKinds && "Invalid OperandKind");
352 return OperandKindNames[Index];
353 }
354
355 /// Function to classify an operand into OperandKind
357
358 /// Function to get vocabulary key for a given predicate
360
361 /// Functions to return flat index
/// Opcode overload: Opcode must lie in [1, MaxOpcodes] (checked with an
/// assert); the returned flat index is Opcode - 1.
362 LLVM_ABI static unsigned getIndex(unsigned Opcode) {
363 assert(Opcode >= 1 && Opcode <= MaxOpcodes && "Invalid opcode");
364 return Opcode - 1; // Convert to zero-based index
365 }
366
368 assert(static_cast<unsigned>(TypeID) < MaxTypeIDs && "Invalid type ID");
369 return MaxOpcodes + static_cast<unsigned>(getCanonicalTypeID(TypeID));
370 }
371
/// Returns the flat index for the operand Op: the OperandKind that
/// getOperandKind() assigns to Op, offset by OperandBaseOffset. The kind is
/// asserted to be below MaxOperandKinds.
372 LLVM_ABI static unsigned getIndex(const Value &Op) {
373 unsigned Index = static_cast<unsigned>(getOperandKind(&Op));
374 assert(Index < MaxOperandKinds && "Invalid OperandKind");
375 return OperandBaseOffset + Index;
376 }
377
379 return PredicateBaseOffset + getPredicateLocalIndex(P);
380 }
381
382 /// Accessors to get the embedding for a given entity.
/// Opcode overload: Opcode must lie in [1, MaxOpcodes] (checked with an
/// assert). The embedding is read from the Opcodes section at local index
/// Opcode - 1.
383 LLVM_ABI const ir2vec::Embedding &operator[](unsigned Opcode) const {
384 assert(Opcode >= 1 && Opcode <= MaxOpcodes && "Invalid opcode");
385 return Storage[static_cast<unsigned>(Section::Opcodes)][Opcode - 1];
386 }
387
389 assert(static_cast<unsigned>(TypeID) < MaxTypeIDs && "Invalid type ID");
390 unsigned LocalIndex = static_cast<unsigned>(getCanonicalTypeID(TypeID));
391 return Storage[static_cast<unsigned>(Section::CanonicalTypes)][LocalIndex];
392 }
393
/// Returns the embedding for the operand Arg. The operand is classified with
/// getOperandKind(), and the resulting kind (asserted to be below
/// MaxOperandKinds) is used as the local index into the Operands section.
394 LLVM_ABI const ir2vec::Embedding &operator[](const Value &Arg) const {
395 unsigned LocalIndex = static_cast<unsigned>(getOperandKind(&Arg));
396 assert(LocalIndex < MaxOperandKinds && "Invalid OperandKind");
397 return Storage[static_cast<unsigned>(Section::Operands)][LocalIndex];
398 }
399
401 unsigned LocalIndex = getPredicateLocalIndex(P);
402 return Storage[static_cast<unsigned>(Section::Predicates)][LocalIndex];
403 }
404
405 /// Const Iterator type aliases
407
409 assert(isValid() && "IR2Vec Vocabulary is invalid");
410 return Storage.begin();
411 }
412
413 const_iterator cbegin() const { return begin(); }
414
416 assert(isValid() && "IR2Vec Vocabulary is invalid");
417 return Storage.end();
418 }
419
420 const_iterator cend() const { return end(); }
421
422 /// Returns the string key for a given index position in the vocabulary.
423 /// This is useful for debugging or printing the vocabulary. Do not use this
424 /// for embedding generation as string based lookups are inefficient.
425 LLVM_ABI static StringRef getStringKey(unsigned Pos);
426
427 /// Create a dummy vocabulary for testing purposes.
428 LLVM_ABI static VocabStorage createDummyVocabForTest(unsigned Dim = 1);
429
430 LLVM_ABI bool invalidate(Module &M, const PreservedAnalyses &PA,
431 ModuleAnalysisManager::Invalidator &Inv) const;
432
433private:
434 constexpr static unsigned NumCanonicalEntries =
436
437 // Base offsets for flat index computation
438 constexpr static unsigned OperandBaseOffset =
439 MaxOpcodes + MaxCanonicalTypeIDs;
440 constexpr static unsigned PredicateBaseOffset =
441 OperandBaseOffset + MaxOperandKinds;
442
443 /// Functions for predicate index calculations
444 static unsigned getPredicateLocalIndex(CmpInst::Predicate P);
445 static CmpInst::Predicate getPredicateFromLocalIndex(unsigned LocalIndex);
446
447 /// String mappings for CanonicalTypeID values
448 static constexpr StringLiteral CanonicalTypeNames[] = {
449 "FloatTy", "VoidTy", "LabelTy", "MetadataTy",
450 "VectorTy", "TokenTy", "IntegerTy", "FunctionTy",
451 "PointerTy", "StructTy", "ArrayTy", "UnknownTy"};
452 static_assert(std::size(CanonicalTypeNames) ==
453 static_cast<unsigned>(CanonicalTypeID::MaxCanonicalType),
454 "CanonicalTypeNames array size must match MaxCanonicalType");
455
456 /// String mappings for OperandKind values
457 static constexpr StringLiteral OperandKindNames[] = {"Function", "Pointer",
458 "Constant", "Variable"};
459 static_assert(std::size(OperandKindNames) ==
460 static_cast<unsigned>(OperandKind::MaxOperandKind),
461 "OperandKindNames array size must match MaxOperandKind");
462
463 /// Every known TypeID defined in llvm/IR/Type.h is expected to have a
464 /// corresponding mapping here in the same order as enum Type::TypeID.
465 static constexpr std::array<CanonicalTypeID, MaxTypeIDs> TypeIDMapping = {{
466 CanonicalTypeID::FloatTy, // HalfTyID = 0
467 CanonicalTypeID::FloatTy, // BFloatTyID
468 CanonicalTypeID::FloatTy, // FloatTyID
469 CanonicalTypeID::FloatTy, // DoubleTyID
470 CanonicalTypeID::FloatTy, // X86_FP80TyID
471 CanonicalTypeID::FloatTy, // FP128TyID
472 CanonicalTypeID::FloatTy, // PPC_FP128TyID
473 CanonicalTypeID::VoidTy, // VoidTyID
474 CanonicalTypeID::LabelTy, // LabelTyID
475 CanonicalTypeID::MetadataTy, // MetadataTyID
476 CanonicalTypeID::VectorTy, // X86_AMXTyID
477 CanonicalTypeID::TokenTy, // TokenTyID
478 CanonicalTypeID::IntegerTy, // IntegerTyID
479 CanonicalTypeID::FunctionTy, // FunctionTyID
480 CanonicalTypeID::PointerTy, // PointerTyID
481 CanonicalTypeID::StructTy, // StructTyID
482 CanonicalTypeID::ArrayTy, // ArrayTyID
483 CanonicalTypeID::VectorTy, // FixedVectorTyID
484 CanonicalTypeID::VectorTy, // ScalableVectorTyID
485 CanonicalTypeID::PointerTy, // TypedPointerTyID
486 CanonicalTypeID::UnknownTy // TargetExtTyID
487 }};
488 static_assert(TypeIDMapping.size() == MaxTypeIDs,
489 "TypeIDMapping must cover all Type::TypeID values");
490
491 /// Function to get vocabulary key for canonical type by enum
492 LLVM_ABI static StringRef
493 getVocabKeyForCanonicalTypeID(CanonicalTypeID CType) {
494 unsigned Index = static_cast<unsigned>(CType);
495 assert(Index < MaxCanonicalTypeIDs && "Invalid CanonicalTypeID");
496 return CanonicalTypeNames[Index];
497 }
498
499 /// Function to convert TypeID to CanonicalTypeID
500 LLVM_ABI static CanonicalTypeID getCanonicalTypeID(Type::TypeID TypeID) {
501 unsigned Index = static_cast<unsigned>(TypeID);
502 assert(Index < MaxTypeIDs && "Invalid TypeID");
503 return TypeIDMapping[Index];
504 }
505
506 /// Function to get the predicate enum value for a given index. Index is
507 /// relative to the predicates section of the vocabulary. E.g., Index 0
508 /// corresponds to the first predicate.
509 LLVM_ABI static CmpInst::Predicate getPredicate(unsigned Index) {
510 assert(Index < MaxPredicateKinds && "Invalid predicate index");
511 return getPredicateFromLocalIndex(Index);
512 }
513};
514
515/// Embedder provides the interface to generate embeddings (vector
516/// representations) for instructions, basic blocks, and functions. The
517/// vector representations are generated using IR2Vec algorithms.
518///
519/// The Embedder class is an abstract class and it is intended to be
520/// subclassed for different IR2Vec algorithms like Symbolic and Flow-aware.
521class Embedder {
522protected:
523 const Function &F;
525
526 /// Dimension of the vector representation; captured from the input vocabulary
527 const unsigned Dimension;
528
529 /// Weights for different entities (like opcode, arguments, types)
530 /// in the IR instructions to generate the vector representation.
532
533 // Utility maps - these are used to store the vector representations of
534 // instructions, basic blocks and functions.
538
539 LLVM_ABI Embedder(const Function &F, const Vocabulary &Vocab);
540
541 /// Function to compute embeddings. It generates embeddings for all
542 /// the instructions and basic blocks in the function F.
543 void computeEmbeddings() const;
544
545 /// Function to compute the embedding for a given basic block.
546 /// Specific to the kind of embeddings being computed.
547 virtual void computeEmbeddings(const BasicBlock &BB) const = 0;
548
549public:
550 virtual ~Embedder() = default;
551
552 /// Factory method to create an Embedder object.
553 LLVM_ABI static std::unique_ptr<Embedder>
555
556 /// Returns a map containing instructions and the corresponding embeddings for
557 /// the function F if it has been computed. If not, it computes the embeddings
558 /// for the function and returns the map.
560
561 /// Returns a map containing basic block and the corresponding embeddings for
562 /// the function F if it has been computed. If not, it computes the embeddings
563 /// for the function and returns the map.
564 LLVM_ABI const BBEmbeddingsMap &getBBVecMap() const;
565
566 /// Returns the embedding for a given basic block in the function F if it has
567 /// been computed. If not, it computes the embedding for the basic block and
568 /// returns it.
569 LLVM_ABI const Embedding &getBBVector(const BasicBlock &BB) const;
570
571 /// Computes and returns the embedding for the current function.
572 LLVM_ABI const Embedding &getFunctionVector() const;
573};
574
575/// Class for computing the Symbolic embeddings of IR2Vec.
576/// Symbolic embeddings are constructed based on the entity-level
577/// representations obtained from the Vocabulary.
579private:
580 void computeEmbeddings(const BasicBlock &BB) const override;
581
582public:
585};
586
587/// Class for computing the Flow-aware embeddings of IR2Vec.
588/// Flow-aware embeddings build on the vocabulary, just like Symbolic
589/// embeddings, and additionally capture the flow information in the IR.
591private:
592 void computeEmbeddings(const BasicBlock &BB) const override;
593
594public:
597};
598
599} // namespace ir2vec
600
601/// This analysis provides the vocabulary for IR2Vec. The vocabulary provides a
602/// mapping between an entity of the IR (like opcode, type, argument, etc.) and
603/// its corresponding embedding.
604class IR2VecVocabAnalysis : public AnalysisInfoMixin<IR2VecVocabAnalysis> {
605 using VocabMap = std::map<std::string, ir2vec::Embedding>;
606 std::optional<ir2vec::VocabStorage> Vocab;
607
608 Error readVocabulary(VocabMap &OpcVocab, VocabMap &TypeVocab,
609 VocabMap &ArgVocab);
610 void generateVocabStorage(VocabMap &OpcVocab, VocabMap &TypeVocab,
611 VocabMap &ArgVocab);
612 void emitError(Error Err, LLVMContext &Ctx);
613
614public:
618 : Vocab(std::move(Vocab)) {}
621};
622
623/// This pass prints the IR2Vec embeddings for instructions, basic blocks, and
624/// functions.
625class IR2VecPrinterPass : public PassInfoMixin<IR2VecPrinterPass> {
/// Output stream bound at construction. Held by reference, not owned.
626 raw_ostream &OS;
627
628public:
/// Binds the pass to the stream OS.
629 explicit IR2VecPrinterPass(raw_ostream &OS) : OS(OS) {}
/// Always returns true.
/// NOTE(review): presumably marks the pass as required so that the pass
/// manager does not skip it - confirm against the PassManager documentation.
631 static bool isRequired() { return true; }
632};
633
634/// This pass prints the embeddings in the vocabulary
635class IR2VecVocabPrinterPass : public PassInfoMixin<IR2VecVocabPrinterPass> {
/// Output stream bound at construction. Held by reference, not owned.
636 raw_ostream &OS;
637
638public:
/// Binds the pass to the stream OS.
639 explicit IR2VecVocabPrinterPass(raw_ostream &OS) : OS(OS) {}
/// Always returns true.
/// NOTE(review): presumably marks the pass as required so that the pass
/// manager does not skip it - confirm against the PassManager documentation.
641 static bool isRequired() { return true; }
642};
643
644} // namespace llvm
645
646#endif // LLVM_ANALYSIS_IR2VEC_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define LLVM_ABI
Definition Compiler.h:213
This file defines the DenseMap class.
Provides ErrorOr<T> smart pointer.
This header defines various interfaces for pass management in LLVM.
This file supports working with JSON data.
Type::TypeID TypeID
#define P(N)
ModuleAnalysisManager MAM
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Value * RHS
LLVM Basic Block Representation.
Definition BasicBlock.h:62
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
IR2VecPrinterPass(raw_ostream &OS)
Definition IR2Vec.h:629
static bool isRequired()
Definition IR2Vec.h:631
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
Definition IR2Vec.cpp:668
This analysis provides the vocabulary for IR2Vec.
Definition IR2Vec.h:604
ir2vec::Vocabulary Result
Definition IR2Vec.h:619
LLVM_ABI Result run(Module &M, ModuleAnalysisManager &MAM)
Definition IR2Vec.cpp:629
LLVM_ABI IR2VecVocabAnalysis(ir2vec::VocabStorage &&Vocab)
Definition IR2Vec.h:617
static LLVM_ABI AnalysisKey Key
Definition IR2Vec.h:615
static bool isRequired()
Definition IR2Vec.h:641
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
Definition IR2Vec.cpp:710
IR2VecVocabPrinterPass(raw_ostream &OS)
Definition IR2Vec.h:639
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:854
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
TypeID
Definitions of all of the base types for the Type system.
Definition Type.h:54
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI const Embedding & getBBVector(const BasicBlock &BB) const
Returns the embedding for a given basic block in the function F if it has been computed.
Definition IR2Vec.cpp:184
static LLVM_ABI std::unique_ptr< Embedder > create(IR2VecKind Mode, const Function &F, const Vocabulary &Vocab)
Factory method to create an Embedder object.
Definition IR2Vec.cpp:161
BBEmbeddingsMap BBVecMap
Definition IR2Vec.h:536
LLVM_ABI const BBEmbeddingsMap & getBBVecMap() const
Returns a map containing basic block and the corresponding embeddings for the function F if it has be...
Definition IR2Vec.cpp:178
const Vocabulary & Vocab
Definition IR2Vec.h:524
void computeEmbeddings() const
Function to compute embeddings.
Definition IR2Vec.cpp:199
virtual ~Embedder()=default
const float TypeWeight
Definition IR2Vec.h:531
LLVM_ABI const InstEmbeddingsMap & getInstVecMap() const
Returns a map containing instructions and the corresponding embeddings for the function F if it has b...
Definition IR2Vec.cpp:172
const float OpcWeight
Weights for different entities (like opcode, arguments, types) in the IR instructions to generate the...
Definition IR2Vec.h:531
const unsigned Dimension
Dimension of the vector representation; captured from the input vocabulary.
Definition IR2Vec.h:527
LLVM_ABI Embedder(const Function &F, const Vocabulary &Vocab)
Definition IR2Vec.cpp:156
const float ArgWeight
Definition IR2Vec.h:531
Embedding FuncVector
Definition IR2Vec.h:535
virtual void computeEmbeddings(const BasicBlock &BB) const =0
Function to compute the embedding for a given basic block.
LLVM_ABI const Embedding & getFunctionVector() const
Computes and returns the embedding for the current function.
Definition IR2Vec.cpp:192
InstEmbeddingsMap InstVecMap
Definition IR2Vec.h:537
const Function & F
Definition IR2Vec.h:523
FlowAwareEmbedder(const Function &F, const Vocabulary &Vocab)
Definition IR2Vec.h:595
SymbolicEmbedder(const Function &F, const Vocabulary &Vocab)
Definition IR2Vec.h:583
Iterator support for section-based access.
Definition IR2Vec.h:193
const_iterator(const VocabStorage *Storage, unsigned SectionId, size_t LocalIndex)
Definition IR2Vec.h:199
LLVM_ABI bool operator!=(const const_iterator &Other) const
Definition IR2Vec.cpp:328
LLVM_ABI const_iterator & operator++()
Definition IR2Vec.cpp:309
LLVM_ABI const Embedding & operator*() const
Definition IR2Vec.cpp:302
LLVM_ABI bool operator==(const const_iterator &Other) const
Definition IR2Vec.cpp:322
Generic storage class for section-based vocabularies.
Definition IR2Vec.h:151
static Error parseVocabSection(StringRef Key, const json::Value &ParsedVocabValue, VocabMap &TargetVocab, unsigned &Dim)
Parse a vocabulary section from JSON and populate the target vocabulary map.
Definition IR2Vec.cpp:333
const_iterator end() const
Definition IR2Vec.h:210
unsigned getNumSections() const
Get number of sections.
Definition IR2Vec.h:176
VocabStorage()
Default constructor creates empty storage (invalid state)
Definition IR2Vec.h:161
VocabStorage & operator=(VocabStorage &&)=delete
VocabStorage & operator=(const VocabStorage &)=delete
unsigned getDimension() const
Get vocabulary dimension.
Definition IR2Vec.h:187
size_t size() const
Get total number of entries across all sections.
Definition IR2Vec.h:173
const_iterator begin() const
Definition IR2Vec.h:209
bool isValid() const
Check if vocabulary is valid (has data)
Definition IR2Vec.h:190
VocabStorage(VocabStorage &&)=default
std::map< std::string, Embedding > VocabMap
Definition IR2Vec.h:214
const std::vector< Embedding > & operator[](unsigned SectionId) const
Section-based access: Storage[sectionId][localIndex].
Definition IR2Vec.h:181
VocabStorage(const VocabStorage &)=delete
Class for storing and accessing the IR2Vec vocabulary.
Definition IR2Vec.h:239
static LLVM_ABI StringRef getVocabKeyForOperandKind(OperandKind Kind)
Function to get vocabulary key for a given OperandKind.
Definition IR2Vec.h:349
LLVM_ABI bool invalidate(Module &M, const PreservedAnalyses &PA, ModuleAnalysisManager::Invalidator &Inv) const
Definition IR2Vec.cpp:443
const_iterator begin() const
Definition IR2Vec.h:408
LLVM_ABI unsigned getDimension() const
Definition IR2Vec.h:331
Vocabulary(Vocabulary &&)=default
static LLVM_ABI OperandKind getOperandKind(const Value *Op)
Function to classify an operand into OperandKind.
Definition IR2Vec.cpp:386
static LLVM_ABI unsigned getIndex(CmpInst::Predicate P)
Definition IR2Vec.h:378
Vocabulary & operator=(const Vocabulary &)=delete
static LLVM_ABI StringRef getStringKey(unsigned Pos)
Returns the string key for a given index position in the vocabulary.
Definition IR2Vec.cpp:425
static constexpr unsigned MaxCanonicalTypeIDs
Definition IR2Vec.h:309
LLVM_ABI const ir2vec::Embedding & operator[](CmpInst::Predicate P) const
Definition IR2Vec.h:400
static constexpr unsigned MaxOperandKinds
Definition IR2Vec.h:311
Vocabulary(const Vocabulary &)=delete
const_iterator cbegin() const
Definition IR2Vec.h:413
OperandKind
Operand kinds supported by IR2Vec Vocabulary.
Definition IR2Vec.h:295
static constexpr size_t getCanonicalSize()
Total number of entries (opcodes + canonicalized types + operand kinds + predicates)
Definition IR2Vec.h:338
static LLVM_ABI unsigned getIndex(const Value &Op)
Definition IR2Vec.h:372
static LLVM_ABI StringRef getVocabKeyForPredicate(CmpInst::Predicate P)
Function to get vocabulary key for a given predicate.
Definition IR2Vec.cpp:415
static constexpr unsigned MaxTypeIDs
Definition IR2Vec.h:308
LLVM_ABI Vocabulary(VocabStorage &&Storage)
Definition IR2Vec.h:319
LLVM_ABI const ir2vec::Embedding & operator[](Type::TypeID TypeID) const
Definition IR2Vec.h:388
static LLVM_ABI unsigned getIndex(Type::TypeID TypeID)
Definition IR2Vec.h:367
const_iterator end() const
Definition IR2Vec.h:415
static LLVM_ABI StringRef getVocabKeyForOpcode(unsigned Opcode)
Function to get vocabulary key for a given Opcode.
Definition IR2Vec.cpp:374
static LLVM_ABI StringRef getVocabKeyForTypeID(Type::TypeID TypeID)
Function to get vocabulary key for a given TypeID.
Definition IR2Vec.h:344
VocabStorage::const_iterator const_iterator
Const Iterator type aliases.
Definition IR2Vec.h:406
const_iterator cend() const
Definition IR2Vec.h:420
static LLVM_ABI unsigned getIndex(unsigned Opcode)
Functions to return flat index.
Definition IR2Vec.h:362
LLVM_ABI bool isValid() const
Definition IR2Vec.h:327
Vocabulary & operator=(Vocabulary &&Other)=delete
LLVM_ABI const ir2vec::Embedding & operator[](unsigned Opcode) const
Accessors to get the embedding for a given entity.
Definition IR2Vec.h:383
static LLVM_ABI VocabStorage createDummyVocabForTest(unsigned Dim=1)
Create a dummy vocabulary for testing purposes.
Definition IR2Vec.cpp:449
static constexpr unsigned MaxPredicateKinds
Definition IR2Vec.h:315
CanonicalTypeID
Canonical type IDs supported by IR2Vec Vocabulary.
Definition IR2Vec.h:278
LLVM_ABI const ir2vec::Embedding & operator[](const Value &Arg) const
Definition IR2Vec.h:394
A Value is a JSON value of unknown type.
Definition JSON.h:290
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
DenseMap< const Instruction *, Embedding > InstEmbeddingsMap
Definition IR2Vec.h:145
LLVM_ABI cl::opt< float > ArgWeight
DenseMap< const BasicBlock *, Embedding > BBEmbeddingsMap
Definition IR2Vec.h:146
LLVM_ABI cl::opt< float > OpcWeight
LLVM_ABI cl::opt< float > TypeWeight
LLVM_ABI cl::opt< IR2VecKind > IR2VecEmbeddingKind
llvm::cl::OptionCategory IR2VecCategory
This is an optimization pass for GlobalISel generic memory operations.
IR2VecKind
IR2Vec computes two kinds of embeddings: Symbolic and Flow-aware.
Definition IR2Vec.h:71
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
@ Other
Any other memory.
Definition ModRef.h:68
DWARFExpression::Operation Op
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1847
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:867
A CRTP mix-in that provides informational APIs needed for analysis passes.
Definition PassManager.h:93
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70
Embedding is a datatype that wraps std::vector<double>.
Definition IR2Vec.h:87
const_iterator end() const
Definition IR2Vec.h:119
LLVM_ABI bool approximatelyEquals(const Embedding &RHS, double Tolerance=1e-4) const
Returns true if the embedding is approximately equal to the RHS embedding within the specified tolera...
Definition IR2Vec.cpp:132
const_iterator cbegin() const
Definition IR2Vec.h:120
LLVM_ABI Embedding & operator+=(const Embedding &RHS)
Arithmetic operators.
Definition IR2Vec.cpp:87
LLVM_ABI Embedding operator-(const Embedding &RHS) const
Definition IR2Vec.cpp:107
const std::vector< double > & getData() const
Definition IR2Vec.h:123
typename std::vector< double >::const_iterator const_iterator
Definition IR2Vec.h:114
Embedding(size_t Size, double InitialValue)
Definition IR2Vec.h:98
LLVM_ABI Embedding & operator-=(const Embedding &RHS)
Definition IR2Vec.cpp:100
const_iterator cend() const
Definition IR2Vec.h:121
LLVM_ABI Embedding operator*(double Factor) const
Definition IR2Vec.cpp:119
size_t size() const
Definition IR2Vec.h:100
LLVM_ABI Embedding & operator*=(double Factor)
Definition IR2Vec.cpp:113
Embedding(std::initializer_list< double > IL)
Definition IR2Vec.h:95
Embedding(const std::vector< double > &V)
Definition IR2Vec.h:93
LLVM_ABI Embedding operator+(const Embedding &RHS) const
Definition IR2Vec.cpp:94
bool empty() const
Definition IR2Vec.h:101
typename std::vector< double >::iterator iterator
Definition IR2Vec.h:113
LLVM_ABI Embedding & scaleAndAdd(const Embedding &Src, float Factor)
Adds Src Embedding scaled by Factor with the called Embedding.
Definition IR2Vec.cpp:125
Embedding(std::vector< double > &&V)
Definition IR2Vec.h:94
const double & operator[](size_t Itr) const
Definition IR2Vec.h:108
Embedding(size_t Size)
Definition IR2Vec.h:97
LLVM_ABI void print(raw_ostream &OS) const
Definition IR2Vec.cpp:145
const_iterator begin() const
Definition IR2Vec.h:118
double & operator[](size_t Itr)
Definition IR2Vec.h:103