LLVM 21.0.0git
StringToOffsetTable.h
Go to the documentation of this file.
1//===- StringToOffsetTable.h - Emit a big concatenated string ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_TABLEGEN_STRINGTOOFFSETTABLE_H
10#define LLVM_TABLEGEN_STRINGTOOFFSETTABLE_H
11
14#include "llvm/ADT/StringMap.h"
17#include <optional>
18
19namespace llvm {
20
21/// StringToOffsetTable - This class uniques a bunch of nul-terminated strings
22/// and keeps track of their offset in a massive contiguous string allocation.
23/// It can then output this string blob and use indexes into the string to
24/// reference each piece.
26 StringMap<unsigned> StringOffset;
27 std::string AggregateString;
28
29public:
31 // Ensure we always put the empty string at offset zero. That lets empty
32 // initialization also be zero initialization for offsets into the table.
34 }
35
36 bool empty() const { return StringOffset.empty(); }
37 size_t size() const { return AggregateString.size(); }
38
39 unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true) {
40 auto [II, Inserted] = StringOffset.insert({Str, size()});
41 if (Inserted) {
42 // Add the string to the aggregate if this is the first time found.
43 AggregateString.append(Str.begin(), Str.end());
44 if (appendZero)
45 AggregateString += '\0';
46 }
47
48 return II->second;
49 }
50
51 // Returns the offset of `Str` in the table if its preset, else return
52 // std::nullopt.
53 std::optional<unsigned> GetStringOffset(StringRef Str) const {
54 auto II = StringOffset.find(Str);
55 if (II == StringOffset.end())
56 return std::nullopt;
57 return II->second;
58 }
59
60 // Emit a string table definition with the provided name and indent.
61 //
62 // When possible, this uses string-literal concatenation to emit the string
63 // contents in a readable and searchable way. However, for (very) large string
64 // tables MSVC cannot reliably use string literals and so there we use a large
65 // character array. We still use a line oriented emission and add comments to
66 // provide searchability even in this case.
67 //
68 // The string table, and its input string contents, are always emitted as both
69 // `static` and `constexpr`. Both `Name` and (`Name` + "Storage") must be
70 // valid identifiers to declare.
72 const Twine &Indent = "") const {
73 OS << formatv(R"(
74#ifdef __GNUC__
75#pragma GCC diagnostic push
76#pragma GCC diagnostic ignored "-Woverlength-strings"
77#endif
78{0}static constexpr char {1}Storage[] = )",
79 Indent, Name);
80
81 // MSVC silently miscompiles string literals longer than 64k in some
82 // circumstances. When the string table is longer, emit it as an array of
83 // character literals.
84 bool UseChars = AggregateString.size() > (64 * 1024);
85 OS << (UseChars ? "{\n" : "\n");
86
87 llvm::ListSeparator LineSep(UseChars ? ",\n" : "\n");
88 llvm::SmallVector<StringRef> Strings(split(AggregateString, '\0'));
89 // We should always have an empty string at the start, and because these are
90 // null terminators rather than separators, we'll have one at the end as
91 // well. Skip the end one.
92 assert(Strings.front().empty() && "Expected empty initial string!");
93 assert(Strings.back().empty() &&
94 "Expected empty string at the end due to terminators!");
95 Strings.pop_back();
96 for (StringRef Str : Strings) {
97 OS << LineSep << Indent << " ";
98 // If we can, just emit this as a string literal to be concatenated.
99 if (!UseChars) {
100 OS << "\"";
101 OS.write_escaped(Str);
102 OS << "\\0\"";
103 continue;
104 }
105
106 llvm::ListSeparator CharSep(", ");
107 for (char C : Str) {
108 OS << CharSep << "'";
109 OS.write_escaped(StringRef(&C, 1));
110 OS << "'";
111 }
112 OS << CharSep << "'\\0'";
113 }
114 OS << LineSep << Indent << (UseChars ? "};" : " ;");
115
116 OS << formatv(R"(
117#ifdef __GNUC__
118#pragma GCC diagnostic pop
119#endif
120
121{0}static constexpr llvm::StringTable {1} =
122{0} {1}Storage;
123)",
124 Indent, Name);
125 }
126
127 // Emit the string as one single string.
128 void EmitString(raw_ostream &O) const {
129 // Escape the string.
130 SmallString<256> EscapedStr;
131 raw_svector_ostream(EscapedStr).write_escaped(AggregateString);
132
133 O << " \"";
134 unsigned CharsPrinted = 0;
135 for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) {
136 if (CharsPrinted > 70) {
137 O << "\"\n \"";
138 CharsPrinted = 0;
139 }
140 O << EscapedStr[i];
141 ++CharsPrinted;
142
143 // Print escape sequences all together.
144 if (EscapedStr[i] != '\\')
145 continue;
146
147 assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!");
148 if (isDigit(EscapedStr[i + 1])) {
149 assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) &&
150 "Expected 3 digit octal escape!");
151 O << EscapedStr[++i];
152 O << EscapedStr[++i];
153 O << EscapedStr[++i];
154 CharsPrinted += 3;
155 } else {
156 O << EscapedStr[++i];
157 ++CharsPrinted;
158 }
159 }
160 O << "\"";
161 }
162};
163
164} // end namespace llvm
165
166#endif
This file defines the StringMap class.
std::string Name
uint64_t IntrinsicInst * II
static bool isDigit(const char C)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the SmallString class.
This file contains some functions that are useful when dealing with strings.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:78
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
bool empty() const
Definition: StringMap.h:103
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition: StringMap.h:128
iterator end()
Definition: StringMap.h:220
iterator find(StringRef Key)
Definition: StringMap.h:233
bool insert(MapEntryTy *KeyValue)
insert - Insert the specified key/value pair into the map.
Definition: StringMap.h:308
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
StringToOffsetTable - This class uniques a bunch of nul-terminated strings and keeps track of their o...
void EmitString(raw_ostream &O) const
void EmitStringTableDef(raw_ostream &OS, const Twine &Name, const Twine &Indent="") const
unsigned GetOrAddStringOffset(StringRef Str, bool appendZero=true)
std::optional< unsigned > GetStringOffset(StringRef Str) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
raw_ostream & write_escaped(StringRef Str, bool UseHexEscapes=false)
Output Str, turning '\', '\t', ' ', '"', and anything that doesn't satisfy llvm::isPrint into an esca...
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:691
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)