LLVM 22.0.0git
TextEncoding.h
Go to the documentation of this file.
1//===-- TextEncoding.h - Text encoding conversion class -----------*- C++ -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides a utility class to convert between different character
11/// set encodings.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_SUPPORT_TEXT_ENCODING_H
16#define LLVM_SUPPORT_TEXT_ENCODING_H
17
19#include "llvm/ADT/StringRef.h"
20#include "llvm/Config/config.h"
23
24#include <string>
25#include <system_error>
26
27namespace llvm {
28
29template <typename T> class SmallVectorImpl;
30
31namespace details {
33
34private:
35 /// Converts a string. This is the pure virtual conversion hook that the
   /// public convert() member calls; convert() invokes reset() afterwards.
36 /// \param[in] Source source string
37 /// \param[out] Result container for converted string
38 /// \return error code in case something went wrong
39 ///
40 /// The following error codes can occur, among others:
41 /// - std::errc::argument_list_too_long: The result requires more than
42 ///   std::numeric_limits<size_t>::max() bytes.
43 /// - std::errc::illegal_byte_sequence: The input contains an invalid
44 ///   multibyte sequence.
45 /// - std::errc::invalid_argument: The input contains an incomplete
46 ///   multibyte sequence.
47 ///
48 /// If the destination encoding is stateful, the shift state will be set
49 /// to the initial state.
50 ///
51 /// In case of an error, the result string contains the successfully converted
52 /// part of the input string.
53 ///
54 virtual std::error_code convertString(StringRef Source,
55 SmallVectorImpl<char> &Result) = 0;
56
57 /// Resets the converter to the initial state. The public convert() member
   /// calls this after every convertString() call, whether or not that call
   /// reported an error.
58 virtual void reset() = 0;
59
60public:
/// Virtual destructor, so that a concrete implementation is destroyed
/// correctly when it is deleted through a pointer to this base class.
61 virtual ~TextEncodingConverterImplBase() = default;
62
63 /// Converts a string and resets the converter to the initial state.
64 std::error_code convert(StringRef Source, SmallVectorImpl<char> &Result) {
65 auto EC = convertString(Source, Result);
66 reset();
67 return EC;
68 }
69};
70} // namespace details
71
72// Names inspired by https://wg21.link/p1885.
/// Character set encodings that can be named as the source or target of a
/// conversion.
enum class TextEncoding {
  /// UTF-8 character set encoding.
  UTF8,

  /// IBM EBCDIC 1047 character set encoding.
  IBM1047
};
80
81/// Utility class to convert between different character encodings.
83 std::unique_ptr<details::TextEncodingConverterImplBase> Converter;
84
86 std::unique_ptr<details::TextEncodingConverterImplBase> Converter)
87 : Converter(std::move(Converter)) {}
88
89public:
90 /// Creates a TextEncodingConverter instance.
91 /// Returns std::errc::invalid_argument in case the requested conversion is
92 /// not supported.
93 /// \param[in] From the source character encoding
94 /// \param[in] To the target character encoding
95 /// \return a TextEncodingConverter instance or an error code
97 TextEncoding To);
98
99 /// Creates a TextEncodingConverter instance.
100 /// Returns std::errc::invalid_argument in case the requested conversion is
101 /// not supported.
102 /// \param[in] From name of the source character encoding
103 /// \param[in] To name of the target character encoding
104 /// \return a TextEncodingConverter instance or an error code
106 StringRef To);
107
110
113
115 if (this != &Other)
116 Converter = std::move(Other.Converter);
117 return *this;
118 }
119
121
122 /// Converts a string.
123 /// \param[in] Source source string
124 /// \param[out] Result container for converted string
125 /// \return error code in case something went wrong
126 std::error_code convert(StringRef Source,
127 SmallVectorImpl<char> &Result) const {
128 return Converter->convert(Source, Result);
129 }
130
132 SmallString<100> Result;
133 auto EC = Converter->convert(Source, Result);
134 if (!EC)
135 return std::string(Result);
136 return EC;
137 }
138};
139
140} // namespace llvm
141
142#endif
BlockVerifier::State From
#define LLVM_ABI
Definition: Compiler.h:213
Early If Converter
Provides ErrorOr<T> smart pointer.
This file defines the SmallString class.
Represents either an error or a value T.
Definition: ErrorOr.h:56
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
Utility class to convert between different character encodings.
Definition: TextEncoding.h:82
TextEncodingConverter(const TextEncodingConverter &)=delete
TextEncodingConverter & operator=(const TextEncodingConverter &)=delete
std::error_code convert(StringRef Source, SmallVectorImpl< char > &Result) const
Converts a string.
Definition: TextEncoding.h:126
TextEncodingConverter(TextEncodingConverter &&Other)
Definition: TextEncoding.h:111
ErrorOr< std::string > convert(StringRef Source) const
Definition: TextEncoding.h:131
static LLVM_ABI ErrorOr< TextEncodingConverter > create(TextEncoding From, TextEncoding To)
Creates a TextEncodingConverter instance.
TextEncodingConverter & operator=(TextEncodingConverter &&Other)
Definition: TextEncoding.h:114
std::error_code convert(StringRef Source, SmallVectorImpl< char > &Result)
Converts a string and resets the converter to the initial state.
Definition: TextEncoding.h:64
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
TextEncoding
Definition: TextEncoding.h:73
@ IBM1047
IBM EBCDIC 1047 character set encoding.
@ Other
Any other memory.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1886
unsigned char UTF8
Definition: ConvertUTF.h:131
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:856