#ifndef LLVM_SUPPORT_CONVERTUTF_H
#define LLVM_SUPPORT_CONVERTUTF_H
#include <system_error>
#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
#define UNI_MAX_BMP (UTF32)0x0000FFFF
#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4
#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE 0xFEFF
#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE
#define UNI_UTF32_BYTE_ORDER_MARK_NATIVE 0x0000FEFF
#define UNI_UTF32_BYTE_ORDER_MARK_SWAPPED 0xFFFE0000
162 const UTF8 *sourceEnd,
172 const UTF8 *sourceEnd,
182 const UTF8 *sourceEnd,
188 const UTF16 *sourceEnd,
194 const UTF32 *sourceEnd,
200 const UTF16 *sourceEnd,
206 const UTF32 *sourceEnd,
216 const UTF8 *sourceEnd);
template <typename T> class ArrayRef;
template <typename T> class SmallVectorImpl;
237 char *&ResultPtr,
const UTF8 *&ErrorPtr);
256 std::string &Result);
286 const UTF8 *sourceEnd,
289 if (*source == sourceEnd)
347 SmallVectorImpl<UTF16> &DstUTF16);
LLVM_ABI std::error_code UTF8ToUTF16(StringRef utf8,
                                     SmallVectorImpl<wchar_t> &utf16);
LLVM_ABI std::error_code CurCPToUTF16(StringRef utf8,
                                      SmallVectorImpl<wchar_t> &utf16);
LLVM_ABI std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
                                     SmallVectorImpl<char> &utf8);
LLVM_ABI std::error_code UTF16ToCurCP(const wchar_t *utf16, size_t utf16_len,
                                      SmallVectorImpl<char> &utf8);
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
This is an optimization pass for GlobalISel generic memory operations.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
Convert a partial UTF8 sequence to UTF32.
LLVM_ABI bool IsSingleCodeUnitUTF16Codepoint(unsigned)
LLVM_ABI bool IsSingleCodeUnitUTF32Codepoint(unsigned)
LLVM_ABI unsigned getNumBytesForUTF8(UTF8 firstByte)
LLVM_ABI bool hasUTF16ByteOrderMark(ArrayRef< char > SrcBytes)
Returns true if a blob of text starts with a UTF-16 big or little endian byte order mark.
LLVM_ABI ConversionResult ConvertUTF8toUTF32Partial(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
Convert a partial UTF8 sequence to UTF32.
ConversionResult convertUTF8Sequence(const UTF8 **source, const UTF8 *sourceEnd, UTF32 *target, ConversionFlags flags)
Convert the first UTF8 sequence in the given source buffer to a UTF32 code point.
LLVM_ABI bool convertWideToUTF8(const std::wstring &Source, std::string &Result)
Converts a std::wstring to a UTF-8 encoded std::string.
LLVM_ABI bool convertUTF16ToUTF8String(ArrayRef< char > SrcBytes, std::string &Out)
Converts a stream of raw bytes assumed to be UTF16 into a UTF8 std::string.
LLVM_ABI ConversionResult ConvertUTF32toUTF16(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags)
LLVM_ABI bool IsSingleCodeUnitUTF8Codepoint(unsigned)
LLVM_ABI Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd)
LLVM_ABI ConversionResult ConvertUTF16toUTF8(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
LLVM_ABI bool convertUTF32ToUTF8String(ArrayRef< char > SrcBytes, std::string &Out)
Converts a stream of raw bytes assumed to be UTF32 into a UTF8 std::string.
LLVM_ABI bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source, char *&ResultPtr, const UTF8 *&ErrorPtr)
Convert a UTF8 StringRef to UTF8, UTF16, or UTF32 depending on WideCharWidth.
LLVM_ABI ConversionResult ConvertUTF32toUTF8(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
LLVM_ABI Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd)
LLVM_ABI unsigned getUTF8SequenceSize(const UTF8 *source, const UTF8 *sourceEnd)
LLVM_ABI ConversionResult ConvertUTF16toUTF32(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
LLVM_ABI bool convertUTF8ToUTF16String(StringRef SrcUTF8, SmallVectorImpl< UTF16 > &DstUTF16)
Converts a UTF-8 string into a UTF-16 string with native endianness.
LLVM_ABI bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr)
Convert a Unicode code point to a UTF8 sequence.
LLVM_ABI ConversionResult ConvertUTF8toUTF16(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags)