35#include <system_error>
66 if (Input.
size() >= 4) {
71 if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
75 if (Input.
size() >= 2 && Input[1] != 0)
79 if ( Input.
size() >= 4
93 if ( Input.
size() >= 3
101 if (Input.
size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
104 if (Input.
size() >= 2 && Input[1] == 0)
// Out-of-line definitions of anchor() for each YAML node class. Every body is
// intentionally empty; these functions perform no work when called.
// NOTE(review): presumably these exist only to give each class a single home
// translation unit for its vtable (the usual "virtual method anchor" idiom) --
// confirm against the class declarations, which are not visible here.
111void Node::anchor() {}
112void NullNode::anchor() {}
113void ScalarNode::anchor() {}
114void BlockScalarNode::anchor() {}
115void KeyValueNode::anchor() {}
116void MappingNode::anchor() {}
117void SequenceNode::anchor() {}
118void AliasNode::anchor() {}
181 unsigned FlowLevel = 0;
182 bool IsRequired =
false;
184 bool operator ==(
const SimpleKey &
Other) {
185 return Tok ==
Other.Tok;
201 if (Position <
End && (*Position & 0x80) == 0) {
202 return std::make_pair(*Position, 1);
206 if (Position + 1 <
End && ((*Position & 0xE0) == 0xC0) &&
207 ((*(Position + 1) & 0xC0) == 0x80)) {
208 uint32_t codepoint = ((*Position & 0x1F) << 6) |
209 (*(Position + 1) & 0x3F);
210 if (codepoint >= 0x80)
211 return std::make_pair(codepoint, 2);
215 if (Position + 2 <
End && ((*Position & 0xF0) == 0xE0) &&
216 ((*(Position + 1) & 0xC0) == 0x80) &&
217 ((*(Position + 2) & 0xC0) == 0x80)) {
218 uint32_t codepoint = ((*Position & 0x0F) << 12) |
219 ((*(Position + 1) & 0x3F) << 6) |
220 (*(Position + 2) & 0x3F);
223 if (codepoint >= 0x800 &&
224 (codepoint < 0xD800 || codepoint > 0xDFFF))
225 return std::make_pair(codepoint, 3);
229 if (Position + 3 <
End && ((*Position & 0xF8) == 0xF0) &&
230 ((*(Position + 1) & 0xC0) == 0x80) &&
231 ((*(Position + 2) & 0xC0) == 0x80) &&
232 ((*(Position + 3) & 0xC0) == 0x80)) {
233 uint32_t codepoint = ((*Position & 0x07) << 18) |
234 ((*(Position + 1) & 0x3F) << 12) |
235 ((*(Position + 2) & 0x3F) << 6) |
236 (*(Position + 3) & 0x3F);
237 if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
238 return std::make_pair(codepoint, 4);
240 return std::make_pair(0, 0);
250 std::error_code *EC =
nullptr);
252 std::error_code *EC =
nullptr);
262 SM.
PrintMessage(Loc, Kind, Message, Ranges, {}, ShowColors);
299 return ::decodeUTF8(
StringRef(Position,
End - Position));
375 void advanceWhile(SkipWhileFunc Func);
380 void scan_ns_uri_char();
404 bool consumeLineBreakIfPresent();
415 void removeStaleSimpleKeyCandidates();
418 void removeSimpleKeyCandidatesOnFlowLevel(
unsigned Level);
422 bool unrollIndent(
int ToColumn);
426 bool rollIndent(
int ToColumn
435 void scanToNextToken();
438 bool scanStreamStart();
441 bool scanStreamEnd();
444 bool scanDirective();
447 bool scanDocumentIndicator(
bool IsStart);
450 bool scanFlowCollectionStart(
bool IsSequence);
453 bool scanFlowCollectionEnd(
bool IsSequence);
456 bool scanFlowEntry();
459 bool scanBlockEntry();
468 bool scanFlowScalar(
bool IsDoubleQuoted);
471 bool scanPlainScalar();
474 bool scanAliasOrAnchor(
bool IsAlias);
477 bool scanBlockScalar(
bool IsLiteral);
485 bool scanBlockScalarIndicators(
char &StyleIndicator,
char &ChompingIndicator,
486 unsigned &IndentIndicator,
bool &IsDone);
489 char scanBlockStyleIndicator();
492 char scanBlockChompingIndicator();
495 unsigned scanBlockIndentationIndicator();
500 bool scanBlockScalarHeader(
char &ChompingIndicator,
unsigned &IndentIndicator,
506 bool findBlockScalarIndent(
unsigned &BlockIndent,
unsigned BlockExitIndent,
507 unsigned &LineBreaks,
bool &IsDone);
512 bool scanBlockScalarIndent(
unsigned BlockIndent,
unsigned BlockExitIndent,
519 bool fetchMoreTokens();
546 bool IsStartOfStream;
549 bool IsSimpleKeyAllowed;
553 bool IsAdjacentValueAllowedInFlow;
581 if (UnicodeScalarValue <= 0x7F) {
582 Result.push_back(UnicodeScalarValue & 0x7F);
583 }
else if (UnicodeScalarValue <= 0x7FF) {
584 uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
585 uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
586 Result.push_back(FirstByte);
587 Result.push_back(SecondByte);
588 }
else if (UnicodeScalarValue <= 0xFFFF) {
589 uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
590 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
591 uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
592 Result.push_back(FirstByte);
593 Result.push_back(SecondByte);
594 Result.push_back(ThirdByte);
595 }
else if (UnicodeScalarValue <= 0x10FFFF) {
596 uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
597 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
598 uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
599 uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
600 Result.push_back(FirstByte);
601 Result.push_back(SecondByte);
602 Result.push_back(ThirdByte);
603 Result.push_back(FourthByte);
614 OS <<
"Stream-Start: ";
617 OS <<
"Stream-End: ";
620 OS <<
"Version-Directive: ";
623 OS <<
"Tag-Directive: ";
626 OS <<
"Document-Start: ";
629 OS <<
"Document-End: ";
632 OS <<
"Block-Entry: ";
638 OS <<
"Block-Sequence-Start: ";
641 OS <<
"Block-Mapping-Start: ";
644 OS <<
"Flow-Entry: ";
647 OS <<
"Flow-Sequence-Start: ";
650 OS <<
"Flow-Sequence-End: ";
653 OS <<
"Flow-Mapping-Start: ";
656 OS <<
"Flow-Mapping-End: ";
668 OS <<
"Block Scalar: ";
682 OS <<
T.Range <<
"\n";
705 std::string EscapedInput;
708 EscapedInput +=
"\\\\";
710 EscapedInput +=
"\\\"";
712 EscapedInput +=
"\\0";
714 EscapedInput +=
"\\a";
716 EscapedInput +=
"\\b";
718 EscapedInput +=
"\\t";
720 EscapedInput +=
"\\n";
722 EscapedInput +=
"\\v";
724 EscapedInput +=
"\\f";
726 EscapedInput +=
"\\r";
728 EscapedInput +=
"\\e";
729 else if ((
unsigned char)*i < 0x20) {
730 std::string HexStr = utohexstr(*i);
731 EscapedInput +=
"\\x" + std::string(2 - HexStr.size(),
'0') + HexStr;
732 }
else if (*i & 0x80) {
735 if (UnicodeScalarValue.second == 0) {
743 if (UnicodeScalarValue.first == 0x85)
744 EscapedInput +=
"\\N";
745 else if (UnicodeScalarValue.first == 0xA0)
746 EscapedInput +=
"\\_";
747 else if (UnicodeScalarValue.first == 0x2028)
748 EscapedInput +=
"\\L";
749 else if (UnicodeScalarValue.first == 0x2029)
750 EscapedInput +=
"\\P";
751 else if (!EscapePrintable &&
753 EscapedInput +=
StringRef(i, UnicodeScalarValue.second);
755 std::string HexStr = utohexstr(UnicodeScalarValue.first);
756 if (HexStr.size() <= 2)
757 EscapedInput +=
"\\x" + std::string(2 - HexStr.size(),
'0') + HexStr;
758 else if (HexStr.size() <= 4)
759 EscapedInput +=
"\\u" + std::string(4 - HexStr.size(),
'0') + HexStr;
760 else if (HexStr.size() <= 8)
761 EscapedInput +=
"\\U" + std::string(8 - HexStr.size(),
'0') + HexStr;
763 i += UnicodeScalarValue.second - 1;
765 EscapedInput.push_back(*i);
859 : SM(sm), ShowColors(ShowColors), EC(EC) {
865 : SM(SM_), ShowColors(ShowColors), EC(EC) {
870 InputBuffer = Buffer;
877 IsStartOfStream =
true;
878 IsSimpleKeyAllowed =
true;
879 IsAdjacentValueAllowedInFlow =
false;
881 std::unique_ptr<MemoryBuffer> InputBufferOwner =
889 bool NeedMore =
false;
891 if (TokenQueue.
empty() || NeedMore) {
892 if (!fetchMoreTokens()) {
896 return TokenQueue.
front();
900 "fetchMoreTokens lied about getting tokens!");
902 removeStaleSimpleKeyCandidates();
904 SK.Tok = TokenQueue.
begin();
910 return TokenQueue.
front();
916 if (!TokenQueue.
empty())
921 if (TokenQueue.
empty())
931 if ( *Position == 0x09
932 || (*Position >= 0x20 && *Position <= 0x7E))
936 if (
uint8_t(*Position) & 0x80) {
939 && u8d.first != 0xFEFF
940 && ( u8d.first == 0x85
941 || ( u8d.first >= 0xA0
942 && u8d.first <= 0xD7FF)
943 || ( u8d.first >= 0xE000
944 && u8d.first <= 0xFFFD)
945 || ( u8d.first >= 0x10000
946 && u8d.first <= 0x10FFFF)))
947 return Position + u8d.second;
955 if (*Position == 0x0D) {
956 if (Position + 1 != End && *(Position + 1) == 0x0A)
961 if (*Position == 0x0A)
969 if (*Position ==
' ')
977 if (*Position ==
' ' || *Position ==
'\t')
985 if (*Position ==
' ' || *Position ==
'\t')
987 return skip_nb_char(Position);
1001void Scanner::advanceWhile(SkipWhileFunc Func) {
1002 auto Final = skip_while(Func, Current);
1003 Column += Final - Current;
1011void Scanner::scan_ns_uri_char() {
1015 if ((*Current ==
'%' && Current + 2 < End &&
1018 StringRef(Current, 1).find_first_of(
"#;/?:@&=+$,_.!~*'()[]") !=
1030 setError(
"Cannot consume non-ascii characters", Current);
1035 if (
uint8_t(*Current) >= 0x80) {
1036 setError(
"Cannot consume non-ascii characters", Current);
1047void Scanner::skip(
uint32_t Distance) {
1048 Current += Distance;
1050 assert(Current <= End &&
"Skipped past the end");
1054 if (Position == End)
1056 return *Position ==
' ' || *Position ==
'\t' || *Position ==
'\r' ||
1061 if (Position == End || isBlankOrBreak(Position))
1069bool Scanner::isLineEmpty(
StringRef Line) {
1070 for (
const auto *Position =
Line.begin(); Position !=
Line.end(); ++Position)
1071 if (!isBlankOrBreak(Position))
1076bool Scanner::consumeLineBreakIfPresent() {
1077 auto Next = skip_b_break(Current);
1078 if (Next == Current)
1088 ,
bool IsRequired) {
1089 if (IsSimpleKeyAllowed) {
1093 SK.Column = AtColumn;
1094 SK.IsRequired = IsRequired;
1095 SK.FlowLevel = FlowLevel;
1100void Scanner::removeStaleSimpleKeyCandidates() {
1102 i != SimpleKeys.
end();) {
1103 if (i->Line != Line || i->Column + 1024 < Column) {
1105 setError(
"Could not find expected : for simple key"
1106 , i->Tok->Range.begin());
1107 i = SimpleKeys.
erase(i);
1114void Scanner::removeSimpleKeyCandidatesOnFlowLevel(
unsigned Level) {
1115 if (!SimpleKeys.
empty() && (SimpleKeys.
end() - 1)->FlowLevel == Level)
1119bool Scanner::unrollIndent(
int ToColumn) {
1125 while (Indent > ToColumn) {
1135bool Scanner::rollIndent(
int ToColumn
1140 if (Indent < ToColumn) {
1147 TokenQueue.
insert(InsertPoint,
T);
1152void Scanner::skipComment() {
1153 if (Current == End || *Current !=
'#')
1166void Scanner::scanToNextToken() {
1168 while (Current != End && (*Current ==
' ' || *Current ==
'\t')) {
1183 IsSimpleKeyAllowed =
true;
1187bool Scanner::scanStreamStart() {
1188 IsStartOfStream =
false;
1196 Current += EI.second;
1200bool Scanner::scanStreamEnd() {
1209 IsSimpleKeyAllowed =
false;
1210 IsAdjacentValueAllowedInFlow =
false;
1219bool Scanner::scanDirective() {
1223 IsSimpleKeyAllowed =
false;
1224 IsAdjacentValueAllowedInFlow =
false;
1229 Current = skip_while(&Scanner::skip_ns_char, Current);
1231 Current = skip_while(&Scanner::skip_s_white, Current);
1234 if (
Name ==
"YAML") {
1235 Current = skip_while(&Scanner::skip_ns_char, Current);
1240 }
else if(
Name ==
"TAG") {
1241 Current = skip_while(&Scanner::skip_ns_char, Current);
1242 Current = skip_while(&Scanner::skip_s_white, Current);
1243 Current = skip_while(&Scanner::skip_ns_char, Current);
1252bool Scanner::scanDocumentIndicator(
bool IsStart) {
1255 IsSimpleKeyAllowed =
false;
1256 IsAdjacentValueAllowedInFlow =
false;
1266bool Scanner::scanFlowCollectionStart(
bool IsSequence) {
1275 saveSimpleKeyCandidate(--TokenQueue.
end(), Column - 1,
false);
1278 IsSimpleKeyAllowed =
true;
1280 IsAdjacentValueAllowedInFlow =
false;
1285bool Scanner::scanFlowCollectionEnd(
bool IsSequence) {
1286 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1287 IsSimpleKeyAllowed =
false;
1288 IsAdjacentValueAllowedInFlow =
true;
1300bool Scanner::scanFlowEntry() {
1301 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1302 IsSimpleKeyAllowed =
true;
1303 IsAdjacentValueAllowedInFlow =
false;
1312bool Scanner::scanBlockEntry() {
1314 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1315 IsSimpleKeyAllowed =
true;
1316 IsAdjacentValueAllowedInFlow =
false;
1325bool Scanner::scanKey() {
1329 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1330 IsSimpleKeyAllowed = !FlowLevel;
1331 IsAdjacentValueAllowedInFlow =
false;
1341bool Scanner::scanValue() {
1344 if (!SimpleKeys.
empty()) {
1348 T.Range = SK.Tok->Range;
1350 for (i = TokenQueue.
begin(), e = TokenQueue.
end(); i != e; ++i) {
1363 IsSimpleKeyAllowed =
false;
1367 IsSimpleKeyAllowed = !FlowLevel;
1369 IsAdjacentValueAllowedInFlow =
false;
1392 while (
I >=
First && *
I ==
'\\') --
I;
1395 return (Position - 1 -
I) % 2 == 1;
1398bool Scanner::scanFlowScalar(
bool IsDoubleQuoted) {
1400 unsigned ColStart = Column;
1401 if (IsDoubleQuoted) {
1404 while (Current != End && *Current !=
'"')
1408 }
while ( Current != End
1409 && *(Current - 1) ==
'\\'
1413 while (Current != End) {
1415 if (Current + 1 < End && *Current ==
'\'' && *(Current + 1) ==
'\'') {
1418 }
else if (*Current ==
'\'')
1422 i = skip_b_break(Current);
1437 if (Current == End) {
1438 setError(
"Expected quote at end of scalar", Current);
1448 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1450 IsSimpleKeyAllowed =
false;
1451 IsAdjacentValueAllowedInFlow =
true;
1456bool Scanner::scanPlainScalar() {
1458 unsigned ColStart = Column;
1459 unsigned LeadingBlanks = 0;
1460 assert(Indent >= -1 &&
"Indent must be >= -1 !");
1461 unsigned indent =
static_cast<unsigned>(Indent + 1);
1462 while (Current != End) {
1463 if (*Current ==
'#')
1466 while (Current != End &&
1467 ((*Current !=
':' && isPlainSafeNonBlank(Current)) ||
1468 (*Current ==
':' && isPlainSafeNonBlank(Current + 1)))) {
1477 if (!isBlankOrBreak(Current))
1482 while (isBlankOrBreak(Tmp)) {
1485 if (LeadingBlanks && (Column <
indent) && *Tmp ==
'\t') {
1486 setError(
"Found invalid tab character in indentation", Tmp);
1492 i = skip_b_break(Tmp);
1501 if (!FlowLevel && Column <
indent)
1506 if (Start == Current) {
1507 setError(
"Got empty plain scalar", Start);
1516 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1518 IsSimpleKeyAllowed =
false;
1519 IsAdjacentValueAllowedInFlow =
false;
1524bool Scanner::scanAliasOrAnchor(
bool IsAlias) {
1526 unsigned ColStart = Column;
1528 while (Current != End) {
1529 if ( *Current ==
'[' || *Current ==
']'
1530 || *Current ==
'{' || *Current ==
'}'
1541 if (Start + 1 == Current) {
1542 setError(
"Got empty alias or anchor", Start);
1552 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1554 IsSimpleKeyAllowed =
false;
1555 IsAdjacentValueAllowedInFlow =
false;
1560bool Scanner::scanBlockScalarIndicators(
char &StyleIndicator,
1561 char &ChompingIndicator,
1562 unsigned &IndentIndicator,
1564 StyleIndicator = scanBlockStyleIndicator();
1565 if (!scanBlockScalarHeader(ChompingIndicator, IndentIndicator, IsDone))
1570char Scanner::scanBlockStyleIndicator() {
1571 char Indicator =
' ';
1572 if (Current != End && (*Current ==
'>' || *Current ==
'|')) {
1573 Indicator = *Current;
1579char Scanner::scanBlockChompingIndicator() {
1580 char Indicator =
' ';
1581 if (Current != End && (*Current ==
'+' || *Current ==
'-')) {
1582 Indicator = *Current;
1594 if (ChompingIndicator ==
'-')
1596 if (ChompingIndicator ==
'+')
1599 return Str.empty() ? 0 : 1;
1602unsigned Scanner::scanBlockIndentationIndicator() {
1603 unsigned Indent = 0;
1604 if (Current != End && (*Current >=
'1' && *Current <=
'9')) {
1611bool Scanner::scanBlockScalarHeader(
char &ChompingIndicator,
1612 unsigned &IndentIndicator,
bool &IsDone) {
1613 auto Start = Current;
1615 ChompingIndicator = scanBlockChompingIndicator();
1616 IndentIndicator = scanBlockIndentationIndicator();
1618 if (ChompingIndicator ==
' ')
1619 ChompingIndicator = scanBlockChompingIndicator();
1620 Current = skip_while(&Scanner::skip_s_white, Current);
1623 if (Current == End) {
1632 if (!consumeLineBreakIfPresent()) {
1633 setError(
"Expected a line break after block scalar header", Current);
1639bool Scanner::findBlockScalarIndent(
unsigned &BlockIndent,
1640 unsigned BlockExitIndent,
1641 unsigned &LineBreaks,
bool &IsDone) {
1642 unsigned MaxAllSpaceLineCharacters = 0;
1646 advanceWhile(&Scanner::skip_s_space);
1647 if (skip_nb_char(Current) != Current) {
1649 if (Column <= BlockExitIndent) {
1654 BlockIndent = Column;
1655 if (MaxAllSpaceLineCharacters > BlockIndent) {
1657 "Leading all-spaces line must be smaller than the block indent",
1658 LongestAllSpaceLine);
1663 if (skip_b_break(Current) != Current &&
1664 Column > MaxAllSpaceLineCharacters) {
1667 MaxAllSpaceLineCharacters = Column;
1668 LongestAllSpaceLine = Current;
1672 if (Current == End) {
1677 if (!consumeLineBreakIfPresent()) {
1686bool Scanner::scanBlockScalarIndent(
unsigned BlockIndent,
1687 unsigned BlockExitIndent,
bool &IsDone) {
1689 while (Column < BlockIndent) {
1690 auto I = skip_s_space(Current);
1697 if (skip_nb_char(Current) == Current)
1700 if (Column <= BlockExitIndent) {
1705 if (Column < BlockIndent) {
1706 if (Current != End && *Current ==
'#') {
1710 setError(
"A text line is less indented than the block scalar", Current);
1716bool Scanner::scanBlockScalar(
bool IsLiteral) {
1717 assert(*Current ==
'|' || *Current ==
'>');
1718 char StyleIndicator;
1719 char ChompingIndicator;
1720 unsigned BlockIndent;
1721 bool IsDone =
false;
1722 if (!scanBlockScalarIndicators(StyleIndicator, ChompingIndicator, BlockIndent,
1727 bool IsFolded = StyleIndicator ==
'>';
1729 const auto *Start = Current;
1730 unsigned BlockExitIndent = Indent < 0 ? 0 : (
unsigned)Indent;
1731 unsigned LineBreaks = 0;
1732 if (BlockIndent == 0) {
1733 if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
1741 if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
1747 auto LineStart = Current;
1748 advanceWhile(&Scanner::skip_nb_char);
1749 if (LineStart != Current) {
1750 if (LineBreaks && IsFolded && !Scanner::isLineEmpty(Str)) {
1754 if (LineBreaks == 1) {
1755 Str.append(LineBreaks,
1756 isLineEmpty(
StringRef(LineStart, Current - LineStart))
1766 Str.append(LineBreaks,
'\n');
1767 Str.append(
StringRef(LineStart, Current - LineStart));
1775 if (!consumeLineBreakIfPresent())
1780 if (Current == End && !LineBreaks)
1787 IsSimpleKeyAllowed =
true;
1788 IsAdjacentValueAllowedInFlow =
false;
1793 T.Value = std::string(Str);
1798bool Scanner::scanTag() {
1800 unsigned ColStart = Column;
1802 if (Current == End || isBlankOrBreak(Current));
1803 else if (*Current ==
'<') {
1810 Current = skip_while(&Scanner::skip_ns_char, Current);
1819 saveSimpleKeyCandidate(--TokenQueue.
end(), ColStart,
false);
1821 IsSimpleKeyAllowed =
false;
1822 IsAdjacentValueAllowedInFlow =
false;
1827bool Scanner::fetchMoreTokens() {
1828 if (IsStartOfStream)
1829 return scanStreamStart();
1834 return scanStreamEnd();
1836 removeStaleSimpleKeyCandidates();
1838 unrollIndent(Column);
1840 if (Column == 0 && *Current ==
'%')
1841 return scanDirective();
1843 if (Column == 0 && Current + 4 <= End
1845 && *(Current + 1) ==
'-'
1846 && *(Current + 2) ==
'-'
1847 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1848 return scanDocumentIndicator(
true);
1850 if (Column == 0 && Current + 4 <= End
1852 && *(Current + 1) ==
'.'
1853 && *(Current + 2) ==
'.'
1854 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1855 return scanDocumentIndicator(
false);
1857 if (*Current ==
'[')
1858 return scanFlowCollectionStart(
true);
1860 if (*Current ==
'{')
1861 return scanFlowCollectionStart(
false);
1863 if (*Current ==
']')
1864 return scanFlowCollectionEnd(
true);
1866 if (*Current ==
'}')
1867 return scanFlowCollectionEnd(
false);
1869 if (*Current ==
',')
1870 return scanFlowEntry();
1872 if (*Current ==
'-' && (isBlankOrBreak(Current + 1) || Current + 1 == End))
1873 return scanBlockEntry();
1875 if (*Current ==
'?' && (Current + 1 == End || isBlankOrBreak(Current + 1)))
1878 if (*Current ==
':' &&
1879 (!isPlainSafeNonBlank(Current + 1) || IsAdjacentValueAllowedInFlow))
1882 if (*Current ==
'*')
1883 return scanAliasOrAnchor(
true);
1885 if (*Current ==
'&')
1886 return scanAliasOrAnchor(
false);
1888 if (*Current ==
'!')
1891 if (*Current ==
'|' && !FlowLevel)
1892 return scanBlockScalar(
true);
1894 if (*Current ==
'>' && !FlowLevel)
1895 return scanBlockScalar(
false);
1897 if (*Current ==
'\'')
1898 return scanFlowScalar(
false);
1900 if (*Current ==
'"')
1901 return scanFlowScalar(
true);
1905 if ((!isBlankOrBreak(Current) &&
1906 FirstChar.find_first_of(
"-?:,[]{}#&*!|>'\"%@`") ==
StringRef::npos) ||
1908 isPlainSafeNonBlank(Current + 1)))
1909 return scanPlainScalar();
1911 setError(
"Unrecognized character while tokenizing.", Current);
1916 std::error_code *EC)
1917 : scanner(new
Scanner(Input, SM, ShowColors, EC)) {}
1920 std::error_code *EC)
1921 : scanner(new
Scanner(InputBuffer, SM, ShowColors, EC)) {}
1933 scanner->printError(
Range.Start, Kind, Msg,
Range);
1943 CurrentDoc.reset(
new Document(*
this));
1960 SourceRange =
SMRange(Start, Start);
1965 if (!Raw.
empty() && Raw !=
"!") {
1968 Ret = std::string(
Doc->getTagMap().find(
"!")->second);
1972 Ret = std::string(
Doc->getTagMap().find(
"!!")->second);
1977 std::map<StringRef, StringRef>::const_iterator It =
1978 Doc->getTagMap().find(TagHandle);
1979 if (It !=
Doc->getTagMap().end())
1980 Ret = std::string(It->second);
1984 T.Range = TagHandle;
1994 return "tag:yaml.org,2002:null";
1998 return "tag:yaml.org,2002:str";
2000 return "tag:yaml.org,2002:map";
2002 return "tag:yaml.org,2002:seq";
2009 return Doc->peekNext();
2013 return Doc->getNext();
2017 return Doc->parseBlockNode();
2021 return Doc->NodeAllocator;
2025 Doc->setError(Msg, Tok);
2029 return Doc->failed();
2033 if (
Value[0] ==
'"')
2034 return getDoubleQuotedValue(
Value, Storage);
2035 if (
Value[0] ==
'\'')
2036 return getSingleQuotedValue(
Value, Storage);
2037 return getPlainValue(
Value, Storage);
2061 return UnquotedValue;
2065 char LastNewLineAddedAs =
'\0';
2067 if (UnquotedValue[
I] !=
'\r' && UnquotedValue[
I] !=
'\n') {
2069 UnquotedValue = UnescapeCallback(UnquotedValue.
drop_front(
I), Storage);
2070 LastNewLineAddedAs =
'\0';
2077 LastNewLineAddedAs =
' ';
2083 switch (LastNewLineAddedAs) {
2086 Storage.
back() =
'\n';
2087 LastNewLineAddedAs =
'\n';
2095 LastNewLineAddedAs =
' ';
2100 if (UnquotedValue.
substr(
I, 2) ==
"\r\n")
2109ScalarNode::getDoubleQuotedValue(
StringRef RawValue,
2112 RawValue.
back() ==
'"');
2115 auto UnescapeFunc = [
this](
StringRef UnquotedValue,
2118 if (UnquotedValue.
size() == 1) {
2120 T.Range = UnquotedValue;
2121 setError(
"Unrecognized escape code",
T);
2126 switch (UnquotedValue[0]) {
2130 setError(
"Unrecognized escape code",
T);
2136 if (UnquotedValue.
size() >= 2 && UnquotedValue[1] ==
'\n')
2194 if (UnquotedValue.
size() < 3)
2197 unsigned int UnicodeScalarValue;
2200 UnicodeScalarValue = 0xFFFD;
2205 if (UnquotedValue.
size() < 5)
2208 unsigned int UnicodeScalarValue;
2211 UnicodeScalarValue = 0xFFFD;
2216 if (UnquotedValue.
size() < 9)
2219 unsigned int UnicodeScalarValue;
2222 UnicodeScalarValue = 0xFFFD;
2236 RawValue.
back() ==
'\'');
2239 auto UnescapeFunc = [](
StringRef UnquotedValue,
2254 RawValue = RawValue.
rtrim(
"\r\n \t");
2309 setError(
"Unexpected token in Key Value.", t);
2325void MappingNode::increment() {
2328 CurrentEntry =
nullptr;
2332 CurrentEntry->
skip();
2335 CurrentEntry =
nullptr;
2348 CurrentEntry =
nullptr;
2351 setError(
"Unexpected token. Expected Key or Block End",
T);
2355 CurrentEntry =
nullptr;
2369 CurrentEntry =
nullptr;
2372 setError(
"Unexpected token. Expected Key, Flow Entry, or Flow "
2376 CurrentEntry =
nullptr;
2384 CurrentEntry =
nullptr;
2388 CurrentEntry->
skip();
2395 if (!CurrentEntry) {
2397 CurrentEntry =
nullptr;
2403 CurrentEntry =
nullptr;
2406 setError(
"Unexpected token. Expected Block Entry or Block End."
2411 CurrentEntry =
nullptr;
2418 if (!CurrentEntry) {
2420 CurrentEntry =
nullptr;
2426 CurrentEntry =
nullptr;
2428 }
else if (SeqType ==
ST_Flow) {
2433 WasPreviousTokenFlowEntry =
true;
2441 CurrentEntry =
nullptr;
2446 setError(
"Could not find closing ]!",
T);
2449 CurrentEntry =
nullptr;
2452 if (!WasPreviousTokenFlowEntry) {
2453 setError(
"Expected , between entries!",
T);
2455 CurrentEntry =
nullptr;
2460 if (!CurrentEntry) {
2463 WasPreviousTokenFlowEntry =
false;
2472 TagMap[
"!!"] =
"tag:yaml.org,2002:";
2474 if (parseDirectives())
2482 if (stream.scanner->failed())
2497Token &Document::peekNext() {
2498 return stream.scanner->peekNext();
2501Token Document::getNext() {
2502 return stream.scanner->getNext();
2505void Document::setError(
const Twine &Message,
Token &Location)
const {
2506 stream.scanner->setError(Message, Location.Range.begin());
2509bool Document::failed()
const {
2510 return stream.scanner->failed();
2522 return new (NodeAllocator)
AliasNode(stream.CurrentDoc,
T.Range.substr(1));
2525 setError(
"Already encountered an anchor for this node!",
T);
2528 AnchorInfo = getNext();
2530 goto parse_property;
2533 setError(
"Already encountered a tag for this node!",
T);
2536 TagInfo = getNext();
2538 goto parse_property;
2548 return new (NodeAllocator)
SequenceNode( stream.CurrentDoc
2554 return new (NodeAllocator)
2561 return new (NodeAllocator)
2568 return new (NodeAllocator)
2575 return new (NodeAllocator)
2582 return new (NodeAllocator)
2589 StringRef NullTerminatedStr(
T.Value.c_str(),
T.Value.length() + 1);
2591 return new (NodeAllocator)
2593 TagInfo.
Range, StrCopy,
T.Range);
2597 return new (NodeAllocator)
2608 return new (NodeAllocator)
NullNode(stream.CurrentDoc);
2612 if (Root && (isa<MappingNode>(Root) || isa<SequenceNode>(Root)))
2613 return new (NodeAllocator)
NullNode(stream.CurrentDoc);
2615 setError(
"Unexpected token",
T);
2625bool Document::parseDirectives() {
2626 bool isDirective =
false;
2630 parseTAGDirective();
2633 parseYAMLDirective();
2642void Document::parseYAMLDirective() {
2646void Document::parseTAGDirective() {
2650 T =
T.substr(
T.find_first_of(
" \t")).ltrim(
" \t");
2651 std::size_t HandleEnd =
T.find_first_of(
" \t");
2652 StringRef TagHandle =
T.substr(0, HandleEnd);
2653 StringRef TagPrefix =
T.substr(HandleEnd).ltrim(
" \t");
2654 TagMap[TagHandle] = TagPrefix;
2657bool Document::expectToken(
int TK) {
2660 setError(
"Unexpected token",
T);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_ATTRIBUTE_NOINLINE
LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so, mark a method "not for inlining".
std::optional< std::vector< StOtherPiece > > Other
static Cursor skipComment(Cursor C)
Skip a line comment and return the updated cursor.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file defines the SmallString class.
This file defines the SmallVector class.
static EncodingInfo getUnicodeEncoding(StringRef Input)
getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode encoding form of Input.
static bool is_ns_hex_digit(const char C)
static bool is_ns_word_char(const char C)
static unsigned getChompedLineBreaks(char ChompingIndicator, unsigned LineBreaks, StringRef Str)
Get the number of line breaks after chomping.
std::pair< uint32_t, unsigned > UTF8Decoded
The Unicode scalar value of a UTF-8 minimal well-formed code unit subsequence and the subsequence's length in code units. A length of 0 indicates that decoding failed.
@ UEF_UTF32_LE
UTF-32 Little Endian.
@ UEF_UTF16_BE
UTF-16 Big Endian.
@ UEF_UTF16_LE
UTF-16 Little Endian.
@ UEF_UTF32_BE
UTF-32 Big Endian.
@ UEF_UTF8
UTF-8 or ascii.
@ UEF_Unknown
Not a valid Unicode encoding.
static LLVM_ATTRIBUTE_NOINLINE bool wasEscaped(StringRef::iterator First, StringRef::iterator Position)
static StringRef parseScalarValue(StringRef UnquotedValue, SmallVectorImpl< char > &Storage, StringRef LookupChars, std::function< StringRef(StringRef, SmallVectorImpl< char > &)> UnescapeCallback)
parseScalarValue - A common parsing routine for all flow scalar styles.
std::pair< UnicodeEncodingForm, unsigned > EncodingInfo
EncodingInfo - Holds the encoding type and length of the byte order mark if it exists.
static UTF8Decoded decodeUTF8(StringRef Range)
static void encodeUTF8(uint32_t UnicodeScalarValue, SmallVectorImpl< char > &Result)
encodeUTF8 - Encode UnicodeScalarValue in UTF-8 and append it to result.
A linked-list with a custom, local allocator.
iterator insert(iterator I, T &&V)
void resetAlloc()
Reset the underlying allocator.
IteratorImpl< T, typename list_type::iterator > iterator
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Allocate memory in an ever growing pool, as if by bump-pointer.
Tagged union holding either a T or a Error.
const char * getBufferStart() const
const char * getBufferEnd() const
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
Represents a range in source code.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling.
LLVM_ABI void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges={}, ArrayRef< SMFixIt > FixIts={}, bool ShowColors=true) const
Emit a message about the specified location with the specified string.
unsigned AddNewSourceBuffer(std::unique_ptr< MemoryBuffer > F, SMLoc IncludeLoc)
Add a new source buffer to this source manager.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
LLVM_ABI size_t find_last_not_of(char C, size_t From=npos) const
Find the last character in the string that is not C, or npos if not found.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
char back() const
back - Get the last character in the string.
constexpr size_t size() const
size - Get the string size.
char front() const
front - Get the first character in the string.
size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
StringRef ltrim(char Char) const
Return string with consecutive Char characters starting from the left removed.
size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
StringRef take_front(size_t N=1) const
Return a StringRef equal to 'this' but with only the first N elements remaining.
StringRef copy(Allocator &A) const
static constexpr size_t npos
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
TypeID
Definitions of all of the base types for the Type system.
LLVM Value Representation.
This class implements an extremely fast bulk output stream that can only output to a stream.
Represents an alias to a Node with an anchor.
A block scalar node is an opaque datum that can be presented as a series of zero or more Unicode scal...
A YAML Stream is a sequence of Documents.
LLVM_ABI Node * parseBlockNode()
Root for parsing a node. Returns a single node.
LLVM_ABI bool skip()
Finish parsing the current document and return true if there are more.
Node * getRoot()
Parse and return the root level node.
LLVM_ABI Document(Stream &ParentStream)
Node * getValue()
Parse and return the value.
Node * getKey()
Parse and return the key.
Represents a YAML map created from either a block map or a flow map.
@ MT_Inline
An inline mapping node is used for "[key: value]".
Abstract base class for all Nodes.
StringRef getRawTag() const
Get the tag as it was written in the document.
unsigned int getType() const
std::string getVerbatimTag() const
Get the verbatim tag for a given Node.
std::unique_ptr< Document > & Doc
Node(unsigned int Type, std::unique_ptr< Document > &, StringRef Anchor, StringRef Tag)
BumpPtrAllocator & getAllocator()
void setError(const Twine &Message, Token &Location) const
A scalar node is an opaque datum that can be presented as a series of zero or more Unicode scalar val...
StringRef getValue(SmallVectorImpl< char > &Storage) const
Gets the value of this node as a StringRef.
Scans YAML tokens from a MemoryBuffer.
Scanner(StringRef Input, SourceMgr &SM, bool ShowColors=true, std::error_code *EC=nullptr)
void setError(const Twine &Message, StringRef::iterator Position)
Token getNext()
Parse the next token and pop it from the queue.
bool failed()
Returns true if an error occurred while parsing.
void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, ArrayRef< SMRange > Ranges={})
Token & peekNext()
Parse the next token and return it without popping it.
Represents a YAML sequence created from either a block sequence or a flow sequence.
This class represents a YAML stream potentially containing multiple documents.
LLVM_ABI document_iterator end()
LLVM_ABI document_iterator begin()
LLVM_ABI Stream(StringRef Input, SourceMgr &, bool ShowColors=true, std::error_code *EC=nullptr)
This keeps a reference to the string referenced by Input.
LLVM_ABI void printError(Node *N, const Twine &Msg, SourceMgr::DiagKind Kind=SourceMgr::DK_Error)
Iterator abstraction for Documents over a Stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI bool isPrintable(int UCS)
Determines if a character is likely to be displayed correctly on the terminal.
LLVM_ABI bool dumpTokens(StringRef Input, raw_ostream &)
Dump all the tokens in this stream to OS.
LLVM_ABI std::optional< bool > parseBool(StringRef S)
Parse S as a bool according to https://yaml.org/type/bool.html.
LLVM_ABI bool scanTokens(StringRef Input)
Scans all tokens in input without outputting anything.
void skip(CollectionType &C)
LLVM_ABI std::string escape(StringRef Input, bool EscapePrintable=true)
Escape Input for a double quoted scalar; if EscapePrintable is true, all UTF8 sequences will be escaped.
This is an optimization pass for GlobalISel generic memory operations.
std::error_code make_error_code(BitcodeError E)
testing::Matcher< const detail::ErrorHolder & > Failed()
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Token - A single YAML token.
enum llvm::yaml::Token::TokenKind Kind
std::string Value
The value of a block scalar node.
StringRef Range
A string of length 0 or more whose begin() points to the logical location of the token in the input.