122 bool EndStatementAtEOF) {
128 CurPtr = CurBuf.
begin();
131 this->EndStatementAtEOF = EndStatementAtEOF;
136AsmToken AsmLexer::ReturnError(
const char *Loc,
const std::string &Msg) {
142int AsmLexer::getNextChar() {
143 if (CurPtr == CurBuf.
end())
145 return (
unsigned char)*CurPtr++;
148int AsmLexer::peekNextChar() {
149 if (CurPtr == CurBuf.
end())
151 return (
unsigned char)*CurPtr;
157AsmToken AsmLexer::LexFloatLiteral() {
162 if (*CurPtr ==
'-' || *CurPtr ==
'+')
163 return ReturnError(CurPtr,
"invalid sign in float literal");
166 if ((*CurPtr ==
'e' || *CurPtr ==
'E')) {
169 if (*CurPtr ==
'-' || *CurPtr ==
'+')
186AsmToken AsmLexer::LexHexFloatLiteral(
bool NoIntDigits) {
187 assert((*CurPtr ==
'p' || *CurPtr ==
'P' || *CurPtr ==
'.') &&
188 "unexpected parse state in floating hex");
189 bool NoFracDigits =
true;
192 if (*CurPtr ==
'.') {
195 const char *FracStart = CurPtr;
199 NoFracDigits = CurPtr == FracStart;
202 if (NoIntDigits && NoFracDigits)
203 return ReturnError(TokStart,
"invalid hexadecimal floating-point constant: "
204 "expected at least one significand digit");
207 if (*CurPtr !=
'p' && *CurPtr !=
'P')
208 return ReturnError(TokStart,
"invalid hexadecimal floating-point constant: "
209 "expected exponent part 'p'");
212 if (*CurPtr ==
'+' || *CurPtr ==
'-')
216 const char *ExpStart = CurPtr;
220 if (CurPtr == ExpStart)
221 return ReturnError(TokStart,
"invalid hexadecimal floating-point constant: "
222 "expected at least one exponent digit");
229 return isAlnum(
C) ||
C ==
'_' ||
C ==
'$' ||
C ==
'.' ||
C ==
'?' ||
230 (AllowAt &&
C ==
'@') || (AllowHash &&
C ==
'#');
235 if (CurPtr[-1] ==
'.' &&
isDigit(*CurPtr)) {
241 AllowHashInIdentifier) ||
242 *CurPtr ==
'e' || *CurPtr ==
'E')
243 return LexFloatLiteral();
246 while (
isIdentifierChar(*CurPtr, AllowAtInIdentifier, AllowHashInIdentifier))
250 if (CurPtr == TokStart+1 && TokStart[0] ==
'.')
261 IsAtStartOfStatement =
false;
267 IsAtStartOfStatement =
false;
271 return LexLineComment();
273 IsAtStartOfStatement =
false;
279 const char *CommentTextStart = CurPtr;
280 while (CurPtr != CurBuf.
end()) {
287 if (CommentConsumer) {
290 StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
297 return ReturnError(TokStart,
"unterminated comment");
302AsmToken AsmLexer::LexLineComment() {
307 const char *CommentTextStart = CurPtr;
308 int CurChar = getNextChar();
309 while (CurChar !=
'\n' && CurChar !=
'\r' && CurChar != EOF)
310 CurChar = getNextChar();
311 const char *NewlinePtr = CurPtr;
312 if (CurChar ==
'\r' && CurPtr != CurBuf.
end() && *CurPtr ==
'\n')
316 if (CommentConsumer) {
319 StringRef(CommentTextStart, NewlinePtr - 1 - CommentTextStart));
322 IsAtStartOfLine =
true;
324 if (IsAtStartOfStatement)
327 IsAtStartOfStatement =
true;
330 StringRef(TokStart, CurPtr - 1 - TokStart));
335 if (CurPtr[0] ==
'U' || CurPtr[0] ==
'u')
337 if (CurPtr[0] ==
'L' || CurPtr[0] ==
'l')
339 if (CurPtr[0] ==
'L' || CurPtr[0] ==
'l')
347 const char *FirstNonDec =
nullptr;
348 const char *LookAhead = CurPtr;
354 FirstNonDec = LookAhead;
363 bool isHex = LexHex && (*LookAhead ==
'h' || *LookAhead ==
'H');
364 CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;
370static const char *
findLastDigit(
const char *CurPtr,
unsigned DefaultRadix) {
371 while (hexDigitValue(*CurPtr) < DefaultRadix) {
378 if (
Value.isIntN(64))
392 return "hexadecimal";
394 return "base-" + std::to_string(Radix);
410 if (LexMasmIntegers && isdigit(CurPtr[-1])) {
411 const char *FirstNonBinary =
412 (CurPtr[-1] !=
'0' && CurPtr[-1] !=
'1') ? CurPtr - 1 :
nullptr;
413 const char *FirstNonDecimal =
414 (CurPtr[-1] <
'0' || CurPtr[-1] >
'9') ? CurPtr - 1 :
nullptr;
415 const char *OldCurPtr = CurPtr;
419 if (!FirstNonDecimal) {
420 FirstNonDecimal = CurPtr;
431 if (!FirstNonBinary) {
432 FirstNonBinary = CurPtr;
441 if (*CurPtr ==
'.') {
445 return LexFloatLiteral();
448 if (LexMasmHexFloats && (*CurPtr ==
'r' || *CurPtr ==
'R')) {
454 if (*CurPtr ==
'h' || *CurPtr ==
'H') {
458 }
else if (*CurPtr ==
't' || *CurPtr ==
'T') {
462 }
else if (*CurPtr ==
'o' || *CurPtr ==
'O' || *CurPtr ==
'q' ||
467 }
else if (*CurPtr ==
'y' || *CurPtr ==
'Y') {
471 }
else if (FirstNonDecimal && FirstNonDecimal + 1 == CurPtr &&
473 (*FirstNonDecimal ==
'd' || *FirstNonDecimal ==
'D')) {
475 }
else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
477 (*FirstNonBinary ==
'b' || *FirstNonBinary ==
'B')) {
486 return ReturnError(TokStart,
"invalid " +
radixName(Radix) +
" number");
500 if (LexMasmIntegers && UseMasmDefaultRadix) {
506 return ReturnError(TokStart,
507 "invalid " +
radixName(DefaultRadix) +
" number");
514 if (LexMotorolaIntegers && CurPtr[-1] ==
'$') {
515 const char *NumStart = CurPtr;
520 if (
StringRef(NumStart, CurPtr - NumStart).getAsInteger(16, Result))
521 return ReturnError(TokStart,
"invalid hexadecimal number");
527 if (LexMotorolaIntegers && CurPtr[-1] ==
'%') {
528 const char *NumStart = CurPtr;
529 while (*CurPtr ==
'0' || *CurPtr ==
'1')
533 if (
StringRef(NumStart, CurPtr - NumStart).getAsInteger(2, Result))
534 return ReturnError(TokStart,
"invalid binary number");
543 if (LexHLASMIntegers || CurPtr[-1] !=
'0' || CurPtr[0] ==
'.') {
546 if (!LexHLASMIntegers) {
547 bool IsHex = Radix == 16;
549 if (!IsHex && (*CurPtr ==
'.' || *CurPtr ==
'e' || *CurPtr ==
'E')) {
552 return LexFloatLiteral();
560 return ReturnError(TokStart,
"invalid " +
radixName(Radix) +
" number");
562 if (!LexHLASMIntegers)
570 if (!LexMasmIntegers && ((*CurPtr ==
'b') || (*CurPtr ==
'B'))) {
578 const char *NumStart = CurPtr;
579 while (CurPtr[0] ==
'0' || CurPtr[0] ==
'1')
583 if (CurPtr == NumStart)
584 return ReturnError(TokStart,
"invalid binary number");
590 return ReturnError(TokStart,
"invalid binary number");
599 if ((*CurPtr ==
'x') || (*CurPtr ==
'X')) {
601 const char *NumStart = CurPtr;
607 if (CurPtr[0] ==
'.' || CurPtr[0] ==
'p' || CurPtr[0] ==
'P')
608 return LexHexFloatLiteral(NumStart == CurPtr);
611 if (CurPtr == NumStart)
612 return ReturnError(CurPtr-2,
"invalid hexadecimal number");
615 if (
StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
616 return ReturnError(TokStart,
"invalid hexadecimal number");
619 if (LexMasmIntegers && (*CurPtr ==
'h' || *CurPtr ==
'H'))
634 return ReturnError(TokStart,
"invalid " +
radixName(Radix) +
" number");
648AsmToken AsmLexer::LexSingleQuote() {
649 int CurChar = getNextChar();
652 return ReturnError(TokStart,
"invalid usage of character literals");
654 if (LexMasmStrings) {
655 while (CurChar != EOF) {
656 if (CurChar !=
'\'') {
657 CurChar = getNextChar();
658 }
else if (peekNextChar() ==
'\'') {
662 CurChar = getNextChar();
668 return ReturnError(TokStart,
"unterminated string constant");
673 CurChar = getNextChar();
676 return ReturnError(TokStart,
"unterminated single quote");
678 CurChar = getNextChar();
681 return ReturnError(TokStart,
"single quote way too long");
689 char theChar = Res[2];
691 default:
Value = theChar;
break;
692 case '\'':
Value =
'\'';
break;
693 case 't':
Value =
'\t';
break;
694 case 'n':
Value =
'\n';
break;
695 case 'b':
Value =
'\b';
break;
696 case 'f':
Value =
'\f';
break;
697 case 'r':
Value =
'\r';
break;
707 int CurChar = getNextChar();
709 return ReturnError(TokStart,
"invalid usage of string literals");
711 if (LexMasmStrings) {
712 while (CurChar != EOF) {
713 if (CurChar !=
'"') {
714 CurChar = getNextChar();
715 }
else if (peekNextChar() ==
'"') {
719 CurChar = getNextChar();
725 return ReturnError(TokStart,
"unterminated string constant");
729 while (CurChar !=
'"') {
730 if (CurChar ==
'\\') {
732 CurChar = getNextChar();
736 return ReturnError(TokStart,
"unterminated string constant");
738 CurChar = getNextChar();
747 while (!isAtStartOfComment(CurPtr) &&
748 !isAtStatementSeparator(CurPtr) &&
749 *CurPtr !=
'\n' && *CurPtr !=
'\r' && CurPtr != CurBuf.
end()) {
752 return StringRef(TokStart, CurPtr-TokStart);
758 while (*CurPtr !=
'\n' && *CurPtr !=
'\r' && CurPtr != CurBuf.
end()) {
761 return StringRef(TokStart, CurPtr-TokStart);
765 bool ShouldSkipSpace) {
772 std::string SavedErr =
getErr();
776 for (ReadCount = 0; ReadCount < Buf.
size(); ++ReadCount) {
779 Buf[ReadCount] = Token;
787 SetError(SavedErrLoc, SavedErr);
791bool AsmLexer::isAtStartOfComment(
const char *
Ptr) {
792 if (MAI.
isHLASM() && !IsAtStartOfStatement)
797 if (CommentString.
size() == 1)
798 return CommentString[0] ==
Ptr[0];
801 if (CommentString[1] ==
'#')
802 return CommentString[0] ==
Ptr[0];
804 return strncmp(
Ptr, CommentString.
data(), CommentString.
size()) == 0;
807bool AsmLexer::isAtStatementSeparator(
const char *
Ptr) {
815 int CurChar = getNextChar();
817 if (!IsPeeking && CurChar ==
'#' && IsAtStartOfStatement) {
834 return LexLineComment();
837 if (isAtStartOfComment(TokStart)) {
839 return LexLineComment();
842 if (isAtStatementSeparator(TokStart)) {
844 IsAtStartOfLine =
true;
845 IsAtStartOfStatement =
true;
852 if (CurChar == EOF && !IsAtStartOfStatement && EndStatementAtEOF) {
853 IsAtStartOfLine =
true;
854 IsAtStartOfStatement =
true;
857 IsAtStartOfLine =
false;
858 bool OldIsAtStartOfStatement = IsAtStartOfStatement;
859 IsAtStartOfStatement =
false;
866 if (isalpha(CurChar) || CurChar ==
'_' || CurChar ==
'.')
867 return LexIdentifier();
870 return ReturnError(TokStart,
"invalid character in input");
872 if (EndStatementAtEOF) {
873 IsAtStartOfLine =
true;
874 IsAtStartOfStatement =
true;
880 IsAtStartOfStatement = OldIsAtStartOfStatement;
881 while (*CurPtr ==
' ' || *CurPtr ==
'\t')
888 IsAtStartOfLine =
true;
889 IsAtStartOfStatement =
true;
891 if (CurPtr != CurBuf.
end() && *CurPtr ==
'\n')
897 IsAtStartOfLine =
true;
898 IsAtStartOfStatement =
true;
912 if (LexMotorolaIntegers &&
isHexDigit(*CurPtr))
915 return LexIdentifier();
920 return LexIdentifier();
924 return LexIdentifier();
928 return LexIdentifier();
932 if (*CurPtr ==
'=') {
938 if (*CurPtr ==
'>') {
944 if (*CurPtr ==
'|') {
951 if (*CurPtr ==
'&') {
957 if (*CurPtr ==
'=') {
963 if (LexMotorolaIntegers && (*CurPtr ==
'0' || *CurPtr ==
'1')) {
968 IsAtStartOfStatement = OldIsAtStartOfStatement;
970 case '\'':
return LexSingleQuote();
971 case '"':
return LexQuote();
972 case '0':
case '1':
case '2':
case '3':
case '4':
973 case '5':
case '6':
case '7':
case '8':
case '9':
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
static std::string radixName(unsigned Radix)
static void SkipIgnoredIntegerSuffix(const char *&CurPtr)
static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix, bool LexHex)
static AsmToken intToken(StringRef Ref, APInt &Value)
static const char * findLastDigit(const char *CurPtr, unsigned DefaultRadix)
static bool isIdentifierChar(char C)
Return true if the given character satisfies the following regular expression: [-a-zA-Z$....
static bool isDigit(const char C)
static bool isHexDigit(const char C)
This file provides utility classes that use RAII to save and restore values.
Class for arbitrary precision integers.
size_t size() const
size - Get the array size.
LLVM_ABI AsmLexer(const MCAsmInfo &MAI)
void UnLex(AsmToken const &Token)
bool is(AsmToken::TokenKind K) const
Check if the current token has kind K.
SMLoc getErrLoc()
Get the current error location.
const std::string & getErr()
Get the current error string.
LLVM_ABI StringRef LexUntilEndOfStatement()
LLVM_ABI void setBuffer(StringRef Buf, const char *ptr=nullptr, bool EndStatementAtEOF=true)
LLVM_ABI size_t peekTokens(MutableArrayRef< AsmToken > Buf, bool ShouldSkipSpace=true)
Look ahead an arbitrary number of tokens.
Target independent representation for an assembler token.
LLVM_ABI SMLoc getLoc() const
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
bool is(TokenKind K) const
LLVM_ABI SMLoc getEndLoc() const
LLVM_ABI void dump(raw_ostream &OS) const
LLVM_ABI SMRange getLocRange() const
This class is intended to be used as a base class for asm properties and features specific to the tar...
bool useAtForSpecifier() const
bool doesAllowDollarAtStartOfIdentifier() const
bool shouldUseMotorolaIntegers() const
StringRef getCommentString() const
const char * getSeparatorString() const
bool doesAllowAtAtStartOfIdentifier() const
bool shouldAllowAdditionalComments() const
bool doesAllowQuestionAtStartOfIdentifier() const
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
Represents a range in source code.
StringRef - Represent a constant reference to a string, i.e.
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr size_t size() const
size - Get the string size.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
LLVM Value Representation.
This class implements an extremely fast bulk output stream that can only output to a stream.
raw_ostream & write_escaped(StringRef Str, bool UseHexEscapes=false)
Output Str, turning '\', '\t', ' ', '"', and anything that doesn't satisfy llvm::isPrint into an esca...
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
@ Ref
The access may reference the value stored in memory.
A utility class that uses RAII to save and restore the value of a variable.