LLVM 22.0.0git
BPFAsmParser.cpp
Go to the documentation of this file.
1//===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
13#include "llvm/MC/MCContext.h"
14#include "llvm/MC/MCExpr.h"
15#include "llvm/MC/MCInst.h"
16#include "llvm/MC/MCInstrInfo.h"
20#include "llvm/MC/MCStreamer.h"
25
26using namespace llvm;
27
28namespace {
29struct BPFOperand;
30
31class BPFAsmParser : public MCTargetAsmParser {
32
33 SMLoc getLoc() const { return getParser().getTok().getLoc(); }
34
35 bool PreMatchCheck(OperandVector &Operands);
36
37 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
40 bool MatchingInlineAsm) override;
41
42 bool parseRegister(MCRegister &Reo, SMLoc &StartLoc, SMLoc &EndLoc) override;
44 SMLoc &EndLoc) override;
45
47 SMLoc NameLoc, OperandVector &Operands) override;
48
49 // "=" is used as assignment operator for assembly statment, so can't be used
50 // for symbol assignment.
51 bool equalIsAsmAssignment() override { return false; }
52 // "*" is used for dereferencing memory that it will be the start of
53 // statement.
54 bool tokenIsStartOfStatement(AsmToken::TokenKind Token) override {
55 return Token == AsmToken::Star;
56 }
57
58#define GET_ASSEMBLER_HEADER
59#include "BPFGenAsmMatcher.inc"
60
63 ParseStatus parseOperandAsOperator(OperandVector &Operands);
64
65public:
66 enum BPFMatchResultTy {
67 Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
68#define GET_OPERAND_DIAGNOSTIC_TYPES
69#include "BPFGenAsmMatcher.inc"
70#undef GET_OPERAND_DIAGNOSTIC_TYPES
71 };
72
73 BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
74 const MCInstrInfo &MII, const MCTargetOptions &Options)
75 : MCTargetAsmParser(Options, STI, MII) {
76 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
77 }
78};
79
80/// BPFOperand - Instances of this class represent a parsed machine
81/// instruction operand.
82struct BPFOperand : public MCParsedAsmOperand {
83
84 enum KindTy {
85 Token,
87 Immediate,
88 } Kind;
89
90 struct RegOp {
91 MCRegister RegNum;
92 };
93
94 struct ImmOp {
95 const MCExpr *Val;
96 };
97
98 SMLoc StartLoc, EndLoc;
99 union {
100 StringRef Tok;
101 RegOp Reg;
102 ImmOp Imm;
103 };
104
105 BPFOperand(KindTy K) : Kind(K) {}
106
107public:
108 BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
109 Kind = o.Kind;
110 StartLoc = o.StartLoc;
111 EndLoc = o.EndLoc;
112
113 switch (Kind) {
114 case Register:
115 Reg = o.Reg;
116 break;
117 case Immediate:
118 Imm = o.Imm;
119 break;
120 case Token:
121 Tok = o.Tok;
122 break;
123 }
124 }
125
126 bool isToken() const override { return Kind == Token; }
127 bool isReg() const override { return Kind == Register; }
128 bool isImm() const override { return Kind == Immediate; }
129 bool isMem() const override { return false; }
130
131 bool isConstantImm() const {
132 return isImm() && isa<MCConstantExpr>(getImm());
133 }
134
135 int64_t getConstantImm() const {
136 const MCExpr *Val = getImm();
137 return static_cast<const MCConstantExpr *>(Val)->getValue();
138 }
139
140 bool isSImm16() const {
141 return (isConstantImm() && isInt<16>(getConstantImm()));
142 }
143
144 bool isSymbolRef() const { return isImm() && isa<MCSymbolRefExpr>(getImm()); }
145
146 bool isBrTarget() const { return isSymbolRef() || isSImm16(); }
147
148 /// getStartLoc - Gets location of the first token of this operand
149 SMLoc getStartLoc() const override { return StartLoc; }
150 /// getEndLoc - Gets location of the last token of this operand
151 SMLoc getEndLoc() const override { return EndLoc; }
152
153 MCRegister getReg() const override {
154 assert(Kind == Register && "Invalid type access!");
155 return Reg.RegNum;
156 }
157
158 const MCExpr *getImm() const {
159 assert(Kind == Immediate && "Invalid type access!");
160 return Imm.Val;
161 }
162
163 StringRef getToken() const {
164 assert(Kind == Token && "Invalid type access!");
165 return Tok;
166 }
167
168 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
169 switch (Kind) {
170 case Immediate:
171 MAI.printExpr(OS, *getImm());
172 break;
173 case Register:
174 OS << "<register x";
175 OS << getReg() << ">";
176 break;
177 case Token:
178 OS << "'" << getToken() << "'";
179 break;
180 }
181 }
182
183 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
184 assert(Expr && "Expr shouldn't be null!");
185
186 if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
187 Inst.addOperand(MCOperand::createImm(CE->getValue()));
188 else
190 }
191
192 // Used by the TableGen Code
193 void addRegOperands(MCInst &Inst, unsigned N) const {
194 assert(N == 1 && "Invalid number of operands!");
196 }
197
198 void addImmOperands(MCInst &Inst, unsigned N) const {
199 assert(N == 1 && "Invalid number of operands!");
200 addExpr(Inst, getImm());
201 }
202
203 static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
204 auto Op = std::make_unique<BPFOperand>(Token);
205 Op->Tok = Str;
206 Op->StartLoc = S;
207 Op->EndLoc = S;
208 return Op;
209 }
210
211 static std::unique_ptr<BPFOperand> createReg(MCRegister Reg, SMLoc S,
212 SMLoc E) {
213 auto Op = std::make_unique<BPFOperand>(Register);
214 Op->Reg.RegNum = Reg;
215 Op->StartLoc = S;
216 Op->EndLoc = E;
217 return Op;
218 }
219
220 static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
221 SMLoc E) {
222 auto Op = std::make_unique<BPFOperand>(Immediate);
223 Op->Imm.Val = Val;
224 Op->StartLoc = S;
225 Op->EndLoc = E;
226 return Op;
227 }
228
229 // Identifiers that can be used at the start of a statment.
230 static bool isValidIdAtStart(StringRef Name) {
231 return StringSwitch<bool>(Name.lower())
232 .Case("if", true)
233 .Case("call", true)
234 .Case("callx", true)
235 .Case("goto", true)
236 .Case("gotol", true)
237 .Case("may_goto", true)
238 .Case("*", true)
239 .Case("exit", true)
240 .Case("lock", true)
241 .Case("ld_pseudo", true)
242 .Case("store_release", true)
243 .Default(false);
244 }
245
246 // Identifiers that can be used in the middle of a statment.
247 static bool isValidIdInMiddle(StringRef Name) {
248 return StringSwitch<bool>(Name.lower())
249 .Case("u64", true)
250 .Case("u32", true)
251 .Case("u16", true)
252 .Case("u8", true)
253 .Case("s32", true)
254 .Case("s16", true)
255 .Case("s8", true)
256 .Case("be64", true)
257 .Case("be32", true)
258 .Case("be16", true)
259 .Case("le64", true)
260 .Case("le32", true)
261 .Case("le16", true)
262 .Case("bswap16", true)
263 .Case("bswap32", true)
264 .Case("bswap64", true)
265 .Case("goto", true)
266 .Case("ll", true)
267 .Case("skb", true)
268 .Case("s", true)
269 .Case("atomic_fetch_add", true)
270 .Case("atomic_fetch_and", true)
271 .Case("atomic_fetch_or", true)
272 .Case("atomic_fetch_xor", true)
273 .Case("xchg_64", true)
274 .Case("xchg32_32", true)
275 .Case("cmpxchg_64", true)
276 .Case("cmpxchg32_32", true)
277 .Case("addr_space_cast", true)
278 .Case("load_acquire", true)
279 .Default(false);
280 }
281};
282} // end anonymous namespace.
283
284#define GET_REGISTER_MATCHER
285#define GET_MATCHER_IMPLEMENTATION
286#include "BPFGenAsmMatcher.inc"
287
288bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
289
290 if (Operands.size() == 4) {
291 // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
292 // reg1 must be the same as reg2
293 BPFOperand &Op0 = (BPFOperand &)*Operands[0];
294 BPFOperand &Op1 = (BPFOperand &)*Operands[1];
295 BPFOperand &Op2 = (BPFOperand &)*Operands[2];
296 BPFOperand &Op3 = (BPFOperand &)*Operands[3];
297 if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
298 && Op1.getToken() == "="
299 && (Op2.getToken() == "-" || Op2.getToken() == "be16"
300 || Op2.getToken() == "be32" || Op2.getToken() == "be64"
301 || Op2.getToken() == "le16" || Op2.getToken() == "le32"
302 || Op2.getToken() == "le64")
303 && Op0.getReg() != Op3.getReg())
304 return true;
305 }
306
307 return false;
308}
309
310bool BPFAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
313 bool MatchingInlineAsm) {
314 MCInst Inst;
315 SMLoc ErrorLoc;
316
317 if (PreMatchCheck(Operands))
318 return Error(IDLoc, "additional inst constraint not met");
319
320 switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
321 default:
322 break;
323 case Match_Success:
324 Inst.setLoc(IDLoc);
325 Out.emitInstruction(Inst, getSTI());
326 return false;
327 case Match_MissingFeature:
328 return Error(IDLoc, "instruction use requires an option to be enabled");
329 case Match_MnemonicFail:
330 return Error(IDLoc, "unrecognized instruction mnemonic");
331 case Match_InvalidOperand:
332 ErrorLoc = IDLoc;
333
334 if (ErrorInfo != ~0U) {
335 if (ErrorInfo >= Operands.size())
336 return Error(ErrorLoc, "too few operands for instruction");
337
338 ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
339
340 if (ErrorLoc == SMLoc())
341 ErrorLoc = IDLoc;
342 }
343
344 return Error(ErrorLoc, "invalid operand for instruction");
345 case Match_InvalidBrTarget:
346 return Error(Operands[ErrorInfo]->getStartLoc(),
347 "operand is not an identifier or 16-bit signed integer");
348 case Match_InvalidSImm16:
349 return Error(Operands[ErrorInfo]->getStartLoc(),
350 "operand is not a 16-bit signed integer");
351 case Match_InvalidTiedOperand:
352 return Error(Operands[ErrorInfo]->getStartLoc(),
353 "operand is not the same as the dst register");
354 }
355
356 llvm_unreachable("Unknown match type detected!");
357}
358
359bool BPFAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
360 SMLoc &EndLoc) {
361 if (!tryParseRegister(Reg, StartLoc, EndLoc).isSuccess())
362 return Error(StartLoc, "invalid register name");
363 return false;
364}
365
366ParseStatus BPFAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
367 SMLoc &EndLoc) {
368 const AsmToken &Tok = getParser().getTok();
369 StartLoc = Tok.getLoc();
370 EndLoc = Tok.getEndLoc();
371 Reg = BPF::NoRegister;
372 StringRef Name = getLexer().getTok().getIdentifier();
373
374 if (!MatchRegisterName(Name)) {
375 getParser().Lex(); // Eat identifier token.
377 }
378
380}
381
382ParseStatus BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
383 SMLoc S = getLoc();
384
385 if (getLexer().getKind() == AsmToken::Identifier) {
386 StringRef Name = getLexer().getTok().getIdentifier();
387
388 if (BPFOperand::isValidIdInMiddle(Name)) {
389 getLexer().Lex();
390 Operands.push_back(BPFOperand::createToken(Name, S));
392 }
393
395 }
396
397 switch (getLexer().getKind()) {
398 case AsmToken::Minus:
399 case AsmToken::Plus: {
400 if (getLexer().peekTok().is(AsmToken::Integer))
402 [[fallthrough]];
403 }
404
405 case AsmToken::Equal:
407 case AsmToken::Less:
408 case AsmToken::Pipe:
409 case AsmToken::Star:
410 case AsmToken::LParen:
411 case AsmToken::RParen:
412 case AsmToken::LBrac:
413 case AsmToken::RBrac:
414 case AsmToken::Slash:
415 case AsmToken::Amp:
417 case AsmToken::Caret: {
418 StringRef Name = getLexer().getTok().getString();
419 getLexer().Lex();
420 Operands.push_back(BPFOperand::createToken(Name, S));
421
423 }
424
430 case AsmToken::LessLess: {
431 Operands.push_back(BPFOperand::createToken(
432 getLexer().getTok().getString().substr(0, 1), S));
433 Operands.push_back(BPFOperand::createToken(
434 getLexer().getTok().getString().substr(1, 1), S));
435 getLexer().Lex();
436
438 }
439
440 default:
441 break;
442 }
443
445}
446
447ParseStatus BPFAsmParser::parseRegister(OperandVector &Operands) {
448 SMLoc S = getLoc();
450
451 switch (getLexer().getKind()) {
452 default:
455 StringRef Name = getLexer().getTok().getIdentifier();
457
458 if (!Reg)
460
461 getLexer().Lex();
462 Operands.push_back(BPFOperand::createReg(Reg, S, E));
463 }
465}
466
467ParseStatus BPFAsmParser::parseImmediate(OperandVector &Operands) {
468 switch (getLexer().getKind()) {
469 default:
471 case AsmToken::LParen:
472 case AsmToken::Minus:
473 case AsmToken::Plus:
475 case AsmToken::String:
477 break;
478 }
479
480 const MCExpr *IdVal;
481 SMLoc S = getLoc();
482
483 if (getParser().parseExpression(IdVal))
485
487 Operands.push_back(BPFOperand::createImm(IdVal, S, E));
488
490}
491
492/// Parse a BPF instruction which is in BPF verifier format.
493bool BPFAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
494 SMLoc NameLoc, OperandVector &Operands) {
495 // The first operand could be either register or actually an operator.
497
498 if (Reg) {
499 SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() - 1);
500 Operands.push_back(BPFOperand::createReg(Reg, NameLoc, E));
501 } else if (BPFOperand::isValidIdAtStart(Name))
502 Operands.push_back(BPFOperand::createToken(Name, NameLoc));
503 else
504 return Error(NameLoc, "invalid register/token name");
505
506 while (!getLexer().is(AsmToken::EndOfStatement)) {
507 // Attempt to parse token as operator
508 if (parseOperandAsOperator(Operands).isSuccess())
509 continue;
510
511 // Attempt to parse token as register
512 if (parseRegister(Operands).isSuccess())
513 continue;
514
515 if (getLexer().is(AsmToken::Comma)) {
516 getLexer().Lex();
517 continue;
518 }
519
520 // Attempt to parse token as an immediate
521 if (!parseImmediate(Operands).isSuccess()) {
522 SMLoc Loc = getLexer().getLoc();
523 return Error(Loc, "unexpected token");
524 }
525 }
526
527 if (getLexer().isNot(AsmToken::EndOfStatement)) {
528 SMLoc Loc = getLexer().getLoc();
529
530 getParser().eatToEndOfStatement();
531
532 return Error(Loc, "unexpected token");
533 }
534
535 // Consume the EndOfStatement.
536 getParser().Lex();
537 return false;
538}
539
544}
static MCRegister MatchRegisterName(StringRef Name)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI)
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser()
#define LLVM_ABI
Definition: Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:132
std::string Name
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static LVOptions Options
Definition: LVOptions.cpp:25
mir Rename Register Operands
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
raw_pwrite_stream & OS
static StringRef substr(StringRef Str, uint64_t Len)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes)
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:22
LLVM_ABI SMLoc getLoc() const
Definition: AsmLexer.cpp:32
LLVM_ABI SMLoc getEndLoc() const
Definition: AsmLexer.cpp:34
This class represents an Operation in the Expression.
Base class for user error types.
Definition: Error.h:354
Lightweight error class with error context and mandatory checking.
Definition: Error.h:159
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:64
void printExpr(raw_ostream &, const MCExpr &) const
Definition: MCAsmInfo.cpp:153
Generic assembler parser interface, for use by target specific assembly parsers.
Definition: MCAsmParser.h:124
const AsmToken & getTok() const
Get the current AsmToken from the stream.
Definition: MCAsmParser.cpp:43
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:188
void setLoc(SMLoc loc)
Definition: MCInst.h:207
void addOperand(const MCOperand Op)
Definition: MCInst.h:215
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:27
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:166
static MCOperand createReg(MCRegister Reg)
Definition: MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:145
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
Streaming machine code generation interface.
Definition: MCStreamer.h:220
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
const FeatureBitset & getFeatureBits() const
MCTargetAsmParser - Generic interface to target specific assembly parsers.
virtual bool tokenIsStartOfStatement(AsmToken::TokenKind Token)
virtual bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands)=0
Parse one assembly instruction.
virtual bool equalIsAsmAssignment()
virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
virtual ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
tryParseRegister - parse one register if possible
virtual bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm)=0
Recognize a series of operands of a parsed instruction as an actual MCInst and emit it to the specifi...
void setAvailableFeatures(const FeatureBitset &Value)
Ternary parse status returned by various parse* methods.
static constexpr StatusTy Failure
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Represents a location in source code.
Definition: SMLoc.h:23
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:36
constexpr const char * getPointer() const
Definition: SMLoc.h:34
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:68
R Default(T Value)
Definition: StringSwitch.h:177
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CE
Windows NT (Windows on ARM)
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
static bool isMem(const MachineInstr &MI, unsigned Op)
Definition: X86InstrInfo.h:170
Target & getTheBPFleTarget()
Target & getTheBPFbeTarget()
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
Definition: SPIRVUtils.cpp:976
Target & getTheBPFTarget()
DWARFExpression::Operation Op
#define N
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...