1//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the WebAssemblyTargetLowering class.
11///
12//===----------------------------------------------------------------------===//
13
32#include "llvm/IR/Function.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsWebAssembly.h"
40using namespace llvm;
41
42#define DEBUG_TYPE "wasm-lower"
43
44WebAssemblyTargetLowering::WebAssemblyTargetLowering(
45 const TargetMachine &TM, const WebAssemblySubtarget &STI)
46 : TargetLowering(TM), Subtarget(&STI) {
47 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
48
49 // Set the load count for memcmp expand optimization
52
53 // Booleans always contain 0 or 1.
55 // Except in SIMD vectors
57 // We don't know the microarchitecture here, so just reduce register pressure.
59 // Tell ISel that we have a stack pointer.
60 setStackPointerRegisterToSaveRestore(
61 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
62 // Set up the register classes.
63 addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
64 addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
65 addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
66 addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
67 if (Subtarget->hasSIMD128()) {
68 addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
69 addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
70 addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
71 addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
72 addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
73 addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
74 }
75 if (Subtarget->hasFP16()) {
76 addRegisterClass(MVT::v8f16, &WebAssembly::V128RegClass);
77 }
78 if (Subtarget->hasReferenceTypes()) {
79 addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
80 addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
81 if (Subtarget->hasExceptionHandling()) {
82 addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass);
83 }
84 }
85 // Compute derived properties from the register classes.
86 computeRegisterProperties(Subtarget->getRegisterInfo());
87
88 // Transform loads and stores to pointers in address space 1 to loads and
89 // stores to WebAssembly global variables, outside linear memory.
90 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
93 }
94 if (Subtarget->hasSIMD128()) {
95 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
96 MVT::v2f64}) {
99 }
100 }
101 if (Subtarget->hasFP16()) {
102 setOperationAction(ISD::LOAD, MVT::v8f16, Custom);
104 }
105 if (Subtarget->hasReferenceTypes()) {
106 // We need custom load and store lowering for externref, funcref, and
107 // Other. The MVT::Other here represents tables of reference types.
108 for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
111 }
112 }
113
121
122 // Take the default expansion for va_arg, va_copy, and va_end. There is no
123 // default action for va_start, so we lower it with custom code.
128
129 for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
130 if (!Subtarget->hasFP16() && T == MVT::v8f16) {
131 continue;
132 }
133 // Don't expand the floating-point types to constant pools.
135 // Expand floating-point comparisons.
136 for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
139 // Expand floating-point library function operators.
140 for (auto Op :
143 // Note supported floating-point library function operators that otherwise
144 // default to expand.
148 // Support minimum and maximum, which otherwise default to expand.
151 // When experimental v8f16 support is enabled these instructions don't need
152 // to be expanded.
153 if (T != MVT::v8f16) {
156 }
158 setTruncStoreAction(T, MVT::f16, Expand);
159 }
160
161 // Expand unavailable integer operations.
162 for (auto Op :
166 for (auto T : {MVT::i32, MVT::i64})
168 if (Subtarget->hasSIMD128())
169 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
171 }
172
173 if (Subtarget->hasWideArithmetic()) {
179 }
180
181 if (Subtarget->hasNontrappingFPToInt())
183 for (auto T : {MVT::i32, MVT::i64})
185
186 // SIMD-specific configuration
187 if (Subtarget->hasSIMD128()) {
188
189 // Combine partial.reduce.add before legalization gets confused.
191
192 // Combine wide-vector muls, with extend inputs, to extmul_half.
194
195 // Combine vector mask reductions into alltrue/anytrue
197
198 // Convert vector to integer bitcasts to bitmask
200
201 // Hoist bitcasts out of shuffles
203
204 // Combine extends of extract_subvectors into widening ops
206
207 // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
208 // conversion ops
211
212 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
213 // into conversion ops
216
218
219 // Support saturating add/sub for i8x16 and i16x8
221 for (auto T : {MVT::v16i8, MVT::v8i16})
223
224 // Support integer abs
225 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
227
228 // Custom lower BUILD_VECTORs to minimize number of replace_lanes
229 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
230 MVT::v2f64})
232
233 if (Subtarget->hasFP16())
235
236 // We have custom shuffle lowering to expose the shuffle mask
237 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
238 MVT::v2f64})
240
241 if (Subtarget->hasFP16())
243
244 // Support splatting
245 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
246 MVT::v2f64})
248
249 setOperationAction(ISD::AVGCEILU, {MVT::v8i16, MVT::v16i8}, Legal);
250
251 // Custom lowering since wasm shifts must have a scalar shift amount
252 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
253 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
255
256 // Custom lower lane accesses to expand out variable indices
258 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
259 MVT::v2f64})
261
262 // There is no i8x16.mul instruction
263 setOperationAction(ISD::MUL, MVT::v16i8, Expand);
264
265 // There is no vector conditional select instruction
266 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
267 MVT::v2f64})
269
270 // Expand integer operations supported for scalars but not SIMD
271 for (auto Op :
273 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
275
276 // But we do have integer min and max operations
277 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
278 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
280
281 // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
282 setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
283 setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
284 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
285
286 // Custom lower bit counting operations for other types to scalarize them.
287 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
288 for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
290
291 // Expand float operations supported for scalars but not SIMD
294 for (auto T : {MVT::v4f32, MVT::v2f64})
296
297 // Unsigned comparison operations are unavailable for i64x2 vectors.
299 setCondCodeAction(CC, MVT::v2i64, Custom);
300
301 // 64x2 conversions are not in the spec
302 for (auto Op :
304 for (auto T : {MVT::v2i64, MVT::v2f64})
306
307 // But saturating fp_to_int conversions are
309 setOperationAction(Op, MVT::v4i32, Custom);
310 if (Subtarget->hasFP16()) {
311 setOperationAction(Op, MVT::v8i16, Custom);
312 }
313 }
314
315 // Support vector extending
319 }
320 }
321
322 // As a special case, these operators use the type to mean the type to
323 // sign-extend from.
325 if (!Subtarget->hasSignExt()) {
326 // Sign extends are legal only when extending a vector extract
327 auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
328 for (auto T : {MVT::i8, MVT::i16, MVT::i32})
330 }
333
334 // Dynamic stack allocation: use the default expansion.
338
342
343 // Expand these forms; we pattern-match the forms that we can handle in isel.
344 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
345 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
347
348 // We have custom switch handling.
350
351 // WebAssembly doesn't have:
352 // - Floating-point extending loads.
353 // - Floating-point truncating stores.
354 // - i1 extending loads.
355 // - truncating SIMD stores and most extending loads
356 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
357 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
358 for (auto T : MVT::integer_valuetypes())
359 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
360 setLoadExtAction(Ext, T, MVT::i1, Promote);
361 if (Subtarget->hasSIMD128()) {
362 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
363 MVT::v2f64}) {
364 for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
365 if (MVT(T) != MemT) {
367 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
368 setLoadExtAction(Ext, T, MemT, Expand);
369 }
370 }
371 }
372 // But some vector extending loads are legal
373 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
374 setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
375 setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
376 setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
377 }
378 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
379 }
380
381 // Don't do anything clever with build_pairs
383
384 // Trap lowers to wasm unreachable
385 setOperationAction(ISD::TRAP, MVT::Other, Legal);
387
388 // Exception handling intrinsics
392
394
395 // Always convert switches to br_tables unless there is only one case, which
396 // is equivalent to a simple branch. This reduces code size for wasm, and we
397 // defer possible jump table optimizations to the VM.
399}
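// Illustrative pseudo-wat (an assumed sketch, not actual compiler output):
// with this policy a dense switch over {0, 1, default} becomes a single
// br_table into nested blocks rather than a chain of compares and branches:
//
//   block block block
//     local.get $x
//     br_table 0 1 2   ;; depth 0 = case 0, depth 1 = case 1, depth 2 = default
//   end  ;; case 0 body, then branch out
//   end  ;; case 1 body, then branch out
//   end  ;; default body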
400
402 uint32_t AS) const {
404 return MVT::externref;
406 return MVT::funcref;
408}
409
411 uint32_t AS) const {
413 return MVT::externref;
415 return MVT::funcref;
417}
418
419bool WebAssemblyTargetLowering::shouldExpandPartialReductionIntrinsic(
420 const IntrinsicInst *I) const {
421 if (I->getIntrinsicID() != Intrinsic::experimental_vector_partial_reduce_add)
422 return true;
423
424 EVT VT = EVT::getEVT(I->getType());
425 auto Op1 = I->getOperand(1);
426
427 if (auto *InputInst = dyn_cast<Instruction>(Op1)) {
428 if (InstructionOpcodeToISD(InputInst->getOpcode()) != ISD::MUL)
429 return true;
430
431 if (isa<Instruction>(InputInst->getOperand(0)) &&
432 isa<Instruction>(InputInst->getOperand(1))) {
433 // dot only supports signed inputs, but we support lowering unsigned too.
434 if (cast<Instruction>(InputInst->getOperand(0))->getOpcode() !=
435 cast<Instruction>(InputInst->getOperand(1))->getOpcode())
436 return true;
437
438 EVT Op1VT = EVT::getEVT(Op1->getType());
439 if (Op1VT.getVectorElementType() == VT.getVectorElementType() &&
440 ((VT.getVectorElementCount() * 2 == Op1VT.getVectorElementCount()) ||
441 (VT.getVectorElementCount() * 4 == Op1VT.getVectorElementCount())))
442 return false;
443 }
444 }
445 return true;
446}
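// Illustrative IR (an assumed example, not taken from a test) that the check
// above keeps as a partial reduction: the mul's operands share an opcode
// (both sext), the element types match (i32), and the input has 2x the lanes
// of the accumulator, so this can map onto i32x4.dot_i16x8_s:
//
//   %x = sext <8 x i16> %a to <8 x i32>
//   %y = sext <8 x i16> %b to <8 x i32>
//   %m = mul <8 x i32> %x, %y
//   %r = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v8i32(
//            <4 x i32> %acc, <8 x i32> %m)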
447
449WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
450 // We have wasm instructions for these
451 switch (AI->getOperation()) {
459 default:
460 break;
461 }
463}
464
465bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
466 // Implementation copied from X86TargetLowering.
467 unsigned Opc = VecOp.getOpcode();
468
469 // Assume target opcodes can't be scalarized.
470 // TODO - do we have any exceptions?
472 return false;
473
474 // If the vector op is not supported, try to convert to scalar.
475 EVT VecVT = VecOp.getValueType();
477 return true;
478
479 // If the vector op is supported, but the scalar op is not, the transform may
480 // not be worthwhile.
481 EVT ScalarVT = VecVT.getScalarType();
482 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
483}
484
485FastISel *WebAssemblyTargetLowering::createFastISel(
486 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
487 return WebAssembly::createFastISel(FuncInfo, LibInfo);
488}
489
490MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
491 EVT VT) const {
492 unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
493 if (BitWidth > 1 && BitWidth < 8)
494 BitWidth = 8;
495
496 if (BitWidth > 64) {
497 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
498 // the count to be an i32.
499 BitWidth = 32;
501 "32-bit shift counts ought to be enough for anyone");
502 }
503
506 "Unable to represent scalar shift amount type");
507 return Result;
508}
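// For example, an i128 shift becomes a call to a compiler-rt routine whose
// count parameter is a plain 32-bit int, roughly __ashlti3(__int128, int),
// so returning a 32-bit shift amount type here matches that ABI. (Signature
// shown for illustration only.)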
509
510// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
511// undefined result on invalid/overflow, to the WebAssembly opcode, which
512// traps on invalid/overflow.
515 const TargetInstrInfo &TII,
516 bool IsUnsigned, bool Int64,
517 bool Float64, unsigned LoweredOpcode) {
519
520 Register OutReg = MI.getOperand(0).getReg();
521 Register InReg = MI.getOperand(1).getReg();
522
523 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
524 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
525 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
526 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
527 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
528 unsigned Eqz = WebAssembly::EQZ_I32;
529 unsigned And = WebAssembly::AND_I32;
530 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
531 int64_t Substitute = IsUnsigned ? 0 : Limit;
532 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
533 auto &Context = BB->getParent()->getFunction().getContext();
535
536 const BasicBlock *LLVMBB = BB->getBasicBlock();
537 MachineFunction *F = BB->getParent();
538 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
539 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
540 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
541
543 F->insert(It, FalseMBB);
544 F->insert(It, TrueMBB);
545 F->insert(It, DoneMBB);
546
547 // Transfer the remainder of BB and its successor edges to DoneMBB.
548 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
550
551 BB->addSuccessor(TrueMBB);
552 BB->addSuccessor(FalseMBB);
553 TrueMBB->addSuccessor(DoneMBB);
554 FalseMBB->addSuccessor(DoneMBB);
555
556 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
557 Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
558 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
559 CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
560 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
561 FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
562 TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
563
564 MI.eraseFromParent();
565 // For signed numbers, we can do a single comparison to determine whether
566 // fabs(x) is within range.
567 if (IsUnsigned) {
568 Tmp0 = InReg;
569 } else {
570 BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
571 }
572 BuildMI(BB, DL, TII.get(FConst), Tmp1)
573 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
574 BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
575
576 // For unsigned numbers, we have to do a separate comparison with zero.
577 if (IsUnsigned) {
578 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
579 Register SecondCmpReg =
580 MRI.createVirtualRegister(&WebAssembly::I32RegClass);
581 Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
582 BuildMI(BB, DL, TII.get(FConst), Tmp1)
583 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
584 BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
585 BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
586 CmpReg = AndReg;
587 }
588
589 BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
590
591 // Create the CFG diamond to select between doing the conversion or using
592 // the substitute value.
593 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
594 BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
595 BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
596 BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
597 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
598 .addReg(FalseReg)
599 .addMBB(FalseMBB)
600 .addReg(TrueReg)
601 .addMBB(TrueMBB);
602
603 return DoneMBB;
604}
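// Illustrative pseudo-wat for the diamond above, for the signed f32 -> i32
// case (FP_TO_SINT_I32_F32; register plumbing elided, not actual output):
//
//   f32.abs                ;; Tmp0 = fabs(x)               (signed case only)
//   f32.const 0x1p31       ;; Tmp1 = -(double)INT32_MIN = 2^31
//   f32.lt                 ;; CmpReg = fabs(x) < 2^31, i.e. in range
//   i32.eqz                ;; EqzReg = out of range (or NaN)
//   br_if $TrueMBB         ;; out of range: use the Substitute value
//   ;; FalseMBB:
//   i32.trunc_f32_s        ;; in range, so the trapping truncation is safe
//   br $DoneMBB
//   ;; TrueMBB:
//   i32.const 0x80000000   ;; Substitute = INT32_MIN for the signed case
//   ;; DoneMBB: phi(FalseReg, TrueReg)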
605
606// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
607// instruction to handle the zero-length case.
608static MachineBasicBlock *LowerMemcpy(MachineInstr &MI, DebugLoc DL,
609 MachineBasicBlock *BB,
610 const TargetInstrInfo &TII, bool Int64) {
612
613 MachineOperand DstMem = MI.getOperand(0);
614 MachineOperand SrcMem = MI.getOperand(1);
615 MachineOperand Dst = MI.getOperand(2);
616 MachineOperand Src = MI.getOperand(3);
617 MachineOperand Len = MI.getOperand(4);
618
619 // We're going to add an extra use to `Len` to test if it's zero; that
620 // use shouldn't be a kill, even if the original use is.
621 MachineOperand NoKillLen = Len;
622 NoKillLen.setIsKill(false);
623
624 // Decide on which `MachineInstr` opcode we're going to use.
625 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
626 unsigned MemoryCopy =
627 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
628
629 // Create two new basic blocks; one for the new `memory.copy` that we can
630 // branch over, and one for the rest of the instructions after the original
631 // `memory.copy`.
632 const BasicBlock *LLVMBB = BB->getBasicBlock();
633 MachineFunction *F = BB->getParent();
634 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
635 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
636
638 F->insert(It, TrueMBB);
639 F->insert(It, DoneMBB);
640
641 // Transfer the remainder of BB and its successor edges to DoneMBB.
642 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
644
645 // Connect the CFG edges.
646 BB->addSuccessor(TrueMBB);
647 BB->addSuccessor(DoneMBB);
648 TrueMBB->addSuccessor(DoneMBB);
649
650 // Create a virtual register for the `Eqz` result.
651 unsigned EqzReg;
652 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
653
654 // Erase the original `memory.copy`.
655 MI.eraseFromParent();
656
657 // Test if `Len` is zero.
658 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
659
660 // Insert a new `memory.copy`.
661 BuildMI(TrueMBB, DL, TII.get(MemoryCopy))
662 .add(DstMem)
663 .add(SrcMem)
664 .add(Dst)
665 .add(Src)
666 .add(Len);
667
668 // Create the CFG triangle.
669 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
670 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
671
672 return DoneMBB;
673}
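// Illustrative pseudo-wat for the triangle above (operands elided, not actual
// output); LowerMemset below builds the same shape around memory.fill:
//
//   local.get $len
//   i32.eqz              ;; i64.eqz for MEMCPY_A64
//   br_if $DoneMBB       ;; skip the copy entirely when len == 0
//   ;; TrueMBB:
//   memory.copy          ;; dst, src, len
//   ;; DoneMBB: ...
//
// The branch is needed because llvm.memcpy must be a no-op for zero lengths
// with arbitrary pointers, while memory.copy may still bounds-check (and trap
// on) its address operands.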
674
675// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
676// instruction to handle the zero-length case.
677static MachineBasicBlock *LowerMemset(MachineInstr &MI, DebugLoc DL,
678 MachineBasicBlock *BB,
679 const TargetInstrInfo &TII, bool Int64) {
681
682 MachineOperand Mem = MI.getOperand(0);
683 MachineOperand Dst = MI.getOperand(1);
684 MachineOperand Val = MI.getOperand(2);
685 MachineOperand Len = MI.getOperand(3);
686
687 // We're going to add an extra use to `Len` to test if it's zero; that
688 // use shouldn't be a kill, even if the original use is.
689 MachineOperand NoKillLen = Len;
690 NoKillLen.setIsKill(false);
691
692 // Decide on which `MachineInstr` opcode we're going to use.
693 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
694 unsigned MemoryFill =
695 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
696
697 // Create two new basic blocks; one for the new `memory.fill` that we can
698 // branch over, and one for the rest of the instructions after the original
699 // `memory.fill`.
700 const BasicBlock *LLVMBB = BB->getBasicBlock();
701 MachineFunction *F = BB->getParent();
702 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
703 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
704
706 F->insert(It, TrueMBB);
707 F->insert(It, DoneMBB);
708
709 // Transfer the remainder of BB and its successor edges to DoneMBB.
710 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
712
713 // Connect the CFG edges.
714 BB->addSuccessor(TrueMBB);
715 BB->addSuccessor(DoneMBB);
716 TrueMBB->addSuccessor(DoneMBB);
717
718 // Create a virtual register for the `Eqz` result.
719 unsigned EqzReg;
720 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
721
722 // Erase the original `memory.fill`.
723 MI.eraseFromParent();
724
725 // Test if `Len` is zero.
726 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
727
728 // Insert a new `memory.fill`.
729 BuildMI(TrueMBB, DL, TII.get(MemoryFill)).add(Mem).add(Dst).add(Val).add(Len);
730
731 // Create the CFG triangle.
732 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
733 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
734
735 return DoneMBB;
736}
737
738static MachineBasicBlock *
739LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB,
740 const WebAssemblySubtarget *Subtarget,
741 const TargetInstrInfo &TII) {
742 MachineInstr &CallParams = *CallResults.getPrevNode();
743 assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
744 assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
745 CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
746
747 bool IsIndirect =
748 CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
749 bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
750
751 bool IsFuncrefCall = false;
752 if (IsIndirect && CallParams.getOperand(0).isReg()) {
753 Register Reg = CallParams.getOperand(0).getReg();
754 const MachineFunction *MF = BB->getParent();
755 const MachineRegisterInfo &MRI = MF->getRegInfo();
756 const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
757 IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
758 assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
759 }
760
761 unsigned CallOp;
762 if (IsIndirect && IsRetCall) {
763 CallOp = WebAssembly::RET_CALL_INDIRECT;
764 } else if (IsIndirect) {
765 CallOp = WebAssembly::CALL_INDIRECT;
766 } else if (IsRetCall) {
767 CallOp = WebAssembly::RET_CALL;
768 } else {
769 CallOp = WebAssembly::CALL;
770 }
771
772 MachineFunction &MF = *BB->getParent();
773 const MCInstrDesc &MCID = TII.get(CallOp);
774 MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
775
776 // Move the function pointer to the end of the arguments for indirect calls
777 if (IsIndirect) {
778 auto FnPtr = CallParams.getOperand(0);
779 CallParams.removeOperand(0);
780
781 // For funcrefs, call_indirect is done through __funcref_call_table, and
782 // the funcref is always installed in slot 0 of that table. Therefore,
783 // instead of adding the function pointer at the end of the params list,
784 // a zero (the index into
785 // __funcref_call_table) is added.
786 if (IsFuncrefCall) {
787 Register RegZero =
788 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
789 MachineInstrBuilder MIBC0 =
790 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
791
792 BB->insert(CallResults.getIterator(), MIBC0);
793 MachineInstrBuilder(MF, CallParams).addReg(RegZero);
794 } else
795 CallParams.addOperand(FnPtr);
796 }
797
798 for (auto Def : CallResults.defs())
799 MIB.add(Def);
800
801 if (IsIndirect) {
802 // Placeholder for the type index.
803 // This gets replaced with the correct value in WebAssemblyMCInstLower.cpp
804 MIB.addImm(0);
805 // The table into which this call_indirect indexes.
806 MCSymbolWasm *Table = IsFuncrefCall
808 MF.getContext(), Subtarget)
810 MF.getContext(), Subtarget);
811 if (Subtarget->hasCallIndirectOverlong()) {
812 MIB.addSym(Table);
813 } else {
814 // For the MVP there is at most one table whose number is 0, but we can't
815 // write a table symbol or issue relocations. Instead we just ensure the
816 // table is live and write a zero.
817 Table->setNoStrip();
818 MIB.addImm(0);
819 }
820 }
821
822 for (auto Use : CallParams.uses())
823 MIB.add(Use);
824
825 BB->insert(CallResults.getIterator(), MIB);
826 CallParams.eraseFromParent();
827 CallResults.eraseFromParent();
828
829 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
830 // table slot with ref.null upon call_indirect return.
831 //
832 // This generates the following code, which comes right after a call_indirect
833 // of a funcref:
834 //
835 // i32.const 0
836 // ref.null func
837 // table.set __funcref_call_table
838 if (IsIndirect && IsFuncrefCall) {
840 MF.getContext(), Subtarget);
841 Register RegZero =
842 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
843 MachineInstr *Const0 =
844 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
845 BB->insertAfter(MIB.getInstr()->getIterator(), Const0);
846
847 Register RegFuncref =
848 MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
849 MachineInstr *RefNull =
850 BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
851 BB->insertAfter(Const0->getIterator(), RefNull);
852
853 MachineInstr *TableSet =
854 BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
855 .addSym(Table)
856 .addReg(RegZero)
857 .addReg(RegFuncref);
858 BB->insertAfter(RefNull->getIterator(), TableSet);
859 }
860
861 return BB;
862}
863
864MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
865 MachineInstr &MI, MachineBasicBlock *BB) const {
866 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
867 DebugLoc DL = MI.getDebugLoc();
868
869 switch (MI.getOpcode()) {
870 default:
871 llvm_unreachable("Unexpected instr type to insert");
872 case WebAssembly::FP_TO_SINT_I32_F32:
873 return LowerFPToInt(MI, DL, BB, TII, false, false, false,
874 WebAssembly::I32_TRUNC_S_F32);
875 case WebAssembly::FP_TO_UINT_I32_F32:
876 return LowerFPToInt(MI, DL, BB, TII, true, false, false,
877 WebAssembly::I32_TRUNC_U_F32);
878 case WebAssembly::FP_TO_SINT_I64_F32:
879 return LowerFPToInt(MI, DL, BB, TII, false, true, false,
880 WebAssembly::I64_TRUNC_S_F32);
881 case WebAssembly::FP_TO_UINT_I64_F32:
882 return LowerFPToInt(MI, DL, BB, TII, true, true, false,
883 WebAssembly::I64_TRUNC_U_F32);
884 case WebAssembly::FP_TO_SINT_I32_F64:
885 return LowerFPToInt(MI, DL, BB, TII, false, false, true,
886 WebAssembly::I32_TRUNC_S_F64);
887 case WebAssembly::FP_TO_UINT_I32_F64:
888 return LowerFPToInt(MI, DL, BB, TII, true, false, true,
889 WebAssembly::I32_TRUNC_U_F64);
890 case WebAssembly::FP_TO_SINT_I64_F64:
891 return LowerFPToInt(MI, DL, BB, TII, false, true, true,
892 WebAssembly::I64_TRUNC_S_F64);
893 case WebAssembly::FP_TO_UINT_I64_F64:
894 return LowerFPToInt(MI, DL, BB, TII, true, true, true,
895 WebAssembly::I64_TRUNC_U_F64);
896 case WebAssembly::MEMCPY_A32:
897 return LowerMemcpy(MI, DL, BB, TII, false);
898 case WebAssembly::MEMCPY_A64:
899 return LowerMemcpy(MI, DL, BB, TII, true);
900 case WebAssembly::MEMSET_A32:
901 return LowerMemset(MI, DL, BB, TII, false);
902 case WebAssembly::MEMSET_A64:
903 return LowerMemset(MI, DL, BB, TII, true);
904 case WebAssembly::CALL_RESULTS:
905 case WebAssembly::RET_CALL_RESULTS:
906 return LowerCallResults(MI, DL, BB, Subtarget, TII);
907 }
908}
909
910const char *
911WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
912 switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
913 case WebAssemblyISD::FIRST_NUMBER:
914 break;
915#define HANDLE_NODETYPE(NODE) \
916 case WebAssemblyISD::NODE: \
917 return "WebAssemblyISD::" #NODE;
918#include "WebAssemblyISD.def"
919#undef HANDLE_NODETYPE
920 }
921 return nullptr;
922}
923
924std::pair<unsigned, const TargetRegisterClass *>
925WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
926 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
927 // First, see if this is a constraint that directly corresponds to a
928 // WebAssembly register class.
929 if (Constraint.size() == 1) {
930 switch (Constraint[0]) {
931 case 'r':
932 assert(VT != MVT::iPTR && "Pointer MVT not expected here");
933 if (Subtarget->hasSIMD128() && VT.isVector()) {
934 if (VT.getSizeInBits() == 128)
935 return std::make_pair(0U, &WebAssembly::V128RegClass);
936 }
937 if (VT.isInteger() && !VT.isVector()) {
938 if (VT.getSizeInBits() <= 32)
939 return std::make_pair(0U, &WebAssembly::I32RegClass);
940 if (VT.getSizeInBits() <= 64)
941 return std::make_pair(0U, &WebAssembly::I64RegClass);
942 }
943 if (VT.isFloatingPoint() && !VT.isVector()) {
944 switch (VT.getSizeInBits()) {
945 case 32:
946 return std::make_pair(0U, &WebAssembly::F32RegClass);
947 case 64:
948 return std::make_pair(0U, &WebAssembly::F64RegClass);
949 default:
950 break;
951 }
952 }
953 break;
954 default:
955 break;
956 }
957 }
958
960}
961
962bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
963 // Assume ctz is a relatively cheap operation.
964 return true;
965}
966
967bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
968 // Assume clz is a relatively cheap operation.
969 return true;
970}
971
972bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
973 const AddrMode &AM,
974 Type *Ty, unsigned AS,
975 Instruction *I) const {
976 // WebAssembly offsets are added as unsigned without wrapping. The
977 // isLegalAddressingMode gives us no way to determine if wrapping could be
978 // happening, so we approximate this by accepting only non-negative offsets.
979 if (AM.BaseOffs < 0)
980 return false;
981
982 // WebAssembly has no scale register operands.
983 if (AM.Scale != 0)
984 return false;
985
986 // Everything else is legal.
987 return true;
988}
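// For example (hypothetical AddrMode values): accessing `p + 16` yields
// {BaseReg=p, BaseOffs=16, Scale=0}, which is accepted and can fold into a
// load/store's unsigned offset immediate, whereas {BaseOffs=-16} or {Scale=4}
// are rejected here and get materialized as explicit i32.add / i32.mul code
// instead.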
989
990bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
991 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
992 MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
993 // WebAssembly supports unaligned accesses, though it should be declared
994 // with the p2align attribute on loads and stores which do so, and there
995 // may be a performance impact. We tell LLVM they're "fast" because
996 // for the kinds of things that LLVM uses this for (merging adjacent stores
997 // of constants, etc.), WebAssembly implementations will either want the
998 // unaligned access or they'll split anyway.
999 if (Fast)
1000 *Fast = 1;
1001 return true;
1002}
1003
1004bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
1005 AttributeList Attr) const {
1006 // The current thinking is that wasm engines will perform this optimization,
1007 // so we can save on code size.
1008 return true;
1009}
1010
1011bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1012 EVT ExtT = ExtVal.getValueType();
1013 EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
1014 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
1015 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
1016 (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1017}
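// These pairs correspond to the SIMD extending loads (pseudo-wat):
//
//   v128.load8x8_s / _u    ;; v8i8  -> v8i16
//   v128.load16x4_s / _u   ;; v4i16 -> v4i32
//   v128.load32x2_s / _u   ;; v2i32 -> v2i64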
1018
1019bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
1020 const GlobalAddressSDNode *GA) const {
1021 // Wasm doesn't support function addresses with offsets
1022 const GlobalValue *GV = GA->getGlobal();
1023 return isa<Function>(GV) ? false : TargetLowering::isOffsetFoldingLegal(GA);
1024}
1025
1026EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
1027 LLVMContext &C,
1028 EVT VT) const {
1029 if (VT.isVector())
1030 return VT.changeVectorElementTypeToInteger();
1031
1032 // So far, all branch instructions in Wasm take an I32 condition.
1033 // The default TargetLowering::getSetCCResultType returns the pointer size,
1034 // which would be useful to reduce instruction counts when testing
1035 // against 64-bit pointers/values if at some point Wasm supports that.
1036 return EVT::getIntegerVT(C, 32);
1037}
1038
1039bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1040 const CallInst &I,
1041 MachineFunction &MF,
1042 unsigned Intrinsic) const {
1043 switch (Intrinsic) {
1044 case Intrinsic::wasm_memory_atomic_notify:
1046 Info.memVT = MVT::i32;
1047 Info.ptrVal = I.getArgOperand(0);
1048 Info.offset = 0;
1049 Info.align = Align(4);
1050 // atomic.notify instruction does not really load the memory specified with
1051 // this argument, but MachineMemOperand should either be load or store, so
1052 // we set this to a load.
1053 // FIXME Volatile isn't really correct, but currently all LLVM atomic
1054 // instructions are treated as volatiles in the backend, so we should be
1055 // consistent. The same applies for wasm_atomic_wait intrinsics too.
1057 return true;
1058 case Intrinsic::wasm_memory_atomic_wait32:
1060 Info.memVT = MVT::i32;
1061 Info.ptrVal = I.getArgOperand(0);
1062 Info.offset = 0;
1063 Info.align = Align(4);
1065 return true;
1066 case Intrinsic::wasm_memory_atomic_wait64:
1068 Info.memVT = MVT::i64;
1069 Info.ptrVal = I.getArgOperand(0);
1070 Info.offset = 0;
1071 Info.align = Align(8);
1073 return true;
1074 case Intrinsic::wasm_loadf16_f32:
1076 Info.memVT = MVT::f16;
1077 Info.ptrVal = I.getArgOperand(0);
1078 Info.offset = 0;
1079 Info.align = Align(2);
1081 return true;
1082 case Intrinsic::wasm_storef16_f32:
1084 Info.memVT = MVT::f16;
1085 Info.ptrVal = I.getArgOperand(1);
1086 Info.offset = 0;
1087 Info.align = Align(2);
1089 return true;
1090 default:
1091 return false;
1092 }
1093}
1094
1095void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
1096 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
1097 const SelectionDAG &DAG, unsigned Depth) const {
1098 switch (Op.getOpcode()) {
1099 default:
1100 break;
1101 case ISD::INTRINSIC_WO_CHAIN: {
1102 unsigned IntNo = Op.getConstantOperandVal(0);
1103 switch (IntNo) {
1104 default:
1105 break;
1106 case Intrinsic::wasm_bitmask: {
1107 unsigned BitWidth = Known.getBitWidth();
1108 EVT VT = Op.getOperand(1).getSimpleValueType();
1109 unsigned PossibleBits = VT.getVectorNumElements();
1110 APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
1111 Known.Zero |= ZeroMask;
1112 break;
1113 }
1114 }
1115 break;
1116 }
1117
1118 // For 128-bit addition, if the upper halves of both operands are known to
1119 // be zero, then the upper half of the result is known to have all bits
1120 // zero except the first (the carry out of the low half).
1121 case WebAssemblyISD::I64_ADD128:
1122 if (Op.getResNo() == 1) {
1123 SDValue LHS_HI = Op.getOperand(1);
1124 SDValue RHS_HI = Op.getOperand(3);
1125 if (isNullConstant(LHS_HI) && isNullConstant(RHS_HI))
1126 Known.Zero.setBitsFrom(1);
1127 }
1128 break;
1129 }
1130}
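// Worked example for the I64_ADD128 case: with both high halves known zero,
// e.g. {lo=0xffffffffffffffff, hi=0} + {lo=1, hi=0} = {lo=0, hi=1}, the high
// half of the sum can only be the carry out of the low half (0 or 1), so all
// bits of result 1 except bit 0 are known zero.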
1131
1133WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
1134 if (VT.isFixedLengthVector()) {
1135 MVT EltVT = VT.getVectorElementType();
1136 // We have legal vector types with these lane types, so widening the
1137 // vector would let us use some of the lanes directly without having to
1138 // extend or truncate values.
1139 if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
1140 EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
1141 return TypeWidenVector;
1142 }
1143
1145}
1146
1147bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1148 SDValue Op, const TargetLoweringOpt &TLO) const {
1149 // The ISel process runs DAGCombiner after legalization; this step is called
1150 // the SelectionDAG optimization phase. This post-legalization combining process
1151 // runs DAGCombiner on each node, and if there was a change to be made,
1152 // re-runs legalization again on it and its user nodes to make sure
1153 // everything is in a legalized state.
1154 //
1155 // The legalization calls lowering routines, and we do our custom lowering for
1156 // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1157 // into zeros. But there is a set of routines in DAGCombiner that turns unused
1158 // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1159 // turns unused vector elements into undefs. But this routine does not work
1160 // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
1161 // combination can result in an infinite loop, in which undefs are converted to
1162 // zeros in legalization and back to undefs in combining.
1163 //
1164 // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
1165 // running for build_vectors.
1166 if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1167 return false;
1168 return true;
1169}
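// Example of the ping-pong being avoided: LowerBUILD_VECTOR rewrites
// (build_vector x, undef) to (build_vector x, 0); if lane 1 is not demanded,
// SimplifyDemandedVectorElts would turn that back into
// (build_vector x, undef), and legalization would then zero it again,
// looping forever.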
1170
1171//===----------------------------------------------------------------------===//
1172// WebAssembly Lowering private implementation.
1173//===----------------------------------------------------------------------===//
1174
1175//===----------------------------------------------------------------------===//
1176// Lowering Code
1177//===----------------------------------------------------------------------===//
1178
1179static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
1181 DAG.getContext()->diagnose(
1182 DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
1183}
1184
1185// Test whether the given calling convention is supported.
1186static bool callingConvSupported(CallingConv::ID CallConv) {
1187 // We currently support the language-independent target-independent
1188 // conventions. We don't yet have a way to annotate calls with properties like
1189 // "cold", and we don't have any call-clobbered registers, so these are mostly
1190 // all handled the same.
1191 return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
1192 CallConv == CallingConv::Cold ||
1193 CallConv == CallingConv::PreserveMost ||
1194 CallConv == CallingConv::PreserveAll ||
1195 CallConv == CallingConv::CXX_FAST_TLS ||
1196 CallConv == CallingConv::WASM_EmscriptenInvoke ||
1197 CallConv == CallingConv::Swift;
1198}
1199
1200SDValue
1201WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
1202 SmallVectorImpl<SDValue> &InVals) const {
1203 SelectionDAG &DAG = CLI.DAG;
1204 SDLoc DL = CLI.DL;
1205 SDValue Chain = CLI.Chain;
1206 SDValue Callee = CLI.Callee;
1207 MachineFunction &MF = DAG.getMachineFunction();
1208 auto Layout = MF.getDataLayout();
1209
1210 CallingConv::ID CallConv = CLI.CallConv;
1211 if (!callingConvSupported(CallConv))
1212 fail(DL, DAG,
1213 "WebAssembly doesn't support language-specific or target-specific "
1214 "calling conventions yet");
1215 if (CLI.IsPatchPoint)
1216 fail(DL, DAG, "WebAssembly doesn't support patch point yet");
1217
1218 if (CLI.IsTailCall) {
1219 auto NoTail = [&](const char *Msg) {
1220 if (CLI.CB && CLI.CB->isMustTailCall())
1221 fail(DL, DAG, Msg);
1222 CLI.IsTailCall = false;
1223 };
1224
1225 if (!Subtarget->hasTailCall())
1226 NoTail("WebAssembly 'tail-call' feature not enabled");
1227
1228 // Varargs calls cannot be tail calls because the buffer is on the stack
1229 if (CLI.IsVarArg)
1230 NoTail("WebAssembly does not support varargs tail calls");
1231
1232 // Do not tail call unless caller and callee return types match
1233 const Function &F = MF.getFunction();
1234 const TargetMachine &TM = getTargetMachine();
1235 Type *RetTy = F.getReturnType();
1236 SmallVector<MVT, 4> CallerRetTys;
1237 SmallVector<MVT, 4> CalleeRetTys;
1238 computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
1239 computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
1240 bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
1241 std::equal(CallerRetTys.begin(), CallerRetTys.end(),
1242 CalleeRetTys.begin());
1243 if (!TypesMatch)
1244 NoTail("WebAssembly tail call requires caller and callee return types to "
1245 "match");
1246
1247 // If pointers to local stack values are passed, we cannot tail call
1248 if (CLI.CB) {
1249 for (auto &Arg : CLI.CB->args()) {
1250 Value *Val = Arg.get();
1251 // Trace the value back through pointer operations
1252 while (true) {
1253 Value *Src = Val->stripPointerCastsAndAliases();
1254 if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
1255 Src = GEP->getPointerOperand();
1256 if (Val == Src)
1257 break;
1258 Val = Src;
1259 }
1260 if (isa<AllocaInst>(Val)) {
1261 NoTail(
1262 "WebAssembly does not support tail calling with stack arguments");
1263 break;
1264 }
1265 }
1266 }
1267 }
1268
1269 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1270 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1271 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1272
1273 // The generic code may have added an sret argument. If we're lowering an
1274 // invoke function, the ABI requires that the function pointer be the first
1275 // argument, so we may have to swap the arguments.
1276 if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
1277 Outs[0].Flags.isSRet()) {
1278 std::swap(Outs[0], Outs[1]);
1279 std::swap(OutVals[0], OutVals[1]);
1280 }
1281
1282 bool HasSwiftSelfArg = false;
1283 bool HasSwiftErrorArg = false;
1284 unsigned NumFixedArgs = 0;
1285 for (unsigned I = 0; I < Outs.size(); ++I) {
1286 const ISD::OutputArg &Out = Outs[I];
1287 SDValue &OutVal = OutVals[I];
1288 HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
1289 HasSwiftErrorArg |= Out.Flags.isSwiftError();
1290 if (Out.Flags.isNest())
1291 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1292 if (Out.Flags.isInAlloca())
1293 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1294 if (Out.Flags.isInConsecutiveRegs())
1295 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1296 if (Out.Flags.isInConsecutiveRegsLast())
1297 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1298 if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
1299 auto &MFI = MF.getFrameInfo();
1300 int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
1302 /*isSS=*/false);
1303 SDValue SizeNode =
1304 DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
1305 SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1306 Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode,
1308 /*isVolatile*/ false, /*AlwaysInline=*/false,
1309 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
1311 OutVal = FINode;
1312 }
1313 // Count the number of fixed args *after* legalization.
1314 NumFixedArgs += !Out.Flags.isVarArg();
1315 }
1316
1317 bool IsVarArg = CLI.IsVarArg;
1318 auto PtrVT = getPointerTy(Layout);
1319
1320 // For swiftcc, emit additional swiftself and swifterror arguments if
1321 // there aren't any. These additional arguments are also added for the
1322 // callee signature. They are necessary to match callee and caller
1323 // signatures for indirect calls.
1324 if (CallConv == CallingConv::Swift) {
1325 Type *PtrTy = PointerType::getUnqual(*DAG.getContext());
1326 if (!HasSwiftSelfArg) {
1327 NumFixedArgs++;
1329 Flags.setSwiftSelf();
1330 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1331 CLI.Outs.push_back(Arg);
1332 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1333 CLI.OutVals.push_back(ArgVal);
1334 }
1335 if (!HasSwiftErrorArg) {
1336 NumFixedArgs++;
1338 Flags.setSwiftError();
1339 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1340 CLI.Outs.push_back(Arg);
1341 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1342 CLI.OutVals.push_back(ArgVal);
1343 }
1344 }
1345
1346 // Analyze operands of the call, assigning locations to each operand.
1347 SmallVector<CCValAssign, 16> ArgLocs;
1348 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1349
1350 if (IsVarArg) {
1351 // Outgoing non-fixed arguments are placed in a buffer. First
1352 // compute their offsets and the total amount of buffer space needed.
1353 for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
1354 const ISD::OutputArg &Out = Outs[I];
1355 SDValue &Arg = OutVals[I];
1356 EVT VT = Arg.getValueType();
1357 assert(VT != MVT::iPTR && "Legalized args should be concrete");
1358 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
1359 Align Alignment =
1360 std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
1361 unsigned Offset =
1362 CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
1363 CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
1364 Offset, VT.getSimpleVT(),
1366 }
1367 }
1368
1369 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
1370
1371 SDValue FINode;
1372 if (IsVarArg && NumBytes) {
1373 // For non-fixed arguments, next emit stores to store the argument values
1374 // to the stack buffer at the offsets computed above.
1375 MaybeAlign StackAlign = Layout.getStackAlignment();
1376 assert(StackAlign && "data layout string is missing stack alignment");
1377 int FI = MF.getFrameInfo().CreateStackObject(NumBytes, *StackAlign,
1378 /*isSS=*/false);
1379 unsigned ValNo = 0;
1381 for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
1382 assert(ArgLocs[ValNo].getValNo() == ValNo &&
1383 "ArgLocs should remain in order and only hold varargs args");
1384 unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
1385 FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1386 SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
1387 DAG.getConstant(Offset, DL, PtrVT));
1388 Chains.push_back(
1389 DAG.getStore(Chain, DL, Arg, Add,
1391 }
1392 if (!Chains.empty())
1393 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
1394 } else if (IsVarArg) {
1395 FINode = DAG.getIntPtrConstant(0, DL);
1396 }
1397
1398 if (Callee->getOpcode() == ISD::GlobalAddress) {
1399 // If the callee is a GlobalAddress node (quite common, every direct call
1400 // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
1401 // doesn't add MO_GOT, which is not needed for direct calls.
1402 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
1405 GA->getOffset());
1406 Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
1407 getPointerTy(DAG.getDataLayout()), Callee);
1408 }
1409
1410 // Compute the operands for the CALLn node.
1412 Ops.push_back(Chain);
1413 Ops.push_back(Callee);
1414
1415 // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1416 // isn't reliable.
1417 Ops.append(OutVals.begin(),
1418 IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1419 // Add a pointer to the vararg buffer.
1420 if (IsVarArg)
1421 Ops.push_back(FINode);
1422
1423 SmallVector<EVT, 8> InTys;
1424 for (const auto &In : Ins) {
1425 assert(!In.Flags.isByVal() && "byval is not valid for return values");
1426 assert(!In.Flags.isNest() && "nest is not valid for return values");
1427 if (In.Flags.isInAlloca())
1428 fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
1429 if (In.Flags.isInConsecutiveRegs())
1430 fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
1431 if (In.Flags.isInConsecutiveRegsLast())
1432 fail(DL, DAG,
1433 "WebAssembly hasn't implemented cons regs last return values");
1434 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1435 // registers.
1436 InTys.push_back(In.VT);
1437 }
1438
1439 // Lastly, if this is a call to a funcref we need to add an instruction
1440 // table.set to the chain and transform the call.
1442 CLI.CB->getCalledOperand()->getType())) {
1443 // In the absence of the function references proposal, where a funcref call
1444 // would be lowered to call_ref, we use reference types to generate a
1445 // table.set that installs the funcref into a special table used solely for
1446 // this purpose, followed by a call_indirect. Here we just generate the
1447 // table set, and return the SDValue of the table.set so that LowerCall can
1448 // finalize the lowering by generating the call_indirect.
1449 SDValue Chain = Ops[0];
1450
1452 MF.getContext(), Subtarget);
1453 SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
1454 SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
1455 SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
1456 SDValue TableSet = DAG.getMemIntrinsicNode(
1457 WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
1458 MVT::funcref,
1459 // Machine Mem Operand args
1462 CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
1464
1465 Ops[0] = TableSet; // The new chain is the TableSet itself
1466 }
1467
1468 if (CLI.IsTailCall) {
1469 // ret_calls do not return values to the current frame
1470 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1471 return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
1472 }
1473
1474 InTys.push_back(MVT::Other);
1475 SDVTList InTyList = DAG.getVTList(InTys);
1476 SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
1477
1478 for (size_t I = 0; I < Ins.size(); ++I)
1479 InVals.push_back(Res.getValue(I));
1480
1481 // Return the chain
1482 return Res.getValue(Ins.size());
1483}
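// Illustrative layout (hypothetical call, not from a test): for a wasm32 call
// printf(fmt, 1, 2.0), the two variadic operands are stored into the stack
// buffer created above at their ABI-aligned offsets, and only the fixed
// argument plus the buffer pointer travel as wasm call operands:
//
//   buf+0: i32 1
//   buf+8: f64 2.0          ;; rounded up from offset 4 to 8-byte alignment
//   call $printf (fmt, buf)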
1484
1485bool WebAssemblyTargetLowering::CanLowerReturn(
1486 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1487 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
1488 const Type *RetTy) const {
1489 // WebAssembly can only handle returning tuples with multivalue enabled
1490 return WebAssembly::canLowerReturn(Outs.size(), Subtarget);
1491}
1492
1493SDValue WebAssemblyTargetLowering::LowerReturn(
1494 SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1496 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1497 SelectionDAG &DAG) const {
1498 assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
1499 "MVP WebAssembly can only return up to one value");
1500 if (!callingConvSupported(CallConv))
1501 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1502
1503 SmallVector<SDValue, 4> RetOps(1, Chain);
1504 RetOps.append(OutVals.begin(), OutVals.end());
1505 Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1506
1507 // Record the number and types of the return values.
1508 for (const ISD::OutputArg &Out : Outs) {
1509 assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1510 assert(!Out.Flags.isNest() && "nest is not valid for return values");
1511 assert(!Out.Flags.isVarArg() && "non-fixed return value is not valid");
1512 if (Out.Flags.isInAlloca())
1513 fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
1514 if (Out.Flags.isInConsecutiveRegs())
1515 fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
1516 if (Out.Flags.isInConsecutiveRegsLast())
1517 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
1518 }
1519
1520 return Chain;
1521}
1522
1523SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1524 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1525 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1526 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1527 if (!callingConvSupported(CallConv))
1528 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1529
1531 auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1532
1533 // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1534 // of the incoming values before they're represented by virtual registers.
1535 MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
1536
1537 bool HasSwiftErrorArg = false;
1538 bool HasSwiftSelfArg = false;
1539 for (const ISD::InputArg &In : Ins) {
1540 HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1541 HasSwiftErrorArg |= In.Flags.isSwiftError();
1542 if (In.Flags.isInAlloca())
1543 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1544 if (In.Flags.isNest())
1545 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1546 if (In.Flags.isInConsecutiveRegs())
1547 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1548 if (In.Flags.isInConsecutiveRegsLast())
1549 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1550 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1551 // registers.
1552 InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
1553 DAG.getTargetConstant(InVals.size(),
1554 DL, MVT::i32))
1555 : DAG.getUNDEF(In.VT));
1556
1557 // Record the number and types of arguments.
1558 MFI->addParam(In.VT);
1559 }
1560
1561 // For swiftcc, emit additional swiftself and swifterror arguments if
1562 // there aren't any. These additional arguments are also added for the
1563 // callee signature. They are necessary to match callee and caller
1564 // signatures for indirect calls.
1565 auto PtrVT = getPointerTy(MF.getDataLayout());
1566 if (CallConv == CallingConv::Swift) {
1567 if (!HasSwiftSelfArg) {
1568 MFI->addParam(PtrVT);
1569 }
1570 if (!HasSwiftErrorArg) {
1571 MFI->addParam(PtrVT);
1572 }
1573 }
1574 // Varargs are copied into a buffer allocated by the caller, and a pointer to
1575 // the buffer is passed as an argument.
1576 if (IsVarArg) {
1577 MVT PtrVT = getPointerTy(MF.getDataLayout());
1578 Register VarargVreg =
1580 MFI->setVarargBufferVreg(VarargVreg);
1581 Chain = DAG.getCopyToReg(
1582 Chain, DL, VarargVreg,
1583 DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
1584 DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
1585 MFI->addParam(PtrVT);
1586 }
1587
1588 // Record the number and types of arguments and results.
1589 SmallVector<MVT, 4> Params;
1592 MF.getFunction(), DAG.getTarget(), Params, Results);
1593 for (MVT VT : Results)
1594 MFI->addResult(VT);
1595 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1596 // the param logic here with ComputeSignatureVTs
1597 assert(MFI->getParams().size() == Params.size() &&
1598 std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1599 Params.begin()));
1600
1601 return Chain;
1602}
1603
1604void WebAssemblyTargetLowering::ReplaceNodeResults(
1605 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1606 switch (N->getOpcode()) {
1607 case ISD::SIGN_EXTEND_INREG:
1608 // Do not add any results, signifying that N should not be custom lowered
1609 // after all. This happens because simd128 turns on custom lowering for
1610 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1611 // illegal type.
1612 break;
1613 case ISD::SIGN_EXTEND_VECTOR_INREG:
1614 case ISD::ZERO_EXTEND_VECTOR_INREG:
1615 // Do not add any results, signifying that N should not be custom lowered.
1616 // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
1617 break;
1618 case ISD::ADD:
1619 case ISD::SUB:
1620 Results.push_back(Replace128Op(N, DAG));
1621 break;
1622 default:
1624 "ReplaceNodeResults not implemented for this op for WebAssembly!");
1625 }
1626}
1627
1628//===----------------------------------------------------------------------===//
1629// Custom lowering hooks.
1630//===----------------------------------------------------------------------===//
1631
1632SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1633 SelectionDAG &DAG) const {
1634 SDLoc DL(Op);
1635 switch (Op.getOpcode()) {
1636 default:
1637 llvm_unreachable("unimplemented operation lowering");
1638 return SDValue();
1639 case ISD::FrameIndex:
1640 return LowerFrameIndex(Op, DAG);
1641 case ISD::GlobalAddress:
1642 return LowerGlobalAddress(Op, DAG);
1643 case ISD::GlobalTLSAddress:
1644 return LowerGlobalTLSAddress(Op, DAG);
1645 case ISD::ExternalSymbol:
1646 return LowerExternalSymbol(Op, DAG);
1647 case ISD::JumpTable:
1648 return LowerJumpTable(Op, DAG);
1649 case ISD::BR_JT:
1650 return LowerBR_JT(Op, DAG);
1651 case ISD::VASTART:
1652 return LowerVASTART(Op, DAG);
1653 case ISD::BlockAddress:
1654 case ISD::BRIND:
1655 fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
1656 return SDValue();
1657 case ISD::RETURNADDR:
1658 return LowerRETURNADDR(Op, DAG);
1659 case ISD::FRAMEADDR:
1660 return LowerFRAMEADDR(Op, DAG);
1661 case ISD::CopyToReg:
1662 return LowerCopyToReg(Op, DAG);
1663 case ISD::EXTRACT_VECTOR_ELT:
1664 case ISD::INSERT_VECTOR_ELT:
1665 return LowerAccessVectorElement(Op, DAG);
1666 case ISD::INTRINSIC_VOID:
1667 case ISD::INTRINSIC_WO_CHAIN:
1668 case ISD::INTRINSIC_W_CHAIN:
1669 return LowerIntrinsic(Op, DAG);
1670 case ISD::SIGN_EXTEND_INREG:
1671 return LowerSIGN_EXTEND_INREG(Op, DAG);
1672 case ISD::SIGN_EXTEND_VECTOR_INREG:
1673 case ISD::ZERO_EXTEND_VECTOR_INREG:
1674 return LowerEXTEND_VECTOR_INREG(Op, DAG);
1675 case ISD::BUILD_VECTOR:
1676 return LowerBUILD_VECTOR(Op, DAG);
1677 case ISD::VECTOR_SHUFFLE:
1678 return LowerVECTOR_SHUFFLE(Op, DAG);
1679 case ISD::SETCC:
1680 return LowerSETCC(Op, DAG);
1681 case ISD::SHL:
1682 case ISD::SRA:
1683 case ISD::SRL:
1684 return LowerShift(Op, DAG);
1685 case ISD::FP_TO_SINT_SAT:
1686 case ISD::FP_TO_UINT_SAT:
1687 return LowerFP_TO_INT_SAT(Op, DAG);
1688 case ISD::LOAD:
1689 return LowerLoad(Op, DAG);
1690 case ISD::STORE:
1691 return LowerStore(Op, DAG);
1692 case ISD::CTPOP:
1693 case ISD::CTLZ:
1694 case ISD::CTTZ:
1695 return DAG.UnrollVectorOp(Op.getNode());
1696 case ISD::CLEAR_CACHE:
1697 report_fatal_error("llvm.clear_cache is not supported on wasm");
1698 case ISD::SMUL_LOHI:
1699 case ISD::UMUL_LOHI:
1700 return LowerMUL_LOHI(Op, DAG);
1701 case ISD::UADDO:
1702 return LowerUADDO(Op, DAG);
1703 }
1704}
1705
1706static bool IsWebAssemblyGlobal(SDValue Op) {
1707 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
1708 return WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace());
1709
1710 return false;
1711}
1712
1713static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1714 SelectionDAG &DAG) {
1715 const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op);
1716 if (!FI)
1717 return std::nullopt;
1718
1719 auto &MF = DAG.getMachineFunction();
1720 return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex());
1721}
1722
1723SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
1724 SelectionDAG &DAG) const {
1725 SDLoc DL(Op);
1726 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
1727 const SDValue &Value = SN->getValue();
1728 const SDValue &Base = SN->getBasePtr();
1729 const SDValue &Offset = SN->getOffset();
1730
1731 if (IsWebAssemblyGlobal(Base)) {
1732 if (!Offset->isUndef())
1733 report_fatal_error("unexpected offset when storing to webassembly global",
1734 false);
1735
1736 SDVTList Tys = DAG.getVTList(MVT::Other);
1737 SDValue Ops[] = {SN->getChain(), Value, Base};
1738 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
1739 SN->getMemoryVT(), SN->getMemOperand());
1740 }
1741
1742 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1743 if (!Offset->isUndef())
1744 report_fatal_error("unexpected offset when storing to webassembly local",
1745 false);
1746
1747 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1748 SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
1749 SDValue Ops[] = {SN->getChain(), Idx, Value};
1750 return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
1751 }
1752
1755 "Encountered an unlowerable store to the wasm_var address space",
1756 false);
1757
1758 return Op;
1759}
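// For illustration: a store through a pointer in the wasm global address
// space, e.g. IR along the lines of
//
//   @g = global i32 0, addrspace(1)
//   store i32 %v, ptr addrspace(1) @g
//
// takes the GLOBAL_SET path above and selects to a `global.set` of g rather
// than a store into linear memory; stack objects that have been assigned to
// wasm locals take the LOCAL_SET path analogously.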
1760
1761SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
1762 SelectionDAG &DAG) const {
1763 SDLoc DL(Op);
1764 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
1765 const SDValue &Base = LN->getBasePtr();
1766 const SDValue &Offset = LN->getOffset();
1767
1768 if (IsWebAssemblyGlobal(Base)) {
1769 if (!Offset->isUndef())
1770 report_fatal_error(
1771 "unexpected offset when loading from webassembly global", false);
1772
1773 SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
1774 SDValue Ops[] = {LN->getChain(), Base};
1775 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
1776 LN->getMemoryVT(), LN->getMemOperand());
1777 }
1778
1779 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1780 if (!Offset->isUndef())
1782 "unexpected offset when loading from webassembly local", false);
1783
1784 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1785 EVT LocalVT = LN->getValueType(0);
1786 SDValue LocalGet = DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, LocalVT,
1787 {LN->getChain(), Idx});
1788 SDValue Result = DAG.getMergeValues({LocalGet, LN->getChain()}, DL);
1789 assert(Result->getNumValues() == 2 && "Loads must carry a chain!");
1790 return Result;
1791 }
1792
1795 "Encountered an unlowerable load from the wasm_var address space",
1796 false);
1797
1798 return Op;
1799}
1800
1801SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1802 SelectionDAG &DAG) const {
1803 assert(Subtarget->hasWideArithmetic());
1804 assert(Op.getValueType() == MVT::i64);
1805 SDLoc DL(Op);
1806 unsigned Opcode;
1807 switch (Op.getOpcode()) {
1808 case ISD::UMUL_LOHI:
1809 Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1810 break;
1811 case ISD::SMUL_LOHI:
1812 Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1813 break;
1814 default:
1815 llvm_unreachable("unexpected opcode");
1816 }
1817 SDValue LHS = Op.getOperand(0);
1818 SDValue RHS = Op.getOperand(1);
1819 SDValue Lo =
1820 DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64), LHS, RHS);
1821 SDValue Hi(Lo.getNode(), 1);
1822 SDValue Ops[] = {Lo, Hi};
1823 return DAG.getMergeValues(Ops, DL);
1824}
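// Sketch of the effect (assuming wide-arithmetic is enabled): a 64x64->128
// multiply such as (umul_lohi i64 %a, %b) maps onto a single
// i64.mul_wide_u, whose two i64 results are the low and high halves that
// would otherwise require a libcall or a multi-multiply expansion.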
1825
1826// Lowers `UADDO` intrinsics to an `i64.add128` instruction when wide
1827// arithmetic is enabled.
1828//
1829// The upper half of both operands is constant zero, so a single wasm
1830// instruction suffices; the upper half of the result reports the overflow.
1831SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1832 SelectionDAG &DAG) const {
1833 assert(Subtarget->hasWideArithmetic());
1834 assert(Op.getValueType() == MVT::i64);
1835 assert(Op.getOpcode() == ISD::UADDO);
1836 SDLoc DL(Op);
1837 SDValue LHS = Op.getOperand(0);
1838 SDValue RHS = Op.getOperand(1);
1839 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
1840 SDValue Result =
1841 DAG.getNode(WebAssemblyISD::I64_ADD128, DL,
1842 DAG.getVTList(MVT::i64, MVT::i64), LHS, Zero, RHS, Zero);
1843 SDValue CarryI64(Result.getNode(), 1);
1844 SDValue CarryI32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, CarryI64);
1845 SDValue Ops[] = {Result, CarryI32};
1846 return DAG.getMergeValues(Ops, DL);
1847}
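// For example, (uaddo i64 %a, %b) becomes
//
//   (I64_ADD128 %a, 0, %b, 0)
//
// where the low result is the 64-bit sum and the high result, truncated to
// i32, is the carry: with zero upper halves, the 128-bit sum can only carry
// into bit 64, so the high half is exactly the overflow flag.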
1848
1849SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1850 SelectionDAG &DAG) const {
1851 assert(Subtarget->hasWideArithmetic());
1852 assert(N->getValueType(0) == MVT::i128);
1853 SDLoc DL(N);
1854 unsigned Opcode;
1855 switch (N->getOpcode()) {
1856 case ISD::ADD:
1857 Opcode = WebAssemblyISD::I64_ADD128;
1858 break;
1859 case ISD::SUB:
1860 Opcode = WebAssemblyISD::I64_SUB128;
1861 break;
1862 default:
1863 llvm_unreachable("unexpected opcode");
1864 }
1865 SDValue LHS = N->getOperand(0);
1866 SDValue RHS = N->getOperand(1);
1867
1868 SDValue C0 = DAG.getConstant(0, DL, MVT::i64);
1869 SDValue C1 = DAG.getConstant(1, DL, MVT::i64);
1870 SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C0);
1871 SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C1);
1872 SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C0);
1873 SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C1);
1874 SDValue Result_LO = DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64),
1875 LHS_0, LHS_1, RHS_0, RHS_1);
1876 SDValue Result_HI(Result_LO.getNode(), 1);
1877 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getVTList(), Result_LO, Result_HI);
1878}
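// Decomposition sketch for `add i128 %a, %b` under wide-arithmetic:
//
//   (lo, hi) = I64_ADD128 a.lo, a.hi, b.lo, b.hi
//   result   = build_pair lo, hi
//
// so the whole i128 add/sub selects to one i64.add128 / i64.sub128 rather
// than being expanded into an add/add-with-carry sequence.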
1879
1880SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1881 SelectionDAG &DAG) const {
1882 SDValue Src = Op.getOperand(2);
1883 if (isa<FrameIndexSDNode>(Src.getNode())) {
1884 // CopyToReg nodes don't support FrameIndex operands. Other targets select
1885 // the FI to some LEA-like instruction, but since we don't have that, we
1886 // need to insert some kind of instruction that can take an FI operand and
1887 // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1888 // local.copy between Op and its FI operand.
1889 SDValue Chain = Op.getOperand(0);
1890 SDLoc DL(Op);
1891 Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1892 EVT VT = Src.getValueType();
1893 SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1894 : WebAssembly::COPY_I64,
1895 DL, VT, Src),
1896 0);
1897 return Op.getNode()->getNumValues() == 1
1898 ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
1899 : DAG.getCopyToReg(Chain, DL, Reg, Copy,
1900 Op.getNumOperands() == 4 ? Op.getOperand(3)
1901 : SDValue());
1902 }
1903 return SDValue();
1904}
1905
1906SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1907 SelectionDAG &DAG) const {
1908 int FI = cast<FrameIndexSDNode>(Op)->getIndex();
1909 return DAG.getTargetFrameIndex(FI, Op.getValueType());
1910}
1911
1912SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1913 SelectionDAG &DAG) const {
1914 SDLoc DL(Op);
1915
1916 if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1917 fail(DL, DAG,
1918 "Non-Emscripten WebAssembly hasn't implemented "
1919 "__builtin_return_address");
1920 return SDValue();
1921 }
1922
1923 unsigned Depth = Op.getConstantOperandVal(0);
1924 MakeLibCallOptions CallOptions;
1925 return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
1926 {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
1927 .first;
1928}
1929
1930SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
1931 SelectionDAG &DAG) const {
1932 // Non-zero depths are not supported by WebAssembly currently. Use the
1933 // legalizer's default expansion, which is to return 0 (what this function is
1934 // documented to do).
1935 if (Op.getConstantOperandVal(0) > 0)
1936 return SDValue();
1937
1938 DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
1939 EVT VT = Op.getValueType();
1940 Register FP =
1941 Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
1942 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
1943}
1944
1945SDValue
1946WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1947 SelectionDAG &DAG) const {
1948 SDLoc DL(Op);
1949 const auto *GA = cast<GlobalAddressSDNode>(Op);
1950
1951 MachineFunction &MF = DAG.getMachineFunction();
1952 if (!Subtarget->hasBulkMemoryOpt())
1953 report_fatal_error("cannot use thread-local storage without bulk memory",
1954 false);
1955
1956 const GlobalValue *GV = GA->getGlobal();
1957
1958 // Currently only Emscripten supports dynamic linking with threads. Therefore,
1959 // on other targets, if we have thread-local storage, only the local-exec
1960 // model is possible.
1961 auto model = Subtarget->getTargetTriple().isOSEmscripten()
1962 ? GV->getThreadLocalMode()
1963 : GlobalValue::LocalExecTLSModel;
1964
1965 // Unsupported TLS modes
1966 assert(model != GlobalValue::NotThreadLocal);
1967 assert(model != GlobalValue::InitialExecTLSModel);
1968
1969 if (model == GlobalValue::LocalExecTLSModel ||
1970 model == GlobalValue::LocalDynamicTLSModel ||
1971 (model == GlobalValue::GeneralDynamicTLSModel &&
1972 getTargetMachine().shouldAssumeDSOLocal(GV))) {
1973 // For DSO-local TLS variables we use offset from __tls_base
1974
1975 MVT PtrVT = getPointerTy(DAG.getDataLayout());
1976 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
1977 : WebAssembly::GLOBAL_GET_I32;
1978 const char *BaseName = MF.createExternalSymbolName("__tls_base");
1979
1980 SDValue BaseAddr(
1981 DAG.getMachineNode(GlobalGet, DL, PtrVT,
1982 DAG.getTargetExternalSymbol(BaseName, PtrVT)),
1983 0);
1984
1985 SDValue TLSOffset = DAG.getTargetGlobalAddress(
1986 GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
1987 SDValue SymOffset =
1988 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);
1989
1990 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
1991 }
1992
1993 assert(model == GlobalValue::GeneralDynamicTLSModel);
1994
1995 EVT VT = Op.getValueType();
1996 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1997 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
1998 GA->getOffset(),
1999 WebAssemblyII::MO_GOT_TLS));
2000}
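// Shape of the DSO-local address computed above, shown as wasm32
// pseudo-assembly:
//
//   global.get __tls_base
//   i32.const  <offset of x relative to __tls_base>
//   i32.add
//
// Non-DSO-local variables fall through to the general-dynamic path, which
// loads the address from a GOT.TLS entry instead.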
2001
2002SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
2003 SelectionDAG &DAG) const {
2004 SDLoc DL(Op);
2005 const auto *GA = cast<GlobalAddressSDNode>(Op);
2006 EVT VT = Op.getValueType();
2007 assert(GA->getTargetFlags() == 0 &&
2008 "Unexpected target flags on generic GlobalAddressSDNode");
2009 if (!WebAssembly::isValidAddressSpace(GA->getAddressSpace()))
2010 fail(DL, DAG, "Invalid address space for WebAssembly target");
2011
2012 unsigned OperandFlags = 0;
2013 const GlobalValue *GV = GA->getGlobal();
2014 // Since WebAssembly tables cannot yet be shared across modules, we don't
2015 // need special treatment for tables in PIC mode.
2016 if (isPositionIndependent() &&
2017 !WebAssembly::isWebAssemblyTableType(GV->getValueType())) {
2018 if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
2019 MachineFunction &MF = DAG.getMachineFunction();
2020 MVT PtrVT = getPointerTy(MF.getDataLayout());
2021 const char *BaseName;
2022 if (GV->getValueType()->isFunctionTy()) {
2023 BaseName = MF.createExternalSymbolName("__table_base");
2024 OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
2025 } else {
2026 BaseName = MF.createExternalSymbolName("__memory_base");
2027 OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
2028 }
2029 SDValue BaseAddr =
2030 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2031 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2032
2033 SDValue SymAddr = DAG.getNode(
2034 WebAssemblyISD::WrapperREL, DL, VT,
2035 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
2036 OperandFlags));
2037
2038 return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
2039 }
2040 OperandFlags = WebAssemblyII::MO_GOT;
2041 }
2042
2043 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2044 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2045 GA->getOffset(), OperandFlags));
2046}
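// In PIC mode the DSO-local path above effectively emits, for a data
// symbol g:
//
//   global.get __memory_base
//   i32.const  <offset of g relative to __memory_base>
//   i32.add
//
// with __table_base and a table-relative offset used instead for function
// addresses; non-DSO-local symbols are reached through their GOT entry.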
2047
2048SDValue
2049WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2050 SelectionDAG &DAG) const {
2051 SDLoc DL(Op);
2052 const auto *ES = cast<ExternalSymbolSDNode>(Op);
2053 EVT VT = Op.getValueType();
2054 assert(ES->getTargetFlags() == 0 &&
2055 "Unexpected target flags on generic ExternalSymbolSDNode");
2056 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2057 DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
2058}
2059
2060SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2061 SelectionDAG &DAG) const {
2062 // There's no need for a Wrapper node because we always incorporate a jump
2063 // table operand into a BR_TABLE instruction, rather than ever
2064 // materializing it in a register.
2065 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2066 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
2067 JT->getTargetFlags());
2068}
2069
2070SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
2071 SelectionDAG &DAG) const {
2072 SDLoc DL(Op);
2073 SDValue Chain = Op.getOperand(0);
2074 const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
2075 SDValue Index = Op.getOperand(2);
2076 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
2077
2078 SmallVector<SDValue, 8> Ops;
2079 Ops.push_back(Chain);
2080 Ops.push_back(Index);
2081
2082 const MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2083 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
2084
2085 // Add an operand for each case.
2086 for (auto *MBB : MBBs)
2087 Ops.push_back(DAG.getBasicBlock(MBB));
2088
2089 // Add the first MBB as a dummy default target for now. This will be replaced
2090 // with the proper default target (and the preceding range check eliminated)
2091 // if possible by WebAssemblyFixBrTableDefaults.
2092 Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
2093 return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
2094}
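// Resulting node shape (sketch): for a jump table with cases bb0..bbN the
// code above builds
//
//   (BR_TABLE chain, index, bb0, ..., bbN, bb0)
//
// where the trailing bb0 is only a placeholder default that
// WebAssemblyFixBrTableDefaults later replaces with the real default target.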
2095
2096SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2097 SelectionDAG &DAG) const {
2098 SDLoc DL(Op);
2099 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
2100
2101 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2102 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2103
2104 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
2105 MFI->getVarargBufferVreg(), PtrVT);
2106 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
2107 MachinePointerInfo(SV));
2108}
2109
2110// Try to lower partial.reduce.add to a dot or fallback to a sequence with
2111// extmul and adds.
2112static SDValue performLowerPartialReduction(SDNode *N, SelectionDAG &DAG) {
2113 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
2114 if (N->getConstantOperandVal(0) !=
2115 Intrinsic::experimental_vector_partial_reduce_add)
2116 return SDValue();
2117
2118 assert(N->getValueType(0) == MVT::v4i32 && "can only support v4i32");
2119 SDLoc DL(N);
2120 SDValue Mul = N->getOperand(2);
2121 assert(Mul->getOpcode() == ISD::MUL && "expected mul input");
2122
2123 SDValue ExtendLHS = Mul->getOperand(0);
2124 SDValue ExtendRHS = Mul->getOperand(1);
2125 assert((ISD::isExtOpcode(ExtendLHS.getOpcode()) &&
2126 ISD::isExtOpcode(ExtendRHS.getOpcode())) &&
2127 "expected widening mul");
2128 assert(ExtendLHS.getOpcode() == ExtendRHS.getOpcode() &&
2129 "expected mul to use the same extend for both operands");
2130
2131 SDValue ExtendInLHS = ExtendLHS->getOperand(0);
2132 SDValue ExtendInRHS = ExtendRHS->getOperand(0);
2133 bool IsSigned = ExtendLHS->getOpcode() == ISD::SIGN_EXTEND;
2134
2135 if (ExtendInLHS->getValueType(0) == MVT::v8i16) {
2136 if (IsSigned) {
2137 // i32x4.dot_i16x8_s
2138 SDValue Dot = DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32,
2139 ExtendInLHS, ExtendInRHS);
2140 return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Dot);
2141 }
2142
2143 unsigned LowOpc = WebAssemblyISD::EXTEND_LOW_U;
2144 unsigned HighOpc = WebAssemblyISD::EXTEND_HIGH_U;
2145
2146 // (add (add (extmul_low_sx lhs, rhs), (extmul_high_sx lhs, rhs)))
2147 SDValue LowLHS = DAG.getNode(LowOpc, DL, MVT::v4i32, ExtendInLHS);
2148 SDValue LowRHS = DAG.getNode(LowOpc, DL, MVT::v4i32, ExtendInRHS);
2149 SDValue HighLHS = DAG.getNode(HighOpc, DL, MVT::v4i32, ExtendInLHS);
2150 SDValue HighRHS = DAG.getNode(HighOpc, DL, MVT::v4i32, ExtendInRHS);
2151
2152 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v4i32, LowLHS, LowRHS);
2153 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v4i32, HighLHS, HighRHS);
2154 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, MulLow, MulHigh);
2155 return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add);
2156 } else {
2157 assert(ExtendInLHS->getValueType(0) == MVT::v16i8 &&
2158 "expected v16i8 input types");
2159 // Lower to a wider tree, using twice the operations compared to above.
2160 if (IsSigned) {
2161 // Use two dots
2162 unsigned LowOpc = WebAssemblyISD::EXTEND_LOW_S;
2163 unsigned HighOpc = WebAssemblyISD::EXTEND_HIGH_S;
2164 SDValue LowLHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInLHS);
2165 SDValue LowRHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInRHS);
2166 SDValue HighLHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInLHS);
2167 SDValue HighRHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInRHS);
2168 SDValue DotLHS =
2169 DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32, LowLHS, LowRHS);
2170 SDValue DotRHS =
2171 DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32, HighLHS, HighRHS);
2172 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, DotLHS, DotRHS);
2173 return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add);
2174 }
2175
2176 unsigned LowOpc = WebAssemblyISD::EXTEND_LOW_U;
2177 unsigned HighOpc = WebAssemblyISD::EXTEND_HIGH_U;
2178 SDValue LowLHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInLHS);
2179 SDValue LowRHS = DAG.getNode(LowOpc, DL, MVT::v8i16, ExtendInRHS);
2180 SDValue HighLHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInLHS);
2181 SDValue HighRHS = DAG.getNode(HighOpc, DL, MVT::v8i16, ExtendInRHS);
2182
2183 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
2184 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
2185
2186 SDValue LowLow = DAG.getNode(LowOpc, DL, MVT::v4i32, MulLow);
2187 SDValue LowHigh = DAG.getNode(LowOpc, DL, MVT::v4i32, MulHigh);
2188 SDValue HighLow = DAG.getNode(HighOpc, DL, MVT::v4i32, MulLow);
2189 SDValue HighHigh = DAG.getNode(HighOpc, DL, MVT::v4i32, MulHigh);
2190
2191 SDValue AddLow = DAG.getNode(ISD::ADD, DL, MVT::v4i32, LowLow, HighLow);
2192 SDValue AddHigh = DAG.getNode(ISD::ADD, DL, MVT::v4i32, LowHigh, HighHigh);
2193 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, AddLow, AddHigh);
2194 return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add);
2195 }
2196}
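// For example, in the signed v8i16 case above,
//
//   partial.reduce.add(%acc, mul(sext(%a), sext(%b)))
//
// with %a, %b : v8i16 collapses to (add %acc, (DOT %a, %b)), i.e. a single
// i32x4.dot_i16x8_s plus one accumulate, instead of two extmuls and adds.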
2197
2198SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
2199 SelectionDAG &DAG) const {
2200 MachineFunction &MF = DAG.getMachineFunction();
2201 unsigned IntNo;
2202 switch (Op.getOpcode()) {
2203 case ISD::INTRINSIC_VOID:
2204 case ISD::INTRINSIC_W_CHAIN:
2205 IntNo = Op.getConstantOperandVal(1);
2206 break;
2207 case ISD::INTRINSIC_WO_CHAIN:
2208 IntNo = Op.getConstantOperandVal(0);
2209 break;
2210 default:
2211 llvm_unreachable("Invalid intrinsic");
2212 }
2213 SDLoc DL(Op);
2214
2215 switch (IntNo) {
2216 default:
2217 return SDValue(); // Don't custom lower most intrinsics.
2218
2219 case Intrinsic::wasm_lsda: {
2220 auto PtrVT = getPointerTy(MF.getDataLayout());
2221 const char *SymName = MF.createExternalSymbolName(
2222 "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
2223 if (isPositionIndependent()) {
2224 SDValue Node = DAG.getTargetExternalSymbol(
2225 SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
2226 const char *BaseName = MF.createExternalSymbolName("__memory_base");
2227 SDValue BaseAddr =
2228 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2229 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2230 SDValue SymAddr =
2231 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
2232 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
2233 }
2234 SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
2235 return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
2236 }
2237
2238 case Intrinsic::wasm_shuffle: {
2239 // Drop in-chain and replace undefs, but otherwise pass through unchanged
2240 SDValue Ops[18];
2241 size_t OpIdx = 0;
2242 Ops[OpIdx++] = Op.getOperand(1);
2243 Ops[OpIdx++] = Op.getOperand(2);
2244 while (OpIdx < 18) {
2245 const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
2246 if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
2247 bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
2248 Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget);
2249 } else {
2250 Ops[OpIdx++] = MaskIdx;
2251 }
2252 }
2253 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2254 }
2255
2256 case Intrinsic::thread_pointer: {
2257 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2258 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2259 : WebAssembly::GLOBAL_GET_I32;
2260 const char *TlsBase = MF.createExternalSymbolName("__tls_base");
2261 return SDValue(
2262 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2263 DAG.getTargetExternalSymbol(TlsBase, PtrVT)),
2264 0);
2265 }
2266 }
2267}
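// Note on the wasm_shuffle case above: undef or out-of-range (>= 32) mask
// operands are canonicalized to constant 0, e.g.
//
//   wasm.shuffle(%a, %b, <0, undef, 34, 3, ...>)
//     -> (SHUFFLE %a, %b, 0, 0, 0, 3, ...)
//
// so every lane index is a valid immediate when i8x16.shuffle is selected.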
2268
2269SDValue
2270WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2271 SelectionDAG &DAG) const {
2272 SDLoc DL(Op);
2273 // If sign extension operations are disabled, allow sext_inreg only if operand
2274 // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
2275 // extension operations, but allowing sext_inreg in this context lets us have
2276 // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
2277 // everywhere would be simpler in this file, but would necessitate large and
2278 // brittle patterns to undo the expansion and select extract_lane_s
2279 // instructions.
2280 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
2281 if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2282 return SDValue();
2283
2284 const SDValue &Extract = Op.getOperand(0);
2285 MVT VecT = Extract.getOperand(0).getSimpleValueType();
2286 if (VecT.getVectorElementType().getSizeInBits() > 32)
2287 return SDValue();
2288 MVT ExtractedLaneT =
2289 cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
2290 MVT ExtractedVecT =
2291 MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
2292 if (ExtractedVecT == VecT)
2293 return Op;
2294
2295 // Bitcast vector to appropriate type to ensure ISel pattern coverage
2296 const SDNode *Index = Extract.getOperand(1).getNode();
2297 if (!isa<ConstantSDNode>(Index))
2298 return SDValue();
2299 unsigned IndexVal = Index->getAsZExtVal();
2300 unsigned Scale =
2301 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
2302 assert(Scale > 1);
2303 SDValue NewIndex =
2304 DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
2305 SDValue NewExtract = DAG.getNode(
2306 ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
2307 DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
2308 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
2309 Op.getOperand(1));
2310}
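// Worked example of the rewrite above:
//
//   (sext_inreg (extract_vector_elt (v4i32 %v), 1), i8)
//
// bitcasts %v to v16i8 and rescales the index by 16/4 = 4, producing
// (sext_inreg (extract_vector_elt (v16i8 %v'), 4), i8), which then matches
// the i8x16.extract_lane_s pattern directly.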
2311
2312static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2313 SelectionDAG &DAG) {
2314 if (Op.getOpcode() != ISD::VECTOR_SHUFFLE)
2315 return SDValue();
2316
2317 assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2318 UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2319 "expected extend_low");
2320 auto *Shuffle = cast<ShuffleVectorSDNode>(Op.getNode());
2321
2322 ArrayRef<int> Mask = Shuffle->getMask();
2323 // Look for a shuffle which moves from the high half to the low half.
2324 size_t FirstIdx = Mask.size() / 2;
2325 for (size_t i = 0; i < Mask.size() / 2; ++i) {
2326 if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2327 return SDValue();
2328 }
2329 }
2330
2331 SDLoc DL(Op);
2332 unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2333 ? WebAssemblyISD::EXTEND_HIGH_S
2334 : WebAssemblyISD::EXTEND_HIGH_U;
2335 return DAG.getNode(Opc, DL, VT, Shuffle->getOperand(0));
2336}
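// E.g. an (extend_low_u (vector_shuffle %v, undef, <4,5,6,7,...>)) that
// only moves the high half of %v into the low lanes is folded to
// (extend_high_u %v), removing the shuffle entirely.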
2337
2338SDValue
2339WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
2340 SelectionDAG &DAG) const {
2341 SDLoc DL(Op);
2342 EVT VT = Op.getValueType();
2343 SDValue Src = Op.getOperand(0);
2344 EVT SrcVT = Src.getValueType();
2345
2346 if (SrcVT.getVectorElementType() == MVT::i1 ||
2347 SrcVT.getVectorElementType() == MVT::i64)
2348 return SDValue();
2349
2350 assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
2351 "Unexpected extension factor.");
2352 unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
2353
2354 if (Scale != 2 && Scale != 4 && Scale != 8)
2355 return SDValue();
2356
2357 unsigned Ext;
2358 switch (Op.getOpcode()) {
2359 case ISD::ZERO_EXTEND_VECTOR_INREG:
2360 Ext = WebAssemblyISD::EXTEND_LOW_U;
2361 break;
2362 case ISD::SIGN_EXTEND_VECTOR_INREG:
2363 Ext = WebAssemblyISD::EXTEND_LOW_S;
2364 break;
2365 }
2366
2367 if (Scale == 2) {
2368 // See if we can use EXTEND_HIGH.
2369 if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG))
2370 return ExtendHigh;
2371 }
2372
2373 SDValue Ret = Src;
2374 while (Scale != 1) {
2375 Ret = DAG.getNode(Ext, DL,
2376 Ret.getValueType()
2377 .widenIntegerVectorElementType(*DAG.getContext())
2378 .getHalfNumVectorElementsVT(*DAG.getContext()),
2379 Ret);
2380 Scale /= 2;
2381 }
2382 assert(Ret.getValueType() == VT);
2383 return Ret;
2384}
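// E.g. (zero_extend_vector_inreg v16i8 -> v4i32) has Scale == 4 and is
// emitted as a chain of two halving/widening steps:
//
//   v16i8 --extend_low_u--> v8i16 --extend_low_u--> v4i32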
2385
2386static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
2387 SDLoc DL(Op);
2388 if (Op.getValueType() != MVT::v2f64)
2389 return SDValue();
2390
2391 auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
2392 unsigned &Index) -> bool {
2393 switch (Op.getOpcode()) {
2394 case ISD::SINT_TO_FP:
2395 Opcode = WebAssemblyISD::CONVERT_LOW_S;
2396 break;
2397 case ISD::UINT_TO_FP:
2398 Opcode = WebAssemblyISD::CONVERT_LOW_U;
2399 break;
2400 case ISD::FP_EXTEND:
2401 Opcode = WebAssemblyISD::PROMOTE_LOW;
2402 break;
2403 default:
2404 return false;
2405 }
2406
2407 auto ExtractVector = Op.getOperand(0);
2408 if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2409 return false;
2410
2411 if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
2412 return false;
2413
2414 SrcVec = ExtractVector.getOperand(0);
2415 Index = ExtractVector.getConstantOperandVal(1);
2416 return true;
2417 };
2418
2419 unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
2420 SDValue LHSSrcVec, RHSSrcVec;
2421 if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
2422 !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
2423 return SDValue();
2424
2425 if (LHSOpcode != RHSOpcode)
2426 return SDValue();
2427
2428 MVT ExpectedSrcVT;
2429 switch (LHSOpcode) {
2430 case WebAssemblyISD::CONVERT_LOW_S:
2431 case WebAssemblyISD::CONVERT_LOW_U:
2432 ExpectedSrcVT = MVT::v4i32;
2433 break;
2434 case WebAssemblyISD::PROMOTE_LOW:
2435 ExpectedSrcVT = MVT::v4f32;
2436 break;
2437 }
2438 if (LHSSrcVec.getValueType() != ExpectedSrcVT)
2439 return SDValue();
2440
2441 auto Src = LHSSrcVec;
2442 if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
2443 // Shuffle the source vector so that the converted lanes are the low lanes.
2444 Src = DAG.getVectorShuffle(
2445 ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
2446 {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
2447 }
2448 return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
2449}
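// Pattern example for the code above:
//
//   (build_vector (f64 (sitofp (extract_elt %v, 0))),
//                 (f64 (sitofp (extract_elt %v, 1))))
//
// with %v : v4i32 becomes (f64x2.convert_low_i32x4_s %v); if the converted
// lanes are not already lanes 0 and 1 of one source, a shuffle first moves
// them into the low lanes.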
2450
2451SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
2452 SelectionDAG &DAG) const {
2453 MVT VT = Op.getSimpleValueType();
2454 if (VT == MVT::v8f16) {
2455 // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scalar
2456 // FP16 type, so cast them to I16s.
2457 MVT IVT = VT.changeVectorElementType(MVT::i16);
2458 SmallVector<SDValue, 8> NewOps;
2459 for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I)
2460 NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I)));
2461 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps);
2462 return DAG.getBitcast(VT, Res);
2463 }
2464
2465 if (auto ConvertLow = LowerConvertLow(Op, DAG))
2466 return ConvertLow;
2467
2468 SDLoc DL(Op);
2469 const EVT VecT = Op.getValueType();
2470 const EVT LaneT = Op.getOperand(0).getValueType();
2471 const size_t Lanes = Op.getNumOperands();
2472 bool CanSwizzle = VecT == MVT::v16i8;
2473
2474 // BUILD_VECTORs are lowered to the instruction that initializes the highest
2475 // possible number of lanes at once followed by a sequence of replace_lane
2476 // instructions to individually initialize any remaining lanes.
2477
2478 // TODO: Tune this. For example, lanewise swizzling is very expensive, so
2479 // swizzled lanes should be given greater weight.
2480
2481 // TODO: Investigate looping rather than always extracting/replacing specific
2482 // lanes to fill gaps.
2483
2484 auto IsConstant = [](const SDValue &V) {
2485 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
2486 };
2487
2488 // Returns the source vector and index vector pair if they exist. Checks for:
2489 // (extract_vector_elt
2490 // $src,
2491 // (sign_extend_inreg (extract_vector_elt $indices, $i))
2492 // )
2493 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
2494 auto Bail = std::make_pair(SDValue(), SDValue());
2495 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2496 return Bail;
2497 const SDValue &SwizzleSrc = Lane->getOperand(0);
2498 const SDValue &IndexExt = Lane->getOperand(1);
2499 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
2500 return Bail;
2501 const SDValue &Index = IndexExt->getOperand(0);
2502 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2503 return Bail;
2504 const SDValue &SwizzleIndices = Index->getOperand(0);
2505 if (SwizzleSrc.getValueType() != MVT::v16i8 ||
2506 SwizzleIndices.getValueType() != MVT::v16i8 ||
2507 Index->getOperand(1)->getOpcode() != ISD::Constant ||
2508 Index->getConstantOperandVal(1) != I)
2509 return Bail;
2510 return std::make_pair(SwizzleSrc, SwizzleIndices);
2511 };
2512
2513 // If the lane is extracted from another vector at a constant index, return
2514 // that vector. The source vector must not have more lanes than the dest
2515 // because the shufflevector indices are in terms of the destination lanes and
2516 // would not be able to address the smaller individual source lanes.
2517 auto GetShuffleSrc = [&](const SDValue &Lane) {
2518 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2519 return SDValue();
2520 if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
2521 return SDValue();
2522 if (Lane->getOperand(0).getValueType().getVectorNumElements() >
2523 VecT.getVectorNumElements())
2524 return SDValue();
2525 return Lane->getOperand(0);
2526 };
2527
2528 using ValueEntry = std::pair<SDValue, size_t>;
2529 SmallVector<ValueEntry, 16> SplatValueCounts;
2530
2531 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
2532 SmallVector<SwizzleEntry, 16> SwizzleCounts;
2533
2534 using ShuffleEntry = std::pair<SDValue, size_t>;
2535 SmallVector<ShuffleEntry, 16> ShuffleCounts;
2536
2537 auto AddCount = [](auto &Counts, const auto &Val) {
2538 auto CountIt =
2539 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
2540 if (CountIt == Counts.end()) {
2541 Counts.emplace_back(Val, 1);
2542 } else {
2543 CountIt->second++;
2544 }
2545 };
2546
2547 auto GetMostCommon = [](auto &Counts) {
2548 auto CommonIt = llvm::max_element(Counts, llvm::less_second());
2549 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
2550 return *CommonIt;
2551 };
2552
2553 size_t NumConstantLanes = 0;
2554
2555 // Count eligible lanes for each type of vector creation op
2556 for (size_t I = 0; I < Lanes; ++I) {
2557 const SDValue &Lane = Op->getOperand(I);
2558 if (Lane.isUndef())
2559 continue;
2560
2561 AddCount(SplatValueCounts, Lane);
2562
2563 if (IsConstant(Lane))
2564 NumConstantLanes++;
2565 if (auto ShuffleSrc = GetShuffleSrc(Lane))
2566 AddCount(ShuffleCounts, ShuffleSrc);
2567 if (CanSwizzle) {
2568 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
2569 if (SwizzleSrcs.first)
2570 AddCount(SwizzleCounts, SwizzleSrcs);
2571 }
2572 }
2573
2574 SDValue SplatValue;
2575 size_t NumSplatLanes;
2576 std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
2577
2578 SDValue SwizzleSrc;
2579 SDValue SwizzleIndices;
2580 size_t NumSwizzleLanes = 0;
2581 if (SwizzleCounts.size())
2582 std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
2583 NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
2584
2585 // Shuffles can draw from up to two vectors, so find the two most common
2586 // sources.
2587 SDValue ShuffleSrc1, ShuffleSrc2;
2588 size_t NumShuffleLanes = 0;
2589 if (ShuffleCounts.size()) {
2590 std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
2591 llvm::erase_if(ShuffleCounts,
2592 [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
2593 }
2594 if (ShuffleCounts.size()) {
2595 size_t AdditionalShuffleLanes;
2596 std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
2597 GetMostCommon(ShuffleCounts);
2598 NumShuffleLanes += AdditionalShuffleLanes;
2599 }
2600
2601 // Predicate returning true if the lane is properly initialized by the
2602 // original instruction
2603 std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
2604 SDValue Result;
2605 // Prefer swizzles over shuffles over vector consts over splats
2606 if (NumSwizzleLanes >= NumShuffleLanes &&
2607 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
2608 Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
2609 SwizzleIndices);
2610 auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
2611 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
2612 return Swizzled == GetSwizzleSrcs(I, Lane);
2613 };
2614 } else if (NumShuffleLanes >= NumConstantLanes &&
2615 NumShuffleLanes >= NumSplatLanes) {
2616 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
2617 size_t DestLaneCount = VecT.getVectorNumElements();
2618 size_t Scale1 = 1;
2619 size_t Scale2 = 1;
2620 SDValue Src1 = ShuffleSrc1;
2621 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
2622 if (Src1.getValueType() != VecT) {
2623 size_t LaneSize =
2624 Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2625 assert(LaneSize > DestLaneSize);
2626 Scale1 = LaneSize / DestLaneSize;
2627 Src1 = DAG.getBitcast(VecT, Src1);
2628 }
2629 if (Src2.getValueType() != VecT) {
2630 size_t LaneSize =
2631 Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2632 assert(LaneSize > DestLaneSize);
2633 Scale2 = LaneSize / DestLaneSize;
2634 Src2 = DAG.getBitcast(VecT, Src2);
2635 }
2636
2637 int Mask[16];
2638 assert(DestLaneCount <= 16);
2639 for (size_t I = 0; I < DestLaneCount; ++I) {
2640 const SDValue &Lane = Op->getOperand(I);
2641 SDValue Src = GetShuffleSrc(Lane);
2642 if (Src == ShuffleSrc1) {
2643 Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
2644 } else if (Src && Src == ShuffleSrc2) {
2645 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
2646 } else {
2647 Mask[I] = -1;
2648 }
2649 }
2650 ArrayRef<int> MaskRef(Mask, DestLaneCount);
2651 Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
2652 IsLaneConstructed = [&](size_t, const SDValue &Lane) {
2653 auto Src = GetShuffleSrc(Lane);
2654 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
2655 };
2656 } else if (NumConstantLanes >= NumSplatLanes) {
2657 SmallVector<SDValue, 16> ConstLanes;
2658 for (const SDValue &Lane : Op->op_values()) {
2659 if (IsConstant(Lane)) {
2660 // Values may need to be fixed so that they will sign extend to be
2661 // within the expected range during ISel. Check whether the value is in
2662 // bounds based on the lane bit width and if it is out of bounds, lop
2663 // off the extra bits and subtract 2^n to reflect giving the high bit
2664 // value -2^(n-1) rather than +2^(n-1). Skip the i64 case because it
2665 // cannot possibly be out of range.
2666 auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode());
2667 int64_t Val = Const ? Const->getSExtValue() : 0;
2668 uint64_t LaneBits = 128 / Lanes;
2669 assert((LaneBits == 64 || Val >= -(1ll << (LaneBits - 1))) &&
2670 "Unexpected out of bounds negative value");
2671 if (Const && LaneBits != 64 && Val > (1ll << (LaneBits - 1)) - 1) {
2672 uint64_t Mask = (1ll << LaneBits) - 1;
2673 auto NewVal = (((uint64_t)Val & Mask) - (1ll << LaneBits)) & Mask;
2674 ConstLanes.push_back(DAG.getConstant(NewVal, SDLoc(Lane), LaneT));
2675 } else {
2676 ConstLanes.push_back(Lane);
2677 }
2678 } else if (LaneT.isFloatingPoint()) {
2679 ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
2680 } else {
2681 ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
2682 }
2683 }
2684 Result = DAG.getBuildVector(VecT, DL, ConstLanes);
2685 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
2686 return IsConstant(Lane);
2687 };
2688 } else {
2689 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
2690 if (NumSplatLanes == 1 && Op->getOperand(0) == SplatValue &&
2691 (DestLaneSize == 32 || DestLaneSize == 64)) {
2692 // Could be selected to load_zero.
2693 Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecT, SplatValue);
2694 } else {
2695 // Use a splat (which might be selected as a load splat)
2696 Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
2697 }
2698 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
2699 return Lane == SplatValue;
2700 };
2701 }
2702
2703 assert(Result);
2704 assert(IsLaneConstructed);
2705
2706 // Add replace_lane instructions for any unhandled values
2707 for (size_t I = 0; I < Lanes; ++I) {
2708 const SDValue &Lane = Op->getOperand(I);
2709 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
2710 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
2711 DAG.getConstant(I, DL, MVT::i32));
2712 }
2713
2714 return Result;
2715}
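// Numeric sketch of the constant fix-up above: in a v16i8 build_vector the
// lanes are 8 bits wide, so a lane constant of 200 exceeds the signed
// maximum 127; per the comment it is lopped to 200 & 0xff and offset by
// -2^8, giving -56, which carries the same 8-bit pattern (0xc8) but
// sign-extends cleanly during selection.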
2716
2717SDValue
2718WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2719 SelectionDAG &DAG) const {
2720 SDLoc DL(Op);
2721 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
2722 MVT VecType = Op.getOperand(0).getSimpleValueType();
2723 assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2724 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
2725
2726 // Space for two vector args and sixteen mask indices
2727 SDValue Ops[18];
2728 size_t OpIdx = 0;
2729 Ops[OpIdx++] = Op.getOperand(0);
2730 Ops[OpIdx++] = Op.getOperand(1);
2731
2732 // Expand mask indices to byte indices and materialize them as operands
2733 for (int M : Mask) {
2734 for (size_t J = 0; J < LaneBytes; ++J) {
2735 // Lower undefs (represented by -1 in the mask) to byte indices {0..J},
2736 // which reuse a whole lane of vector input and allow further reduction
2737 // in the VM, e.g. matching an 8x16 byte shuffle to a cheaper 32x4 one.
2738 uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
2739 Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
2740 }
2741 }
2742
2743 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2744}
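// Expansion example: a v4i32 shuffle mask <1, 0, 3, 2> becomes the sixteen
// byte indices 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 in the immediate
// of the selected i8x16.shuffle, each lane contributing LaneBytes == 4
// entries.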
2745
2746SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2747 SelectionDAG &DAG) const {
2748 SDLoc DL(Op);
2749 // The legalizer does not know how to expand the unsupported comparison modes
2750 // of i64x2 vectors, so we manually unroll them here.
2751 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
2752 SmallVector<SDValue, 2> LHS, RHS;
2753 DAG.ExtractVectorElements(Op->getOperand(0), LHS);
2754 DAG.ExtractVectorElements(Op->getOperand(1), RHS);
2755 const SDValue &CC = Op->getOperand(2);
2756 auto MakeLane = [&](unsigned I) {
2757 return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
2758 DAG.getConstant(uint64_t(-1), DL, MVT::i64),
2759 DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
2760 };
2761 return DAG.getBuildVector(Op->getValueType(0), DL,
2762 {MakeLane(0), MakeLane(1)});
2763}
2764
2765SDValue
2766WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2767 SelectionDAG &DAG) const {
2768 // Allow constant lane indices, expand variable lane indices
2769 SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
2770 if (isa<ConstantSDNode>(IdxNode)) {
2771 // Ensure the index type is i32 to match the tablegen patterns
2772 uint64_t Idx = IdxNode->getAsZExtVal();
2773 SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
2774 Ops[Op.getNumOperands() - 1] =
2775 DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
2776 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops);
2777 }
2778 // Perform default expansion
2779 return SDValue();
2780}
2781
2782static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
2783 EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2784 // 32-bit and 64-bit unrolled shifts will have proper semantics
2785 if (LaneT.bitsGE(MVT::i32))
2786 return DAG.UnrollVectorOp(Op.getNode());
2787 // Otherwise mask the shift value to get proper semantics from 32-bit shift
2788 SDLoc DL(Op);
2789 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2790 SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
2791 unsigned ShiftOpcode = Op.getOpcode();
2792 SmallVector<SDValue, 16> ShiftedElements;
2793 DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
2794 SmallVector<SDValue, 16> ShiftElements;
2795 DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
2796 SmallVector<SDValue, 16> UnrolledOps;
2797 for (size_t i = 0; i < NumLanes; ++i) {
2798 SDValue MaskedShiftValue =
2799 DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
2800 SDValue ShiftedValue = ShiftedElements[i];
2801 if (ShiftOpcode == ISD::SRA)
2802 ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
2803 ShiftedValue, DAG.getValueType(LaneT));
2804 UnrolledOps.push_back(
2805 DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
2806 }
2807 return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
2808}
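// The masking above is what preserves lane semantics once elements are
// widened to i32: an i8x16 lane shifted by 9 must behave like a shift by
// 9 & 7 == 1, while a plain 32-bit shift by 9 would not wrap at the 8-bit
// lane width.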
2809
2810SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2811 SelectionDAG &DAG) const {
2812 SDLoc DL(Op);
2813
2814 // Only manually lower vector shifts
2815 assert(Op.getSimpleValueType().isVector());
2816
2817 uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2818 auto ShiftVal = Op.getOperand(1);
2819
2820 // Try to skip bitmask operation since it is implied inside shift instruction
2821 auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2822 if (MaskOp.getOpcode() != ISD::AND)
2823 return MaskOp;
2824 SDValue LHS = MaskOp.getOperand(0);
2825 SDValue RHS = MaskOp.getOperand(1);
2826 if (MaskOp.getValueType().isVector()) {
2827 APInt MaskVal;
2828 if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
2829 std::swap(LHS, RHS);
2830
2831 if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
2832 MaskVal == MaskBits)
2833 MaskOp = LHS;
2834 } else {
2835 if (!isa<ConstantSDNode>(RHS.getNode()))
2836 std::swap(LHS, RHS);
2837
2838 auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
2839 if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2840 MaskOp = LHS;
2841 }
2842
2843 return MaskOp;
2844 };
2845
2846 // Skip vector and operation
2847 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2848 ShiftVal = DAG.getSplatValue(ShiftVal);
2849 if (!ShiftVal)
2850 return unrollVectorShift(Op, DAG);
2851
2852 // Skip scalar and operation
2853 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2854 // Use anyext because none of the high bits can affect the shift
2855 ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
2856
2857 unsigned Opcode;
2858 switch (Op.getOpcode()) {
2859 case ISD::SHL:
2860 Opcode = WebAssemblyISD::VEC_SHL;
2861 break;
2862 case ISD::SRA:
2863 Opcode = WebAssemblyISD::VEC_SHR_S;
2864 break;
2865 case ISD::SRL:
2866 Opcode = WebAssemblyISD::VEC_SHR_U;
2867 break;
2868 default:
2869 llvm_unreachable("unexpected opcode");
2870 }
2871
2872 return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
2873}
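// E.g. for an i8x16 shift, (shl %v, (and (splat %s), (splat 7))) drops the
// `and`: LaneBits - 1 == 7 matches the implicit masking that the wasm
// vector shift instructions already apply to their scalar shift operand.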
2874
2875SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2876 SelectionDAG &DAG) const {
2877 EVT ResT = Op.getValueType();
2878 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2879
2880 if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2881 (SatVT == MVT::i32 || SatVT == MVT::i64))
2882 return Op;
2883
2884 if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2885 return Op;
2886
2887 if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2888 return Op;
2889
2890 return SDValue();
2891}
2892
2893//===----------------------------------------------------------------------===//
2894// Custom DAG combine hooks
2895//===----------------------------------------------------------------------===//
2896static SDValue
2897performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2898 auto &DAG = DCI.DAG;
2899 auto Shuffle = cast<ShuffleVectorSDNode>(N);
2900
2901 // Hoist vector bitcasts that don't change the number of lanes out of unary
2902 // shuffles, where they are less likely to get in the way of other combines.
2903 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2904 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2905 SDValue Bitcast = N->getOperand(0);
2906 if (Bitcast.getOpcode() != ISD::BITCAST)
2907 return SDValue();
2908 if (!N->getOperand(1).isUndef())
2909 return SDValue();
2910 SDValue CastOp = Bitcast.getOperand(0);
2911 EVT SrcType = CastOp.getValueType();
2912 EVT DstType = Bitcast.getValueType();
2913 if (!SrcType.is128BitVector() ||
2914 SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2915 return SDValue();
2916 SDValue NewShuffle = DAG.getVectorShuffle(
2917 SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
2918 return DAG.getBitcast(DstType, NewShuffle);
2919}
2920
2921/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
2922/// split up into scalar instructions during legalization, and the vector
2923/// extending instructions are selected in performVectorExtendCombine below.
2924static SDValue
2925performVectorExtendToFPCombine(SDNode *N,
2926 TargetLowering::DAGCombinerInfo &DCI) {
2927 auto &DAG = DCI.DAG;
2928 assert(N->getOpcode() == ISD::UINT_TO_FP ||
2929 N->getOpcode() == ISD::SINT_TO_FP);
2930
2931 EVT InVT = N->getOperand(0)->getValueType(0);
2932 EVT ResVT = N->getValueType(0);
2933 MVT ExtVT;
2934 if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
2935 ExtVT = MVT::v4i32;
2936 else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
2937 ExtVT = MVT::v2i32;
2938 else
2939 return SDValue();
2940
2941 unsigned Op =
2942 N->getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2943 SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
2944 return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
2945}
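// E.g. (v4f32 (uitofp (v4i8 %x))) is rewritten to
// (v4f32 (uitofp (v4i32 (zext %x)))): the zext can then be selected with
// the SIMD widening instructions instead of the conversion being
// scalarized lane by lane.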
2946
2947static SDValue
2948performVectorNonNegToFPCombine(SDNode *N,
2949 TargetLowering::DAGCombinerInfo &DCI) {
2950 auto &DAG = DCI.DAG;
2951
2952 SDNodeFlags Flags = N->getFlags();
2953 SDValue Op0 = N->getOperand(0);
2954 EVT VT = N->getValueType(0);
2955
2956 // Optimize uitofp to sitofp when the sign bit is known to be zero.
2957 // Depending on the target (runtime) backend, this might be performance
2958 // neutral (e.g. AArch64) or a significant improvement (e.g. x86_64).
2959 if (VT.isVector() && (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0))) {
2960 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
2961 }
2962
2963 return SDValue();
2964}
2965
2966static SDValue
2967performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2968 auto &DAG = DCI.DAG;
2969 assert(N->getOpcode() == ISD::SIGN_EXTEND ||
2970 N->getOpcode() == ISD::ZERO_EXTEND);
2971
2972 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
2973 // possible before the extract_subvector can be expanded.
2974 auto Extract = N->getOperand(0);
2975 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2976 return SDValue();
2977 auto Source = Extract.getOperand(0);
2978 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
2979 if (IndexNode == nullptr)
2980 return SDValue();
2981 auto Index = IndexNode->getZExtValue();
2982
2983 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
2984 // extracted subvector is the low or high half of its source.
2985 EVT ResVT = N->getValueType(0);
2986 if (ResVT == MVT::v8i16) {
2987 if (Extract.getValueType() != MVT::v8i8 ||
2988 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
2989 return SDValue();
2990 } else if (ResVT == MVT::v4i32) {
2991 if (Extract.getValueType() != MVT::v4i16 ||
2992 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
2993 return SDValue();
2994 } else if (ResVT == MVT::v2i64) {
2995 if (Extract.getValueType() != MVT::v2i32 ||
2996 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
2997 return SDValue();
2998 } else {
2999 return SDValue();
3000 }
3001
3002 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
3003 bool IsLow = Index == 0;
3004
3005 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
3006 : WebAssemblyISD::EXTEND_HIGH_S)
3007 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
3008 : WebAssemblyISD::EXTEND_HIGH_U);
3009
3010 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3011}
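// E.g. (v4i32 (zext (extract_subvector (v8i16 %x), 4))) names the high
// half of %x, so it folds directly to (EXTEND_HIGH_U %x), i.e.
// i32x4.extend_high_i16x8_u, with no separate subvector extraction.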
3012
3013static SDValue
3014performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
3015 auto &DAG = DCI.DAG;
3016
3017 auto GetWasmConversionOp = [](unsigned Op) {
3018 switch (Op) {
3019 case ISD::FP_TO_SINT_SAT:
3020 return WebAssemblyISD::TRUNC_SAT_ZERO_S;
3021 case ISD::FP_TO_UINT_SAT:
3022 return WebAssemblyISD::TRUNC_SAT_ZERO_U;
3023 case ISD::FP_ROUND:
3024 return WebAssemblyISD::DEMOTE_ZERO;
3025 }
3026 llvm_unreachable("unexpected op");
3027 };
3028
3029 auto IsZeroSplat = [](SDValue SplatVal) {
3030 auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
3031 APInt SplatValue, SplatUndef;
3032 unsigned SplatBitSize;
3033 bool HasAnyUndefs;
3034 // Endianness doesn't matter in this context because we are looking for
3035 // an all-zero value.
3036 return Splat &&
3037 Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
3038 HasAnyUndefs) &&
3039 SplatValue == 0;
3040 };
3041
3042 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
3043 // Combine this:
3044 //
3045 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
3046 //
3047 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3048 //
3049 // Or this:
3050 //
3051 // (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
3052 //
3053 // into (f32x4.demote_zero_f64x2 $x).
3054 EVT ResVT;
3055 EVT ExpectedConversionType;
3056 auto Conversion = N->getOperand(0);
3057 auto ConversionOp = Conversion.getOpcode();
3058 switch (ConversionOp) {
3059 case ISD::FP_TO_SINT_SAT:
3060 case ISD::FP_TO_UINT_SAT:
3061 ResVT = MVT::v4i32;
3062 ExpectedConversionType = MVT::v2i32;
3063 break;
3064 case ISD::FP_ROUND:
3065 ResVT = MVT::v4f32;
3066 ExpectedConversionType = MVT::v2f32;
3067 break;
3068 default:
3069 return SDValue();
3070 }
3071
3072 if (N->getValueType(0) != ResVT)
3073 return SDValue();
3074
3075 if (Conversion.getValueType() != ExpectedConversionType)
3076 return SDValue();
3077
3078 auto Source = Conversion.getOperand(0);
3079 if (Source.getValueType() != MVT::v2f64)
3080 return SDValue();
3081
3082 if (!IsZeroSplat(N->getOperand(1)) ||
3083 N->getOperand(1).getValueType() != ExpectedConversionType)
3084 return SDValue();
3085
3086 unsigned Op = GetWasmConversionOp(ConversionOp);
3087 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3088 }
3089
3090 // Combine this:
3091 //
3092 // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
3093 //
3094 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3095 //
3096 // Or this:
3097 //
3098 // (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
3099 //
3100 // into (f32x4.demote_zero_f64x2 $x).
3101 EVT ResVT;
3102 auto ConversionOp = N->getOpcode();
3103 switch (ConversionOp) {
3104 case ISD::FP_TO_SINT_SAT:
3105 case ISD::FP_TO_UINT_SAT:
3106 ResVT = MVT::v4i32;
3107 break;
3108 case ISD::FP_ROUND:
3109 ResVT = MVT::v4f32;
3110 break;
3111 default:
3112 llvm_unreachable("unexpected op");
3113 }
3114
3115 if (N->getValueType(0) != ResVT)
3116 return SDValue();
3117
3118 auto Concat = N->getOperand(0);
3119 if (Concat.getValueType() != MVT::v4f64)
3120 return SDValue();
3121
3122 auto Source = Concat.getOperand(0);
3123 if (Source.getValueType() != MVT::v2f64)
3124 return SDValue();
3125
3126 if (!IsZeroSplat(Concat.getOperand(1)) ||
3127 Concat.getOperand(1).getValueType() != MVT::v2f64)
3128 return SDValue();
3129
3130 unsigned Op = GetWasmConversionOp(ConversionOp);
3131 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3132}
3133
3134// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
3135static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3136 const SDLoc &DL, unsigned VectorWidth) {
3137 EVT VT = Vec.getValueType();
3138 EVT ElVT = VT.getVectorElementType();
3139 unsigned Factor = VT.getSizeInBits() / VectorWidth;
3140 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
3141 VT.getVectorNumElements() / Factor);
3142
3143 // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
3144 unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3145 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3146
3147 // This is the index of the first element of the VectorWidth-bit chunk
3148 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
3149 IdxVal &= ~(ElemsPerChunk - 1);
3150
3151 // If the input is a buildvector just emit a smaller one.
3152 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3153 return DAG.getBuildVector(ResultVT, DL,
3154 Vec->ops().slice(IdxVal, ElemsPerChunk));
3155
3156 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
3157 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
3158}
3159
3160// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
3161// is the expected destination value type after recursion. In is the initial
3162// input. Note that the input should have enough leading zero bits to prevent
3163// NARROW_U from saturating results.
3164static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL,
3165 SelectionDAG &DAG) {
3166 EVT SrcVT = In.getValueType();
3167
3168 // No truncation required, we might get here due to recursive calls.
3169 if (SrcVT == DstVT)
3170 return In;
3171
3172 unsigned SrcSizeInBits = SrcVT.getSizeInBits();
3173 unsigned NumElems = SrcVT.getVectorNumElements();
3174 if (!isPowerOf2_32(NumElems))
3175 return SDValue();
3176 assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
3177 assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
3178
3179 LLVMContext &Ctx = *DAG.getContext();
3180 EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);
3181
3182 // Narrow to the largest type possible:
3183 // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
3184 EVT InVT = MVT::i16, OutVT = MVT::i8;
3185 if (SrcVT.getScalarSizeInBits() > 16) {
3186 InVT = MVT::i32;
3187 OutVT = MVT::i16;
3188 }
3189 unsigned SubSizeInBits = SrcSizeInBits / 2;
3190 InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
3191 OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
3192
3193 // Split lower/upper subvectors.
3194 SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
3195 SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);
3196
3197 // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
3198 if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
3199 Lo = DAG.getBitcast(InVT, Lo);
3200 Hi = DAG.getBitcast(InVT, Hi);
3201 SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
3202 return DAG.getBitcast(DstVT, Res);
3203 }
3204
3205 // Recursively narrow lower/upper subvectors, concat result and narrow again.
3206 EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
3207 Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
3208 Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);
3209
3210 PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
3211 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
3212 return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
3213}
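// E.g. truncating v16i16 (256 bits) to v16i8: the input is split into two
// v8i16 halves and one i8x16.narrow_i16x8_u packs them into the v16i8
// result; wider sources recurse one halving step at a time. Callers must
// pre-mask the input, since NARROW_U saturates rather than truncates.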
3214
3215static SDValue performTruncateCombine(SDNode *N,
3216 TargetLowering::DAGCombinerInfo &DCI) {
3217 auto &DAG = DCI.DAG;
3218
3219 SDValue In = N->getOperand(0);
3220 EVT InVT = In.getValueType();
3221 if (!InVT.isSimple())
3222 return SDValue();
3223
3224 EVT OutVT = N->getValueType(0);
3225 if (!OutVT.isVector())
3226 return SDValue();
3227
3228 EVT OutSVT = OutVT.getVectorElementType();
3229 EVT InSVT = InVT.getVectorElementType();
3230 // Currently only cover truncate to v16i8 or v8i16.
3231 if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
3232 (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
3233 return SDValue();
3234
3235 SDLoc DL(N);
3236 APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
3237 OutVT.getScalarSizeInBits());
3238 In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
3239 return truncateVectorWithNARROW(OutVT, In, DL, DAG);
3240}
3241
3242static SDValue performBitcastCombine(SDNode *N,
3243 TargetLowering::DAGCombinerInfo &DCI) {
3244 using namespace llvm::SDPatternMatch;
3245 auto &DAG = DCI.DAG;
3246 SDLoc DL(N);
3247 SDValue Src = N->getOperand(0);
3248 EVT VT = N->getValueType(0);
3249 EVT SrcVT = Src.getValueType();
3250
3251 if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
3252 SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1))
3253 return SDValue();
3254
3255 unsigned NumElts = SrcVT.getVectorNumElements();
3256 EVT Width = MVT::getIntegerVT(128 / NumElts);
3257
3258 // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
3259 // ==> bitmask
3260 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
3261 return DAG.getZExtOrTrunc(
3262 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3263 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
3264 DAG.getSExtOrTrunc(N->getOperand(0), DL,
3265 SrcVT.changeVectorElementType(Width))}),
3266 DL, VT);
3267 }
3268
3269 // bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
3270 if (NumElts == 32 || NumElts == 64) {
3271 // Strategy: setcc each v16i8 chunk separately, yielding v16i1 results;
3272 // bitcast each to i16 and extend it to either i32 or i64, then
3273 // accumulate them, shifting the running value left by 16 each step.
3274 SDValue Concat, SetCCVector;
3275 ISD::CondCode SetCond;
3276
3277 if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), m_Value(SetCCVector),
3278 m_CondCode(SetCond)))))
3279 return SDValue();
3280 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3281 return SDValue();
3282
3283 uint64_t ElementWidth =
3284 SetCCVector.getValueType().getVectorElementType().getFixedSizeInBits();
3285
3286 SmallVector<SDValue> VectorsToShuffle;
3287 for (size_t I = 0; I < Concat->ops().size(); I++) {
3288 VectorsToShuffle.push_back(DAG.getBitcast(
3289 MVT::i16,
3290 DAG.getSetCC(DL, MVT::v16i1, Concat->ops()[I],
3291 extractSubVector(SetCCVector, I * (128 / ElementWidth),
3292 DAG, DL, 128),
3293 SetCond)));
3294 }
3295
3296 MVT ReturnType = VectorsToShuffle.size() == 2 ? MVT::i32 : MVT::i64;
3297 SDValue ReturningInteger = DAG.getConstant(0, DL, ReturnType);
3298
3299 for (SDValue V : VectorsToShuffle) {
3300 ReturningInteger = DAG.getNode(
3301 ISD::SHL, DL, ReturnType,
3302 {DAG.getShiftAmountConstant(16, ReturnType, DL), ReturningInteger});
3303
3304 SDValue ExtendedV = DAG.getZExtOrTrunc(V, DL, ReturnType);
3305 ReturningInteger =
3306 DAG.getNode(ISD::ADD, DL, ReturnType, {ReturningInteger, ExtendedV});
3307 }
3308
3309 return ReturningInteger;
3310 }
3311
3312 return SDValue();
3313}
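
// Illustrative walk-through of the wide path above, for a v32i1 bitcast to
// i32: the two concatenated operands are compared chunk-by-chunk as v16i8,
// each v16i1 result is packed into an i16 via i8x16.bitmask, and the two
// masks are then accumulated into the final i32 by the shift-and-add loop.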
3314
3315 static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
3316 // any_true (setcc <X>, 0, eq) => (not (all_true X))
3317 // all_true (setcc <X>, 0, eq) => (not (any_true X))
3318 // any_true (setcc <X>, 0, ne) => (any_true X)
3319 // all_true (setcc <X>, 0, ne) => (all_true X)
3320 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3321 using namespace llvm::SDPatternMatch;
3322
3323 SDValue LHS;
3324 if (!sd_match(N->getOperand(1),
3325 m_c_SetCC(m_Value(LHS), m_Zero(), m_CondCode())))
3326 return SDValue();
3327 EVT LT = LHS.getValueType();
3328 if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
3329 return SDValue();
3330
3331 auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
3332 ISD::CondCode SetType,
3333 Intrinsic::WASMIntrinsics InPost) {
3334 if (N->getConstantOperandVal(0) != InPre)
3335 return SDValue();
3336
3337 SDValue LHS;
3338 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3339 m_SpecificCondCode(SetType))))
3340 return SDValue();
3341
3342 SDLoc DL(N);
3343 SDValue Ret = DAG.getZExtOrTrunc(
3344 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3345 {DAG.getConstant(InPost, DL, MVT::i32), LHS}),
3346 DL, MVT::i1);
3347 if (SetType == ISD::SETEQ)
3348 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3349 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3350 };
3351
3352 if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
3353 Intrinsic::wasm_alltrue))
3354 return AnyTrueEQ;
3355 if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
3356 Intrinsic::wasm_anytrue))
3357 return AllTrueEQ;
3358 if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
3359 Intrinsic::wasm_anytrue))
3360 return AnyTrueNE;
3361 if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
3362 Intrinsic::wasm_alltrue))
3363 return AllTrueNE;
3364
3365 return SDValue();
3366}
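
// Illustrative example of the first rewrite above:
//   @llvm.wasm.anytrue(icmp eq <4 x i32> %v, zeroinitializer)
// asks "is some lane zero?", which is the negation of "every lane is
// non-zero", so it folds to (not (@llvm.wasm.alltrue %v)) and the vector
// compare disappears.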
3367
3368template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
3369 Intrinsic::ID Intrin>
3370 static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
3371 SDValue LHS = N->getOperand(0);
3372 SDValue RHS = N->getOperand(1);
3373 SDValue Cond = N->getOperand(2);
3374 if (MatchCond != cast<CondCodeSDNode>(Cond)->get())
3375 return SDValue();
3376
3377 if (MatchRHS != cast<ConstantSDNode>(RHS)->getSExtValue())
3378 return SDValue();
3379
3380 SDLoc DL(N);
3381 SDValue Ret = DAG.getZExtOrTrunc(
3382 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3383 {DAG.getConstant(Intrin, DL, MVT::i32),
3384 DAG.getSExtOrTrunc(LHS->getOperand(0), DL, VecVT)}),
3385 DL, MVT::i1);
3386 if (RequiresNegate)
3387 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3388 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3389}
3390
3391/// Try to convert an i128 comparison to a v16i8 comparison before type
3392/// legalization splits it up into chunks.
3393static SDValue
3394 combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
3395 const WebAssemblySubtarget *Subtarget) {
3396
3397 SDLoc DL(N);
3398 SDValue X = N->getOperand(0);
3399 SDValue Y = N->getOperand(1);
3400 EVT VT = N->getValueType(0);
3401 EVT OpVT = X.getValueType();
3402
3403 SelectionDAG &DAG = DCI.DAG;
3404 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
3405 Attribute::NoImplicitFloat))
3406 return SDValue();
3407
3408 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3409 // We're looking for an oversized integer equality comparison we can lower with SIMD.
3410 if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 ||
3411 !Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC))
3412 return SDValue();
3413
3414 // Don't perform this combine if constructing the vector will be expensive.
3415 auto IsVectorBitCastCheap = [](SDValue X) {
3416 X = peekThroughBitcasts(X);
3417 return isa<ConstantSDNode>(X) || X.getOpcode() == ISD::LOAD;
3418 };
3419
3420 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
3421 return SDValue();
3422
3423 SDValue VecX = DAG.getBitcast(MVT::v16i8, X);
3424 SDValue VecY = DAG.getBitcast(MVT::v16i8, Y);
3425 SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC);
3426
3427 SDValue Intr =
3428 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3429 {DAG.getConstant(CC == ISD::SETEQ ? Intrinsic::wasm_alltrue
3430 : Intrinsic::wasm_anytrue,
3431 DL, MVT::i32),
3432 Cmp});
3433
3434 return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32),
3435 ISD::SETNE);
3436}
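
// Illustrative sketch of the resulting DAG, for %c = setcc i128 %x, %y, eq
// where %x and %y are loads or constants:
//   %vx = bitcast %x to v16i8
//   %vy = bitcast %y to v16i8
//   %eq = setcc v16i8 %vx, %vy, eq             ; i8x16.eq
//   %c  = setcc (@llvm.wasm.alltrue %eq), 0, ne
// For SETNE the lane-wise compare uses ne and the reduction uses anytrue.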
3437
3438 static SDValue performSETCCCombine(SDNode *N,
3439 TargetLowering::DAGCombinerInfo &DCI,
3440 const WebAssemblySubtarget *Subtarget) {
3441 if (!DCI.isBeforeLegalize())
3442 return SDValue();
3443
3444 EVT VT = N->getValueType(0);
3445 if (!VT.isScalarInteger())
3446 return SDValue();
3447
3448 if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
3449 return V;
3450
3451 SDValue LHS = N->getOperand(0);
3452 if (LHS->getOpcode() != ISD::BITCAST)
3453 return SDValue();
3454
3455 EVT FromVT = LHS->getOperand(0).getValueType();
3456 if (!FromVT.isFixedLengthVector() || FromVT.getVectorElementType() != MVT::i1)
3457 return SDValue();
3458
3459 unsigned NumElts = FromVT.getVectorNumElements();
3460 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
3461 return SDValue();
3462
3463 if (!isa<ConstantSDNode>(N->getOperand(1)))
3464 return SDValue();
3465
3466 EVT VecVT = FromVT.changeVectorElementType(MVT::getIntegerVT(128 / NumElts));
3467 auto &DAG = DCI.DAG;
3468 // setcc (iN (bitcast (vNi1 X))), 0, ne
3469 // ==> any_true (vNi1 X)
3470 if (auto Match = TryMatchTrue<0, ISD::SETNE, false, Intrinsic::wasm_anytrue>(
3471 N, VecVT, DAG)) {
3472 return Match;
3473 }
3474 // setcc (iN (bitcast (vNi1 X))), 0, eq
3475 // ==> xor (any_true (vNi1 X)), -1
3476 if (auto Match = TryMatchTrue<0, ISD::SETEQ, true, Intrinsic::wasm_anytrue>(
3477 N, VecVT, DAG)) {
3478 return Match;
3479 }
3480 // setcc (iN (bitcast (vNi1 X))), -1, eq
3481 // ==> all_true (vNi1 X)
3482 if (auto Match = TryMatchTrue<-1, ISD::SETEQ, false, Intrinsic::wasm_alltrue>(
3483 N, VecVT, DAG)) {
3484 return Match;
3485 }
3486 // setcc (iN (bitcast (vNi1 X))), -1, ne
3487 // ==> xor (all_true (vNi1 X)), -1
3488 if (auto Match = TryMatchTrue<-1, ISD::SETNE, true, Intrinsic::wasm_alltrue>(
3489 N, VecVT, DAG)) {
3490 return Match;
3491 }
3492 return SDValue();
3493}
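
// Illustrative example of the last pattern above: for a <4 x i1> mask %m,
//   setcc (i4 (bitcast %m)), -1, ne
// means "not all mask bits set", so it becomes
//   not (@llvm.wasm.alltrue (sext %m to <4 x i32>))
// zero-extended or truncated back to the setcc result type.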
3494
3495 static SDValue TryWideExtMulCombine(SDNode *N, SelectionDAG &DAG) {
3496 EVT VT = N->getValueType(0);
3497 if (VT != MVT::v8i32 && VT != MVT::v16i32)
3498 return SDValue();
3499
3500 // Mul with extending inputs.
3501 SDValue LHS = N->getOperand(0);
3502 SDValue RHS = N->getOperand(1);
3503 if (LHS.getOpcode() != RHS.getOpcode())
3504 return SDValue();
3505
3506 if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3507 LHS.getOpcode() != ISD::ZERO_EXTEND)
3508 return SDValue();
3509
3510 if (LHS->getOperand(0).getValueType() != RHS->getOperand(0).getValueType())
3511 return SDValue();
3512
3513 EVT FromVT = LHS->getOperand(0).getValueType();
3514 EVT EltTy = FromVT.getVectorElementType();
3515 if (EltTy != MVT::i8)
3516 return SDValue();
3517
3518 // For an input DAG that looks like this
3519 // %a = input_type
3520 // %b = input_type
3521 // %lhs = extend %a to output_type
3522 // %rhs = extend %b to output_type
3523 // %mul = mul %lhs, %rhs
3524
3525 // input_type | output_type | instructions
3526 // v16i8 | v16i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3527 // | | %high = i16x8.extmul_high_i8x16_ %a, %b
3528 // | | %low_low = i32x4.ext_low_i16x8_ %low
3529 // | | %low_high = i32x4.ext_high_i16x8_ %low
3530 // | | %high_low = i32x4.ext_low_i16x8_ %high
3531 // | | %high_high = i32x4.ext_high_i16x8_ %high
3532 // | | %res = concat_vector(...)
3533 // v8i8 | v8i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3534 // | | %low_low = i32x4.ext_low_i16x8_ %low
3535 // | | %low_high = i32x4.ext_high_i16x8_ %low
3536 // | | %res = concat_vector(%low_low, %low_high)
3537
3538 SDLoc DL(N);
3539 unsigned NumElts = VT.getVectorNumElements();
3540 SDValue ExtendInLHS = LHS->getOperand(0);
3541 SDValue ExtendInRHS = RHS->getOperand(0);
3542 bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
3543 unsigned ExtendLowOpc =
3544 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3545 unsigned ExtendHighOpc =
3546 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3547
3548 auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3549 return DAG.getNode(ExtendLowOpc, DL, VT, Op);
3550 };
3551 auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3552 return DAG.getNode(ExtendHighOpc, DL, VT, Op);
3553 };
3554
3555 if (NumElts == 16) {
3556 SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
3557 SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
3558 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3559 SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
3560 SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
3561 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
3562 SDValue SubVectors[] = {
3563 GetExtendLow(MVT::v4i32, MulLow),
3564 GetExtendHigh(MVT::v4i32, MulLow),
3565 GetExtendLow(MVT::v4i32, MulHigh),
3566 GetExtendHigh(MVT::v4i32, MulHigh),
3567 };
3568 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubVectors);
3569 } else {
3570 assert(NumElts == 8);
3571 SDValue LowLHS = DAG.getNode(LHS->getOpcode(), DL, MVT::v8i16, ExtendInLHS);
3572 SDValue LowRHS = DAG.getNode(RHS->getOpcode(), DL, MVT::v8i16, ExtendInRHS);
3573 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3574 SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
3575 SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
3576 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
3577 }
3578 return SDValue();
3579}
3580
3581 static SDValue performMulCombine(SDNode *N,
3582 TargetLowering::DAGCombinerInfo &DCI) {
3583 assert(N->getOpcode() == ISD::MUL);
3584 EVT VT = N->getValueType(0);
3585 if (!VT.isVector())
3586 return SDValue();
3587
3588 if (auto Res = TryWideExtMulCombine(N, DCI.DAG))
3589 return Res;
3590
3591 // We don't natively support v16i8 or v8i8 mul, but we do support v8i16. So,
3592 // extend them to v8i16. Only do this before legalization in case a narrow
3593 // vector is widened and may be simplified later.
3594 if (!DCI.isBeforeLegalize() || (VT != MVT::v8i8 && VT != MVT::v16i8))
3595 return SDValue();
3596
3597 SDLoc DL(N);
3598 SelectionDAG &DAG = DCI.DAG;
3599 SDValue LHS = N->getOperand(0);
3600 SDValue RHS = N->getOperand(1);
3601 EVT MulVT = MVT::v8i16;
3602
3603 if (VT == MVT::v8i8) {
3604 SDValue PromotedLHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, LHS,
3605 DAG.getUNDEF(MVT::v8i8));
3606 SDValue PromotedRHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, RHS,
3607 DAG.getUNDEF(MVT::v8i8));
3608 SDValue LowLHS =
3609 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedLHS);
3610 SDValue LowRHS =
3611 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedRHS);
3612 SDValue MulLow = DAG.getBitcast(
3613 MVT::v16i8, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3614 // Take the low byte of each lane.
3615 SDValue Shuffle = DAG.getVectorShuffle(
3616 MVT::v16i8, DL, MulLow, DAG.getUNDEF(MVT::v16i8),
3617 {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
3618 return extractSubVector(Shuffle, 0, DAG, DL, 64);
3619 } else {
3620 assert(VT == MVT::v16i8 && "Expected v16i8");
3621 SDValue LowLHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, LHS);
3622 SDValue LowRHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, RHS);
3623 SDValue HighLHS =
3624 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, LHS);
3625 SDValue HighRHS =
3626 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, RHS);
3627
3628 SDValue MulLow =
3629 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3630 SDValue MulHigh =
3631 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, HighLHS, HighRHS));
3632
3633 // Take the low byte of each lane.
3634 return DAG.getVectorShuffle(
3635 VT, DL, MulLow, MulHigh,
3636 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
3637 }
3638}
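
// Illustrative lowering for the v16i8 case above:
//   %lo  = i16x8.mul(extend_low_i8x16_u %a,  extend_low_i8x16_u %b)
//   %hi  = i16x8.mul(extend_high_i8x16_u %a, extend_high_i8x16_u %b)
//   %res = i8x16.shuffle %lo, %hi, <0, 2, 4, ..., 30>  ; low byte per lane
// A truncating multiply only needs the low 8 bits of each 16-bit product,
// so extending unsigned rather than signed does not change the result.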
3639
3640SDValue
3641WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
3642 DAGCombinerInfo &DCI) const {
3643 switch (N->getOpcode()) {
3644 default:
3645 return SDValue();
3646 case ISD::BITCAST:
3647 return performBitcastCombine(N, DCI);
3648 case ISD::SETCC:
3649 return performSETCCCombine(N, DCI, Subtarget);
3650 case ISD::VECTOR_SHUFFLE:
3651 return performVECTOR_SHUFFLECombine(N, DCI);
3652 case ISD::SIGN_EXTEND:
3653 case ISD::ZERO_EXTEND:
3654 return performVectorExtendCombine(N, DCI);
3655 case ISD::UINT_TO_FP:
3656 if (auto ExtCombine = performVectorExtendToFPCombine(N, DCI))
3657 return ExtCombine;
3658 return performVectorNonNegToFPCombine(N, DCI);
3659 case ISD::SINT_TO_FP:
3660 return performVectorExtendToFPCombine(N, DCI);
3661 case WebAssemblyISD::CONVERT_LOW_S:
3662 case WebAssemblyISD::CONVERT_LOW_U:
3663 case ISD::FP_ROUND:
3664 case ISD::CONCAT_VECTORS:
3665 return performVectorTruncZeroCombine(N, DCI);
3666 case ISD::TRUNCATE:
3667 return performTruncateCombine(N, DCI);
3668 case ISD::INTRINSIC_WO_CHAIN: {
3669 if (auto AnyAllCombine = performAnyAllCombine(N, DCI.DAG))
3670 return AnyAllCombine;
3671 return performLowerPartialReduction(N, DCI.DAG);
3672 }
3673 case ISD::MUL:
3674 return performMulCombine(N, DCI);
3675 }
3676}