//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Type.h"
#include <initializer_list>

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;
using namespace MIPatternMatch;

AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
    : ST(&ST) {
  using namespace TargetOpcode;
  const LLT p0 = LLT::pointer(0, 64);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);
  const LLT v16s8 = LLT::fixed_vector(16, 8);
  const LLT v8s8 = LLT::fixed_vector(8, 8);
  const LLT v4s8 = LLT::fixed_vector(4, 8);
  const LLT v2s8 = LLT::fixed_vector(2, 8);
  const LLT v8s16 = LLT::fixed_vector(8, 16);
  const LLT v4s16 = LLT::fixed_vector(4, 16);
  const LLT v2s16 = LLT::fixed_vector(2, 16);
  const LLT v2s32 = LLT::fixed_vector(2, 32);
  const LLT v4s32 = LLT::fixed_vector(4, 32);
  const LLT v2s64 = LLT::fixed_vector(2, 64);
  const LLT v2p0 = LLT::fixed_vector(2, p0);

  const LLT nxv16s8 = LLT::scalable_vector(16, s8);
  const LLT nxv8s16 = LLT::scalable_vector(8, s16);
  const LLT nxv4s32 = LLT::scalable_vector(4, s32);
  const LLT nxv2s64 = LLT::scalable_vector(2, s64);
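
  // Reading the LLT shorthand (informal note): sN is an N-bit scalar, vKsN a
  // fixed vector of K N-bit elements, and nxvKsN a scalable vector of
  // vscale x K elements. For example, v4s32 is one 128-bit NEON Q register of
  // i32 lanes, while nxv4s32 fills one SVE Z register.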

  std::initializer_list<LLT> PackedVectorAllTypeList = {
      /* Begin 128bit types */
      v16s8, v8s16, v4s32, v2s64, v2p0,
      /* End 128bit types */
      /* Begin 64bit types */
      v8s8, v4s16, v2s32};
  std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
  SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
  SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);

  const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();

  // FIXME: support subtargets which have neon/fp-armv8 disabled.
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    getLegacyLegalizerInfo().computeTables();
    return;
  }

  // Some instructions only support s16 if the subtarget has full 16-bit FP
  // support.
  const bool HasFP16 = ST.hasFullFP16();
  const LLT &MinFPScalar = HasFP16 ? s16 : s32;

  const bool HasCSSC = ST.hasCSSC();
  const bool HasRCPC3 = ST.hasRCPC3();
  const bool HasSVE = ST.hasSVE();

  getActionDefinitionsBuilder(
      {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
      .legalFor({p0, s8, s16, s32, s64})
      .legalFor({v16s8, v8s16, v4s32, v2s64, v2p0, v8s8, v4s16, v2s32, v4s8,
                 v2s16, v2s8})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s8, s64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .clampMaxNumElements(0, p0, 2);

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s16, s32, s64})
      .legalFor(PackedVectorAllTypeList)
      .clampScalar(0, s16, s64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .clampMaxNumElements(0, p0, 2);

  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
      .clampScalar(0, s32, s64)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
      .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
      .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, s64)
      .clampMaxNumElements(0, s8, 16)
      .clampMaxNumElements(0, s16, 8)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 2;
          },
          0, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 4;
          },
          0, s16)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 16;
          },
          0, s8)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .moreElementsToNextPow2(0);

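  // Illustration of how the rules above compose: an s17 G_ADD is widened to
  // s32 (widenScalarToNextPow2, kept by clampScalar), while a v3s32 G_ADD
  // gains a fourth element via moreElementsToNextPow2 and becomes a legal
  // v4s32 add.
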
  getActionDefinitionsBuilder(G_MUL)
      .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, s64)
      .clampMaxNumElements(0, s8, 16)
      .clampMaxNumElements(0, s16, 8)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 2;
          },
          0, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 4;
          },
          0, s16)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 16;
          },
          0, s8)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
      .customIf([=](const LegalityQuery &Query) {
        const auto &SrcTy = Query.Types[0];
        const auto &AmtTy = Query.Types[1];
        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
               AmtTy.getSizeInBits() == 32;
      })
      .legalFor({
          {s32, s32},
          {s32, s64},
          {s64, s64},
          {v8s8, v8s8},
          {v16s8, v16s8},
          {v4s16, v4s16},
          {v8s16, v8s16},
          {v2s32, v2s32},
          {v4s32, v4s32},
          {v2s64, v2s64},
      })
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_PTR_ADD)
      .legalFor({{p0, s64}, {v2p0, v2s64}})
      .clampScalarOrElt(1, s64, s64)
      .clampNumElements(0, v2p0, v2p0);

  getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});

  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
      .legalFor({s32, s64})
      .libcallFor({s128})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
      .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
      .libcallFor({s128})
      .minScalarOrElt(0, s32)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .scalarize(0);

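  // For instance, AArch64 has no scalar remainder instruction, so an s32
  // G_SREM is lowered to roughly:
  //   %q = G_SDIV %a, %b
  //   %r = G_SUB %a, (G_MUL %q, %b)   ; selects to SDIV + MSUB
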
  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
      .widenScalarToNextPow2(0, /*Min = */ 32)
      .clampScalar(0, s32, s64)
      .lower();

  getActionDefinitionsBuilder({G_SMULH, G_UMULH})
      .legalFor({s64, v8s16, v16s8, v4s32})
      .lower();

  getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
      .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .legalFor(HasCSSC, {s32, s64})
      .minScalar(HasCSSC, 0, s32)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      // FIXME: This shouldn't be needed as v2s64 types are going to
      // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
      .clampNumElements(0, v2s64, v2s64)
      .lower();

  getActionDefinitionsBuilder(
      {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
      .legalFor({{s32, s32}, {s64, s32}})
      .clampScalar(0, s32, s64)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(
      {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
       G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
       G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
      .legalFor({s32, s64, v2s32, v4s32, v2s64})
      .legalFor(HasFP16, {s16, v4s16, v8s16})
      .libcallFor({s128})
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .minScalarOrElt(0, MinFPScalar)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder({G_FABS, G_FNEG})
      .legalFor({s32, s64, v2s32, v4s32, v2s64})
      .legalFor(HasFP16, {s16, v4s16, v8s16})
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .lowerFor({s16, v4s16, v8s16});

  getActionDefinitionsBuilder(G_FREM)
      .libcallFor({s32, s64, s128})
      .minScalar(0, s32)
      .scalarize(0);

  getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
      .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
      .libcallFor({{s64, s128}})
      .minScalarOrElt(1, MinFPScalar);

  getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
                               G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
                               G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
                               G_FSINH, G_FTANH})
      // We need a call for these, so we always need to scalarize.
      .scalarize(0)
      // Regardless of FP16 support, widen 16-bit elements to 32-bits.
      .minScalar(0, s32)
      .libcallFor({s32, s64, s128});

  getActionDefinitionsBuilder(G_FPOWI)
      .scalarize(0)
      .minScalar(0, s32)
      .libcallFor({{s32, s32}, {s64, s32}, {s128, s32}});

  getActionDefinitionsBuilder(G_INSERT)
      .legalIf(all(typeInSet(0, {s32, s64, p0}),
                   typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
      .clampScalar(0, s32, s64)
      .minScalar(1, s8)
      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
      .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);

  getActionDefinitionsBuilder(G_EXTRACT)
      .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
                   typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
      .clampScalar(1, s32, s128)
      .minScalar(0, s16)
      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
      .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
      .maxScalarIf(typeInSet(1, {s128}), 0, s64);

  for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
    auto &Actions = getActionDefinitionsBuilder(Op);

    if (Op == G_SEXTLOAD)
      Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(
          0, AtomicOrdering::Unordered));

    // Atomics have zero extending behavior.
    Actions
        .legalForTypesWithMemDesc({{s32, p0, s8, 8},
                                   {s32, p0, s16, 8},
                                   {s32, p0, s32, 8},
                                   {s64, p0, s8, 2},
                                   {s64, p0, s16, 2},
                                   {s64, p0, s32, 4},
                                   {s64, p0, s64, 8},
                                   {p0, p0, s64, 8},
                                   {v2s32, p0, s64, 8}})
        .widenScalarToNextPow2(0)
        .clampScalar(0, s32, s64)
        // TODO: We could support sum-of-pow2's but the lowering code doesn't
        // know how to do that yet.
        .unsupportedIfMemSizeNotPow2()
        // Lower anything left over into G_*EXT and G_LOAD
        .lower();
  }
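
  // e.g. a non-atomic G_SEXTLOAD of s8 into s32 maps straight onto LDRSB,
  // while an acquire G_SEXTLOAD is first lowered (lowerIf above) into an
  // acquire G_LOAD plus an explicit sign-extend, since LDARB zero-extends.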

  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
    const LLT &ValTy = Query.Types[0];
    return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
  };

  getActionDefinitionsBuilder(G_LOAD)
      .customIf([=](const LegalityQuery &Query) {
        return HasRCPC3 && Query.Types[0] == s128 &&
               Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
      })
      .customIf([=](const LegalityQuery &Query) {
        return Query.Types[0] == s128 &&
               Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
      })
      .legalForTypesWithMemDesc({{s8, p0, s8, 8},
                                 {s16, p0, s16, 8},
                                 {s32, p0, s32, 8},
                                 {s64, p0, s64, 8},
                                 {p0, p0, s64, 8},
                                 {s128, p0, s128, 8},
                                 {v8s8, p0, s64, 8},
                                 {v16s8, p0, s128, 8},
                                 {v4s16, p0, s64, 8},
                                 {v8s16, p0, s128, 8},
                                 {v2s32, p0, s64, 8},
                                 {v4s32, p0, s128, 8},
                                 {v2s64, p0, s128, 8}})
      // These extends are also legal
      .legalForTypesWithMemDesc(
          {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
      .legalForTypesWithMemDesc({
          // SVE vscale x 128 bit base sizes
          {nxv16s8, p0, nxv16s8, 8},
          {nxv8s16, p0, nxv8s16, 8},
          {nxv4s32, p0, nxv4s32, 8},
          {nxv2s64, p0, nxv2s64, 8},
      })
      .widenScalarToNextPow2(0, /* MinSize = */ 8)
      .clampMaxNumElements(0, s8, 16)
      .clampMaxNumElements(0, s16, 8)
      .clampMaxNumElements(0, s32, 4)
      .clampMaxNumElements(0, s64, 2)
      .clampMaxNumElements(0, p0, 2)
      .lowerIfMemSizeNotByteSizePow2()
      .clampScalar(0, s8, s64)
      .narrowScalarIf(
          [=](const LegalityQuery &Query) {
            // Clamp extending load results to 32-bits.
            return Query.Types[0].isScalar() &&
                   Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
                   Query.Types[0].getSizeInBits() > 32;
          },
          changeTo(0, s32))
      // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
      .bitcastIf(typeInSet(0, {v4s8}),
                 [=](const LegalityQuery &Query) {
                   const LLT VecTy = Query.Types[0];
                   return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
                 })
      .customIf(IsPtrVecPred)
      .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);

  getActionDefinitionsBuilder(G_STORE)
      .customIf([=](const LegalityQuery &Query) {
        return HasRCPC3 && Query.Types[0] == s128 &&
               Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
      })
      .customIf([=](const LegalityQuery &Query) {
        return Query.Types[0] == s128 &&
               Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
      })
      .legalForTypesWithMemDesc(
          {{s8, p0, s8, 8},   {s16, p0, s8, 8},  // truncstorei8 from s16
           {s32, p0, s8, 8},                     // truncstorei8 from s32
           {s64, p0, s8, 8},                     // truncstorei8 from s64
           {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
           {s64, p0, s16, 8},                    // truncstorei16 from s64
           {s32, p0, s8, 8},  {s32, p0, s16, 8}, {s32, p0, s32, 8},
           {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
           {p0, p0, s64, 8},  {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
           {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
           {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
      .legalForTypesWithMemDesc({
          // SVE vscale x 128 bit base sizes
          // TODO: Add nxv2p0. Consider bitcastIf.
          // See #92130
          // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
          {nxv16s8, p0, nxv16s8, 8},
          {nxv8s16, p0, nxv8s16, 8},
          {nxv4s32, p0, nxv4s32, 8},
          {nxv2s64, p0, nxv2s64, 8},
      })
      .clampScalar(0, s8, s64)
      .minScalarOrElt(0, s8)
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isScalar() &&
               Query.Types[0] != Query.MMODescrs[0].MemoryTy;
      })
      // Maximum: sN * k = 128
      .clampMaxNumElements(0, s8, 16)
      .clampMaxNumElements(0, s16, 8)
      .clampMaxNumElements(0, s32, 4)
      .clampMaxNumElements(0, s64, 2)
      .clampMaxNumElements(0, p0, 2)
      .lowerIfMemSizeNotPow2()
      // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
      .bitcastIf(all(typeInSet(0, {v4s8}),
                     LegalityPredicate([=](const LegalityQuery &Query) {
                       return Query.Types[0].getSizeInBits() ==
                              Query.MMODescrs[0].MemoryTy.getSizeInBits();
                     })),
                 [=](const LegalityQuery &Query) {
                   const LLT VecTy = Query.Types[0];
                   return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
                 })
      .customIf(IsPtrVecPred)
      .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
      .lower();

  getActionDefinitionsBuilder(G_INDEXED_STORE)
      // Idx 0 == Ptr, Idx 1 == Val
      // TODO: we can implement legalizations but as of now these are
      // generated in a very specific way.
      .legalForTypesWithMemDesc({
          {p0, s8, s8, 8},
          {p0, s16, s16, 8},
          {p0, s32, s8, 8},
          {p0, s32, s16, 8},
          {p0, s32, s32, 8},
          {p0, s64, s64, 8},
          {p0, p0, p0, 8},
          {p0, v8s8, v8s8, 8},
          {p0, v16s8, v16s8, 8},
          {p0, v4s16, v4s16, 8},
          {p0, v8s16, v8s16, 8},
          {p0, v2s32, v2s32, 8},
          {p0, v4s32, v4s32, 8},
          {p0, v2s64, v2s64, 8},
          {p0, v2p0, v2p0, 8},
          {p0, s128, s128, 8},
      })
      .unsupported();

  auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
    LLT LdTy = Query.Types[0];
    LLT PtrTy = Query.Types[1];
    if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
        !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
      return false;
    if (PtrTy != p0)
      return false;
    return true;
  };
  getActionDefinitionsBuilder(G_INDEXED_LOAD)
      .unsupportedIf(
          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
      .legalIf(IndexedLoadBasicPred)
      .unsupported();
  getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
      .unsupportedIf(
          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
      .legalIf(all(typeInSet(0, {s16, s32, s64}),
                   LegalityPredicate([=](const LegalityQuery &Q) {
                     LLT LdTy = Q.Types[0];
                     LLT PtrTy = Q.Types[1];
                     LLT MemTy = Q.MMODescrs[0].MemoryTy;
                     if (PtrTy != p0)
                       return false;
                     if (LdTy == s16)
                       return MemTy == s8;
                     if (LdTy == s32)
                       return MemTy == s8 || MemTy == s16;
                     if (LdTy == s64)
                       return MemTy == s8 || MemTy == s16 || MemTy == s32;
                     return false;
                   })))
      .unsupported();

  // Constants
  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32, s64})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s8, s64);
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64, s128})
      .legalFor(HasFP16, {s16})
      .clampScalar(0, MinFPScalar, s128);

  // FIXME: fix moreElementsToNextPow2
  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s32)
      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.isPointerVector() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
          1, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].isPointerVector();
          },
          0, s64)
      .clampNumElements(1, v8s8, v16s8)
      .clampNumElements(1, v4s16, v8s16)
      .clampNumElements(1, v2s32, v4s32)
      .clampNumElements(1, v2s64, v2s64)
      .clampNumElements(1, v2p0, v2p0)
      .customIf(isVector(0));

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor({{s32, s32},
                 {s32, s64},
                 {v4s32, v4s32},
                 {v2s32, v2s32},
                 {v2s64, v2s64}})
      .legalFor(HasFP16, {{s32, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
      .clampScalar(0, s32, s32)
      .minScalarOrElt(1, MinFPScalar)
      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.isPointerVector() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .clampNumElements(1, v4s16, v8s16)
      .clampNumElements(1, v2s32, v4s32)
      .clampMaxNumElements(1, s64, 2)
      .moreElementsToNextPow2(1)
      .libcallFor({{s32, s128}});

  // Extensions
  auto ExtLegalFunc = [=](const LegalityQuery &Query) {
    unsigned DstSize = Query.Types[0].getSizeInBits();

    // Handle legal vectors using legalFor
    if (Query.Types[0].isVector())
      return false;

    if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
      return false; // Extending to a scalar s128 needs narrowing.

    const LLT &SrcTy = Query.Types[1];

    // Make sure we fit in a register otherwise. Don't bother checking that
    // the source type is below 128 bits. We shouldn't be allowing anything
    // through which is wider than the destination in the first place.
    unsigned SrcSize = SrcTy.getSizeInBits();
    if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
      return false;

    return true;
  };
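
  // Sketch of ExtLegalFunc: G_ZEXT from s8 to s32 passes (both sizes are
  // powers of two and fit in a register), while an extension producing s128
  // fails the DstSize < 128 test and is handled by the narrowing rules below.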
  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
      .legalIf(ExtLegalFunc)
      .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
      .clampScalar(0, s64, s64) // Just for s128, others are handled above.
      .clampMaxNumElements(1, s8, 8)
      .clampMaxNumElements(1, s16, 4)
      .clampMaxNumElements(1, s32, 2)
      // Tries to convert a large EXTEND into two smaller EXTENDs
      .lowerIf([=](const LegalityQuery &Query) {
        return (Query.Types[0].getScalarSizeInBits() >
                Query.Types[1].getScalarSizeInBits() * 2) &&
               Query.Types[0].isVector() &&
               (Query.Types[1].getScalarSizeInBits() == 8 ||
                Query.Types[1].getScalarSizeInBits() == 16);
      })
      .clampMinNumElements(1, s8, 8)
      .clampMinNumElements(1, s16, 4);

  getActionDefinitionsBuilder(G_TRUNC)
      .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
      .clampMaxNumElements(0, s8, 8)
      .clampMaxNumElements(0, s16, 4)
      .clampMaxNumElements(0, s32, 2)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
          0, s8)
      .lowerIf([=](const LegalityQuery &Query) {
        LLT DstTy = Query.Types[0];
        LLT SrcTy = Query.Types[1];
        return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
               DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
      })
      .clampMinNumElements(0, s8, 8)
      .clampMinNumElements(0, s16, 4)
      .alwaysLegal();

  getActionDefinitionsBuilder(G_SEXT_INREG)
      .legalFor({s32, s64})
      .legalFor(PackedVectorAllTypeList)
      .maxScalar(0, s64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .lower();

  // FP conversions
  getActionDefinitionsBuilder(G_FPTRUNC)
      .legalFor(
          {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
      .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
      .clampNumElements(0, v4s16, v4s16)
      .clampNumElements(0, v2s32, v2s32)
      .scalarize(0);

  getActionDefinitionsBuilder(G_FPEXT)
      .legalFor(
          {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
      .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .scalarize(0);

  // Conversions
  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalFor({{s32, s32},
                 {s64, s32},
                 {s32, s64},
                 {s64, s64},
                 {v2s64, v2s64},
                 {v4s32, v4s32},
                 {v2s32, v2s32}})
      .legalFor(HasFP16,
                {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
      // The range of a fp16 value fits into an i17, so we can lower the width
      // to i64.
      .narrowScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
          },
          changeTo(0, s64))
      .widenScalarOrEltToNextPow2OrMinSize(0)
      .minScalar(0, s32)
      .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getScalarSizeInBits() <= 64 &&
                   Query.Types[0].getScalarSizeInBits() >
                       Query.Types[1].getScalarSizeInBits();
          },
          LegalizeMutations::changeElementSizeTo(1, 0))
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].getScalarSizeInBits() <= 64 &&
                   Query.Types[0].getScalarSizeInBits() <
                       Query.Types[1].getScalarSizeInBits();
          },
          LegalizeMutations::changeElementSizeTo(0, 1))
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .libcallFor(
          {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});

  getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
      .legalFor({{s32, s32},
                 {s64, s32},
                 {s32, s64},
                 {s64, s64},
                 {v2s64, v2s64},
                 {v4s32, v4s32},
                 {v2s32, v2s32}})
      .legalFor(HasFP16,
                {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
      // Handle types larger than i64 by scalarizing/lowering.
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
      // The range of a fp16 value fits into an i17, so we can lower the width
      // to i64.
      .narrowScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
          },
          changeTo(0, s64))
      .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
      .widenScalarToNextPow2(0, /*MinSize=*/32)
      .minScalar(0, s32)
      .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            unsigned ITySize = Query.Types[0].getScalarSizeInBits();
            return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
                   ITySize > Query.Types[1].getScalarSizeInBits();
          },
          LegalizeMutations::changeElementSizeTo(1, 0))
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            unsigned FTySize = Query.Types[1].getScalarSizeInBits();
            return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
                   Query.Types[0].getScalarSizeInBits() < FTySize;
          },
          LegalizeMutations::changeElementSizeTo(0, 1))
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalFor({{s32, s32},
                 {s64, s32},
                 {s32, s64},
                 {s64, s64},
                 {v2s64, v2s64},
                 {v4s32, v4s32},
                 {v2s32, v2s32}})
      .legalFor(HasFP16,
                {{s16, s32}, {s16, s64}, {v4s16, v4s16}, {v8s16, v8s16}})
      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
      .minScalar(1, s32)
      .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].getScalarSizeInBits() <= 64 &&
                   Query.Types[0].getScalarSizeInBits() <
                       Query.Types[1].getScalarSizeInBits();
          },
          LegalizeMutations::changeElementSizeTo(0, 1))
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getScalarSizeInBits() <= 64 &&
                   Query.Types[0].getScalarSizeInBits() >
                       Query.Types[1].getScalarSizeInBits();
          },
          LegalizeMutations::changeElementSizeTo(1, 0))
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .libcallFor({{s16, s128},
                   {s32, s128},
                   {s64, s128},
                   {s128, s128},
                   {s128, s32},
                   {s128, s64}});

  // Control-flow
  getActionDefinitionsBuilder(G_BRCOND)
      .legalFor({s32})
      .clampScalar(0, s32, s32);
  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, s64)
      .clampScalar(1, s32, s32)
      .lowerIf(isVector(0));

  // Pointer-handling
  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});

  if (TM.getCodeModel() == CodeModel::Small)
    getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
  else
    getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});

  getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
      .legalIf(all(typeIs(0, p0), typeIs(1, p0)));

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalFor({{s64, p0}, {v2s64, v2p0}})
      .widenScalarToNextPow2(0, 64)
      .clampScalar(0, s64, s64)
      .clampMaxNumElements(0, s64, 2);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .unsupportedIf([&](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
      })
      .legalFor({{p0, s64}, {v2p0, v2s64}})
      .clampMaxNumElements(1, s64, 2);

  // Casts for 32 and 64-bit width type are just copies.
  // Same for 128-bit width type, except they are on the FPR bank.
  getActionDefinitionsBuilder(G_BITCAST)
      // Keeping 32-bit instructions legal to prevent regression in some tests
      .legalForCartesianProduct({s32, v2s16, v4s8})
      .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
      .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
      .customIf([=](const LegalityQuery &Query) {
        // Handle casts from i1 vectors to scalars.
        LLT DstTy = Query.Types[0];
        LLT SrcTy = Query.Types[1];
        return DstTy.isScalar() && SrcTy.isVector() &&
               SrcTy.getScalarSizeInBits() == 1;
      })
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isVector() != Query.Types[1].isVector();
      })
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .lower();

  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
      .lowerIf(
          all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));

  bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();

  getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
      .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
      .customFor(!UseOutlineAtomics, {{s128, p0}})
      .libcallFor(UseOutlineAtomics,
                  {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
                               G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
                               G_ATOMICRMW_XOR})
      .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
      .libcallFor(UseOutlineAtomics,
                  {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
      .clampScalar(0, s32, s64);

  // Do not outline these atomics operations, as per comment in
  // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
  getActionDefinitionsBuilder(
      {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
      .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
    getActionDefinitionsBuilder(Op)
        .widenScalarToNextPow2(LitTyIdx, 8)
        .widenScalarToNextPow2(BigTyIdx, 32)
        .clampScalar(LitTyIdx, s8, s64)
        .clampScalar(BigTyIdx, s32, s128)
        .legalIf([=](const LegalityQuery &Q) {
          switch (Q.Types[BigTyIdx].getSizeInBits()) {
          case 32:
          case 64:
          case 128:
            break;
          default:
            return false;
          }
          switch (Q.Types[LitTyIdx].getSizeInBits()) {
          case 8:
          case 16:
          case 32:
          case 64:
            return true;
          default:
            return false;
          }
        });
  }
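
  // e.g. G_UNMERGE_VALUES of an s64 into two s32 halves is legal here
  // (BigTy = 64, LitTy = 32); splitting into s24 pieces is not, so the
  // widenScalarToNextPow2 rules above first round such types up.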

  // TODO : nxv4s16, nxv2s16, nxv2s32
  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
      .legalFor(HasSVE, {{s16, nxv16s8, s64},
                         {s16, nxv8s16, s64},
                         {s32, nxv4s32, s64},
                         {s64, nxv2s64, s64}})
      .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        if (Query.Types[1].isScalableVector())
          return false;
        return Query.Types[0] != EltTy;
      })
      .minScalar(2, s64)
      .customIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[1];
        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
               VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0;
      })
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            // We want to promote <M x s1> to <M x s64> if that wouldn't
            // cause the total vec size to be > 128b.
            return Query.Types[1].isFixedVector() &&
                   Query.Types[1].getNumElements() <= 2;
          },
          0, s64)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].isFixedVector() &&
                   Query.Types[1].getNumElements() <= 4;
          },
          0, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].isFixedVector() &&
                   Query.Types[1].getNumElements() <= 8;
          },
          0, s16)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].isFixedVector() &&
                   Query.Types[1].getNumElements() <= 16;
          },
          0, s8)
      .minScalarOrElt(0, s8) // Worst case, we need at least s8.
      .clampMaxNumElements(1, s64, 2)
      .clampMaxNumElements(1, s32, 4)
      .clampMaxNumElements(1, s16, 8)
      .clampMaxNumElements(1, s8, 16)
      .clampMaxNumElements(1, p0, 2);

  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
      .legalIf(
          typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0}))
      .legalFor(HasSVE, {{nxv16s8, s32, s64},
                         {nxv8s16, s32, s64},
                         {nxv4s32, s32, s64},
                         {nxv2s64, s64, s64}})
      .widenVectorEltsToVectorMinSize(0, 64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .clampMaxNumElements(0, p0, 2);

  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .legalFor({{v8s8, s8},
                 {v16s8, s8},
                 {v4s16, s16},
                 {v8s16, s16},
                 {v2s32, s32},
                 {v4s32, s32},
                 {v2p0, p0},
                 {v2s64, s64}})
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .minScalarOrElt(0, s8)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();

  getActionDefinitionsBuilder(G_CTLZ)
      .legalForCartesianProduct(
          {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .scalarize(1)
      .widenScalarToNextPow2(1, /*Min=*/32)
      .clampScalar(1, s32, s64)
      .scalarSameSizeAs(0, 1);
  getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();

  // TODO: Custom lowering for v2s32, v4s32, v2s64.
  getActionDefinitionsBuilder(G_BITREVERSE)
      .legalFor({s32, s64, v8s8, v16s8})
      .widenScalarToNextPow2(0, /*Min = */ 32)
      .clampScalar(0, s32, s64)
      .lower();

  getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();

  getActionDefinitionsBuilder(G_CTTZ)
      .lowerIf(isVector(0))
      .widenScalarToNextPow2(1, /*Min=*/32)
      .clampScalar(1, s32, s64)
      .scalarSameSizeAs(0, 1)
      .legalFor(HasCSSC, {s32, s64})
      .customFor(!HasCSSC, {s32, s64});

  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &DstTy = Query.Types[0];
        const LLT &SrcTy = Query.Types[1];
        // For now just support the TBL2 variant which needs the source vectors
        // to be the same size as the dest.
        if (DstTy != SrcTy)
          return false;
        return llvm::is_contained(
            {v2s64, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
      })
      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors) or scalar
      // destinations, we just want those lowered into G_BUILD_VECTOR or
      // G_EXTRACT_ELEMENT.
      .lowerIf([=](const LegalityQuery &Query) {
        return !Query.Types[0].isVector() || !Query.Types[1].isVector();
      })
      .moreElementsIf(
          [](const LegalityQuery &Query) {
            return Query.Types[0].isVector() && Query.Types[1].isVector() &&
                   Query.Types[0].getNumElements() >
                       Query.Types[1].getNumElements();
          },
          changeTo(1, 0))
      .moreElementsIf(
          [](const LegalityQuery &Query) {
            return Query.Types[0].isVector() && Query.Types[1].isVector() &&
                   Query.Types[0].getNumElements() <
                       Query.Types[1].getNumElements();
          },
          changeTo(0, 1))
      .widenScalarOrEltToNextPow2OrMinSize(0, 8)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
        // Bitcast pointers vector to i64.
        const LLT DstTy = Query.Types[0];
        return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));
      });

  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}})
      .bitcastIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getSizeInBits() <= 128 &&
                   Query.Types[1].getSizeInBits() <= 64;
          },
          [=](const LegalityQuery &Query) {
            const LLT DstTy = Query.Types[0];
            const LLT SrcTy = Query.Types[1];
            return std::pair(
                0, DstTy.changeElementSize(SrcTy.getSizeInBits())
                       .changeElementCount(
                           DstTy.getElementCount().divideCoefficientBy(
                               SrcTy.getNumElements())));
          });

  getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});

  getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});

  getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();

  getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();

  if (ST.hasMOPS()) {
    // G_BZERO is not supported. Currently it is only emitted by
    // PreLegalizerCombiner for G_MEMSET with zero constant.
    getActionDefinitionsBuilder(G_BZERO).unsupported();

    getActionDefinitionsBuilder(G_MEMSET)
        .legalForCartesianProduct({p0}, {s64}, {s64})
        .customForCartesianProduct({p0}, {s8}, {s64})
        .immIdx(0); // Inform verifier imm idx 0 is handled.

    getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
        .legalForCartesianProduct({p0}, {p0}, {s64})
        .immIdx(0); // Inform verifier imm idx 0 is handled.

    // G_MEMCPY_INLINE does not have a tailcall immediate
    getActionDefinitionsBuilder(G_MEMCPY_INLINE)
        .legalForCartesianProduct({p0}, {p0}, {s64});

  } else {
    getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
        .libcall();
  }

  // FIXME: Legal vector types are only legal with NEON.
  getActionDefinitionsBuilder(G_ABS)
      .legalFor(HasCSSC, {s32, s64})
      .legalFor(PackedVectorAllTypeList)
      .customIf([=](const LegalityQuery &Q) {
        // TODO: Fix suboptimal codegen for 128+ bit types.
        LLT SrcTy = Q.Types[0];
        return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
      })
      .widenScalarIf(
          [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
          [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
      .widenScalarIf(
          [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
          [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .lower();

  // For fadd reductions we have pairwise operations available. We treat the
  // usual legal types as legal and handle the lowering to pairwise instructions
  // later.
  getActionDefinitionsBuilder(G_VECREDUCE_FADD)
      .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
      .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
      .minScalarOrElt(0, MinFPScalar)
      .clampMaxNumElements(1, s64, 2)
      .clampMaxNumElements(1, s32, 4)
      .clampMaxNumElements(1, s16, 8)
      .lower();

  // For fmul reductions we need to split up into individual operations. We
  // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
  // smaller types, followed by scalarizing what remains.
  getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
      .minScalarOrElt(0, MinFPScalar)
      .clampMaxNumElements(1, s64, 2)
      .clampMaxNumElements(1, s32, 4)
      .clampMaxNumElements(1, s16, 8)
      .clampMaxNumElements(1, s32, 2)
      .clampMaxNumElements(1, s16, 4)
      .scalarize(1)
      .lower();

  getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
      .scalarize(2)
      .lower();

  getActionDefinitionsBuilder(G_VECREDUCE_ADD)
      .legalFor({{s8, v16s8},
                 {s8, v8s8},
                 {s16, v8s16},
                 {s16, v4s16},
                 {s32, v4s32},
                 {s32, v2s32},
                 {s64, v2s64}})
      .clampMaxNumElements(1, s64, 2)
      .clampMaxNumElements(1, s32, 4)
      .clampMaxNumElements(1, s16, 8)
      .clampMaxNumElements(1, s8, 16)
      .widenVectorEltsToVectorMinSize(1, 64)
      .scalarize(1);

  getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
                               G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
      .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
      .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
      .minScalarOrElt(0, MinFPScalar)
      .clampMaxNumElements(1, s64, 2)
      .clampMaxNumElements(1, s32, 4)
      .clampMaxNumElements(1, s16, 8)
      .lower();

  getActionDefinitionsBuilder(G_VECREDUCE_MUL)
      .clampMaxNumElements(1, s32, 2)
      .clampMaxNumElements(1, s16, 4)
      .clampMaxNumElements(1, s8, 8)
      .scalarize(1)
      .lower();

  getActionDefinitionsBuilder(
      {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
      .legalFor({{s8, v8s8},
                 {s8, v16s8},
                 {s16, v4s16},
                 {s16, v8s16},
                 {s32, v2s32},
                 {s32, v4s32}})
      .moreElementsIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].isVector() &&
                   Query.Types[1].getElementType() != s8 &&
                   Query.Types[1].getNumElements() & 1;
          },
          LegalizeMutations::moreElementsToNextPow2(1))
      .clampMaxNumElements(1, s64, 2)
      .clampMaxNumElements(1, s32, 4)
      .clampMaxNumElements(1, s16, 8)
      .clampMaxNumElements(1, s8, 16)
      .scalarize(1)
      .lower();

  getActionDefinitionsBuilder(
      {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
      // Try to break down into smaller vectors as long as they're at least 64
      // bits. This lets us use vector operations for some parts of the
      // reduction.
      .fewerElementsIf(
          [=](const LegalityQuery &Q) {
            LLT SrcTy = Q.Types[1];
            if (SrcTy.isScalar())
              return false;
            if (!isPowerOf2_32(SrcTy.getNumElements()))
              return false;
            // We can usually perform 64b vector operations.
            return SrcTy.getSizeInBits() > 64;
          },
          [=](const LegalityQuery &Q) {
            LLT SrcTy = Q.Types[1];
            return std::make_pair(1, SrcTy.divide(2));
          })
      .scalarize(1)
      .lower();

  // TODO: Update this to correct handling when adding AArch64/SVE support.
  getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();

  getActionDefinitionsBuilder({G_FSHL, G_FSHR})
      .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
      .lower();

  getActionDefinitionsBuilder(G_ROTR)
      .legalFor({{s32, s64}, {s64, s64}})
      .customIf([=](const LegalityQuery &Q) {
        return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
      })
      .lower();
  getActionDefinitionsBuilder(G_ROTL).lower();

  getActionDefinitionsBuilder({G_SBFX, G_UBFX})
      .customFor({{s32, s32}, {s64, s64}});

  auto always = [=](const LegalityQuery &Q) { return true; };
  getActionDefinitionsBuilder(G_CTPOP)
      .legalFor(HasCSSC, {{s32, s32}, {s64, s64}})
      .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
      .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
      .customFor({{s128, s128},
                  {v2s64, v2s64},
                  {v2s32, v2s32},
                  {v4s32, v4s32},
                  {v4s16, v4s16},
                  {v8s16, v8s16}})
      .clampScalar(0, s32, s128)
      .minScalarEltSameAsIf(always, 1, 0)
      .maxScalarEltSameAsIf(always, 1, 0);

  getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
      .legalFor({v2s64, v2s32, v4s32, v4s16, v8s16, v8s8, v16s8})
      .legalFor(HasSVE, {nxv2s64, nxv4s32, nxv8s16, nxv16s8})
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .lower();

  // TODO: Libcall support for s128.
  // TODO: s16 should be legal with full FP16 support.
  getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
      .legalFor({{s64, s32}, {s64, s64}});

  // TODO: Custom legalization for mismatched types.
  getActionDefinitionsBuilder(G_FCOPYSIGN)
      .moreElementsIf(
          [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
          [=](const LegalityQuery &Query) {
            const LLT Ty = Query.Types[0];
            return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
          })
      .lower();

  // Access to floating-point environment.
  getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
                               G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
      .libcall();

  getActionDefinitionsBuilder(G_IS_FPCLASS).lower();

  getActionDefinitionsBuilder(G_PREFETCH).custom();

  getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();

  getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
      .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
      .immIdx(0); // Inform verifier imm idx 0 is handled.

  // TODO: {nxv16s8, s8}, {nxv8s16, s16}
  getActionDefinitionsBuilder(G_SPLAT_VECTOR)
      .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});

  getLegacyLegalizerInfo().computeTables();
  verify(*ST.getInstrInfo());
}

bool AArch64LegalizerInfo::legalizeCustom(
    LegalizerHelper &Helper, MachineInstr &MI,
    LostDebugLocObserver &LocObserver) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  GISelChangeObserver &Observer = Helper.Observer;
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_GLOBAL_VALUE:
    return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX:
    return legalizeBitfieldExtract(MI, MRI, Helper);
  case TargetOpcode::G_FSHL:
  case TargetOpcode::G_FSHR:
    return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
  case TargetOpcode::G_ROTR:
    return legalizeRotate(MI, MRI, Helper);
  case TargetOpcode::G_CTPOP:
    return legalizeCTPOP(MI, MRI, Helper);
  case TargetOpcode::G_ATOMIC_CMPXCHG:
    return legalizeAtomicCmpxchg128(MI, MRI, Helper);
  case TargetOpcode::G_CTTZ:
    return legalizeCTTZ(MI, Helper);
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:
    return legalizeMemOps(MI, Helper);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return legalizeExtractVectorElt(MI, MRI, Helper);
  case TargetOpcode::G_DYN_STACKALLOC:
    return legalizeDynStackAlloc(MI, Helper);
  case TargetOpcode::G_PREFETCH:
    return legalizePrefetch(MI, Helper);
  case TargetOpcode::G_ABS:
    return Helper.lowerAbsToCNeg(MI);
  case TargetOpcode::G_ICMP:
    return legalizeICMP(MI, MRI, MIRBuilder);
  case TargetOpcode::G_BITCAST:
    return legalizeBitcast(MI, Helper);
  }

  llvm_unreachable("expected switch to return");
}

bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
                                           LegalizerHelper &Helper) const {
  assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  // We're trying to handle casts from i1 vectors to scalars but reloading from
  // stack.
  if (!DstTy.isScalar() || !SrcTy.isVector() ||
      SrcTy.getElementType() != LLT::scalar(1))
    return false;

  Helper.createStackStoreLoad(DstReg, SrcReg);
  MI.eraseFromParent();
  return true;
}

bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
                                               MachineRegisterInfo &MRI,
                                               MachineIRBuilder &MIRBuilder,
                                               GISelChangeObserver &Observer,
                                               LegalizerHelper &Helper) const {
  assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
         MI.getOpcode() == TargetOpcode::G_FSHR);

  // Keep as G_FSHR if the shift amount is a G_CONSTANT, else use the generic
  // lowering.
  Register ShiftNo = MI.getOperand(3).getReg();
  LLT ShiftTy = MRI.getType(ShiftNo);
  auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);

  // Adjust the shift amount according to the opcode (FSHL/FSHR):
  // convert FSHL into FSHR.
  LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
  APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);

  // Lower non-constant shifts and leave zero shifts to the optimizer.
  if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
    return (Helper.lowerFunnelShiftAsShifts(MI) ==
            LegalizerHelper::LegalizeResult::Legalized);

  APInt Amount = VRegAndVal->Value.urem(BitWidth);

  Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
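
  // For example, on s32: fshl(a, b, 8) takes the top 32 bits of the 64-bit
  // concatenation a:b shifted left by 8, which equals fshr(a, b, 24).
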
1472
1473 // If the instruction is G_FSHR, has a 64-bit G_CONSTANT for shift amount
1474 // in the range of 0 <-> BitWidth, it is legal
1475 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1476 VRegAndVal->Value.ult(BitWidth))
1477 return true;
1478
1479 // Cast the ShiftNumber to a 64-bit type
1480 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1481
1482 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1483 Observer.changingInstr(MI);
1484 MI.getOperand(3).setReg(Cast64.getReg(0));
1485 Observer.changedInstr(MI);
1486 }
1487 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1488 // instruction
1489 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1490 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1491 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1492 Cast64.getReg(0)});
1493 MI.eraseFromParent();
1494 }
1495 return true;
1496}

bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
                                        MachineRegisterInfo &MRI,
                                        MachineIRBuilder &MIRBuilder) const {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg1 = MI.getOperand(2).getReg();
  Register SrcReg2 = MI.getOperand(3).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg1);

  // Check the vector types are legal.
  if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
      DstTy.getNumElements() != SrcTy.getNumElements() ||
      (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
    return false;

  // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
  // following passes.
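  // e.g. a vector (icmp ne x, y) becomes not(icmp eq x, y), which can select
  // to a compact CMEQ + MVN sequence.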
  CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
  if (Pred != CmpInst::ICMP_NE)
    return true;
  Register CmpReg =
      MIRBuilder
          .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
          .getReg(0);
  MIRBuilder.buildNot(DstReg, CmpReg);

  MI.eraseFromParent();
  return true;
}

bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          LegalizerHelper &Helper) const {
  // To allow for imported patterns to match, we ensure that the rotate amount
  // is 64b with an extension.
  Register AmtReg = MI.getOperand(2).getReg();
  LLT AmtTy = MRI.getType(AmtReg);
  (void)AmtTy;
  assert(AmtTy.isScalar() && "Expected a scalar rotate");
  assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
  auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
  Helper.Observer.changingInstr(MI);
  MI.getOperand(2).setReg(NewAmt.getReg(0));
  Helper.Observer.changedInstr(MI);
  return true;
}

bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
  // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
  // G_ADD_LOW instructions.
  // By splitting this here, we can optimize accesses in the small code model by
  // folding in the G_ADD_LOW into the load/store offset.
  auto &GlobalOp = MI.getOperand(1);
  // Don't modify an intrinsic call.
  if (GlobalOp.isSymbol())
    return true;
  const auto *GV = GlobalOp.getGlobal();
  if (GV->isThreadLocal())
    return true; // Don't want to modify TLS vars.

  auto &TM = ST->getTargetLowering()->getTargetMachine();
  unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);

  if (OpFlags & AArch64II::MO_GOT)
    return true;

  auto Offset = GlobalOp.getOffset();
  Register DstReg = MI.getOperand(0).getReg();
  auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
                  .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
  // Set the regclass on the dest reg too.
  MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);

  // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
  // by creating a MOVK that sets bits 48-63 of the register to (global address
  // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
  // prevent an incorrect tag being generated during relocation when the
  // global appears before the code section. Without the offset, a global at
  // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
  // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
  // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
  // instead of `0xf`.
  // This assumes that we're in the small code model so we can assume a binary
  // size of <= 4GB, which makes the untagged PC relative offset positive. The
  // binary must also be loaded into address range [0, 2^48). Both of these
  // properties need to be ensured at runtime when using tagged addresses.
  if (OpFlags & AArch64II::MO_TAGGED) {
    assert(!Offset &&
           "Should not have folded in an offset for a tagged global!");
    ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
               .addGlobalAddress(GV, 0x100000000,
                                 AArch64II::MO_PREL | AArch64II::MO_G3)
               .addImm(48);
    MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
  }

  MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
      .addGlobalAddress(GV, Offset,
                        OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  MI.eraseFromParent();
  return true;
}

bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                             MachineInstr &MI) const {
  auto LowerBinOp = [&MI](unsigned Opcode) {
    MachineIRBuilder MIB(MI);
    MIB.buildInstr(Opcode, {MI.getOperand(0)},
                   {MI.getOperand(2), MI.getOperand(3)});
    MI.eraseFromParent();
    return true;
  };

  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::vacopy: {
    unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
    unsigned VaListSize =
        (ST->isTargetDarwin() || ST->isTargetWindows())
            ? PtrSize
            : ST->isTargetILP32() ? 20 : 32;
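    // Size sketch: the AAPCS64 va_list is { __stack, __gr_top, __vr_top,
    // __gr_offs, __vr_offs } -- three pointers plus two ints = 32 bytes
    // (20 under ILP32) -- while Darwin and Windows use a plain pointer.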

    MachineFunction &MF = *MI.getMF();
    auto Val = MF.getRegInfo().createGenericVirtualRegister(
        LLT::scalar(VaListSize * 8));
    MachineIRBuilder MIB(MI);
    MIB.buildLoad(Val, MI.getOperand(2),
                  *MF.getMachineMemOperand(MachinePointerInfo(),
                                           MachineMemOperand::MOLoad,
                                           VaListSize, Align(PtrSize)));
    MIB.buildStore(Val, MI.getOperand(1),
                   *MF.getMachineMemOperand(MachinePointerInfo(),
                                            MachineMemOperand::MOStore,
                                            VaListSize, Align(PtrSize)));
    MI.eraseFromParent();
    return true;
  }
  case Intrinsic::get_dynamic_area_offset: {
    MachineIRBuilder &MIB = Helper.MIRBuilder;
    MIB.buildConstant(MI.getOperand(0).getReg(), 0);
    MI.eraseFromParent();
    return true;
  }
  case Intrinsic::aarch64_mops_memset_tag: {
    assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
    // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
    // the instruction).
    MachineIRBuilder MIB(MI);
    auto &Value = MI.getOperand(3);
    Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
    Value.setReg(ExtValueReg);
    return true;
  }
  case Intrinsic::aarch64_prefetch: {
    MachineIRBuilder MIB(MI);
    auto &AddrVal = MI.getOperand(1);

    int64_t IsWrite = MI.getOperand(2).getImm();
    int64_t Target = MI.getOperand(3).getImm();
    int64_t IsStream = MI.getOperand(4).getImm();
    int64_t IsData = MI.getOperand(5).getImm();

    unsigned PrfOp = (IsWrite << 4) |    // Load/Store bit
                     (!IsData << 3) |    // IsDataCache bit
                     (Target << 1) |     // Cache level bits
                     (unsigned)IsStream; // Stream bit
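
    // Worked example (sketch): a data read prefetch of the L2 cache with the
    // "keep" policy has IsWrite=0, IsData=1, Target=1, IsStream=0, giving
    // PrfOp = (0 << 4) | (0 << 3) | (1 << 1) | 0 = 2, i.e. PLDL2KEEP.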

    MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
    MI.eraseFromParent();
    return true;
  }
  case Intrinsic::aarch64_neon_uaddv:
  case Intrinsic::aarch64_neon_saddv:
  case Intrinsic::aarch64_neon_umaxv:
  case Intrinsic::aarch64_neon_smaxv:
  case Intrinsic::aarch64_neon_uminv:
  case Intrinsic::aarch64_neon_sminv: {
    MachineIRBuilder MIB(MI);
    MachineRegisterInfo &MRI = *MIB.getMRI();
    bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
                    IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
                    IntrinsicID == Intrinsic::aarch64_neon_sminv;

    auto OldDst = MI.getOperand(0).getReg();
    auto OldDstTy = MRI.getType(OldDst);
    LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
    if (OldDstTy == NewDstTy)
      return true;

    auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);

    Helper.Observer.changingInstr(MI);
    MI.getOperand(0).setReg(NewDst);
    Helper.Observer.changedInstr(MI);

    MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
    MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
                        OldDst, NewDst);

    return true;
  }
1702 case Intrinsic::aarch64_neon_uaddlp:
1703 case Intrinsic::aarch64_neon_saddlp: {
1704 MachineIRBuilder MIB(MI);
1705
1706 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1707 ? AArch64::G_UADDLP
1708 : AArch64::G_SADDLP;
1709 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1710 MI.eraseFromParent();
1711
1712 return true;
1713 }
1714 case Intrinsic::aarch64_neon_uaddlv:
1715 case Intrinsic::aarch64_neon_saddlv: {
1716 MachineIRBuilder MIB(MI);
1717 MachineRegisterInfo &MRI = *MIB.getMRI();
1718
1719 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1720 ? AArch64::G_UADDLV
1721 : AArch64::G_SADDLV;
1722 Register DstReg = MI.getOperand(0).getReg();
1723 Register SrcReg = MI.getOperand(2).getReg();
1724 LLT DstTy = MRI.getType(DstReg);
1725
1726 LLT MidTy, ExtTy;
1727 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1728 MidTy = LLT::fixed_vector(4, 32);
1729 ExtTy = LLT::scalar(32);
1730 } else {
1731 MidTy = LLT::fixed_vector(2, 64);
1732 ExtTy = LLT::scalar(64);
1733 }
1734
1735 Register MidReg =
1736 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1737 Register ZeroReg =
1738 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1739 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1740 {MidReg, ZeroReg})
1741 .getReg(0);
1742
1743 if (DstTy.getScalarSizeInBits() < 32)
1744 MIB.buildTrunc(DstReg, ExtReg);
1745 else
1746 MIB.buildCopy(DstReg, ExtReg);
1747
1748 MI.eraseFromParent();
1749
1750 return true;
1751 }
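// Sketch of the expansion above for @llvm.aarch64.neon.uaddlv.i32(<16 x i8>)
// (illustrative MIR): the widened sum lands in a vector register, lane 0 is
// extracted, and the scalar result is copied (or truncated for sub-32-bit
// destinations):
//   %mid:_(<4 x s32>) = G_UADDLV %src(<16 x s8>)
//   %zero:_(s64) = G_CONSTANT i64 0
//   %elt:_(s32) = G_EXTRACT_VECTOR_ELT %mid(<4 x s32>), %zero(s64)
//   %dst:_(s32) = COPY %elt(s32)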
1752 case Intrinsic::aarch64_neon_smax:
1753 return LowerBinOp(TargetOpcode::G_SMAX);
1754 case Intrinsic::aarch64_neon_smin:
1755 return LowerBinOp(TargetOpcode::G_SMIN);
1756 case Intrinsic::aarch64_neon_umax:
1757 return LowerBinOp(TargetOpcode::G_UMAX);
1758 case Intrinsic::aarch64_neon_umin:
1759 return LowerBinOp(TargetOpcode::G_UMIN);
1760 case Intrinsic::aarch64_neon_fmax:
1761 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1762 case Intrinsic::aarch64_neon_fmin:
1763 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1764 case Intrinsic::aarch64_neon_fmaxnm:
1765 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1766 case Intrinsic::aarch64_neon_fminnm:
1767 return LowerBinOp(TargetOpcode::G_FMINNUM);
1768 case Intrinsic::aarch64_neon_smull:
1769 return LowerBinOp(AArch64::G_SMULL);
1770 case Intrinsic::aarch64_neon_umull:
1771 return LowerBinOp(AArch64::G_UMULL);
1772 case Intrinsic::aarch64_neon_abs: {
1773 // Lower the intrinsic to G_ABS.
1774 MachineIRBuilder MIB(MI);
1775 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
1776 MI.eraseFromParent();
1777 return true;
1778 }
1779
1780 case Intrinsic::vector_reverse:
1781 // TODO: Add support for vector_reverse
1782 return false;
1783 }
1784
1785 return true;
1786}
1787
1788bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1789 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1790 GISelChangeObserver &Observer) const {
1791 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1792 MI.getOpcode() == TargetOpcode::G_LSHR ||
1793 MI.getOpcode() == TargetOpcode::G_SHL);
1794 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1795 // imported patterns can select it later. Either way, it will be legal.
1796 Register AmtReg = MI.getOperand(2).getReg();
1797 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1798 if (!VRegAndVal)
1799 return true;
1800 // Check the shift amount is in range for an immediate form.
1801 int64_t Amount = VRegAndVal->Value.getSExtValue();
1802 if (Amount > 31)
1803 return true; // This will have to remain a register variant.
1804 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1805 Observer.changingInstr(MI);
1806 MI.getOperand(2).setReg(ExtCst.getReg(0));
1807 Observer.changedInstr(MI);
1808 return true;
1809}
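// For example (a sketch): a 32-bit shift by a small constant,
//   %amt:_(s32) = G_CONSTANT i32 3
//   %res:_(s32) = G_SHL %x, %amt(s32)
// has its amount rebuilt as an s64 constant,
//   %amt64:_(s64) = G_CONSTANT i64 3
//   %res:_(s32) = G_SHL %x, %amt64(s64)
// so the immediate-form patterns imported from SelectionDAG can match.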
1810
1811static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
1812 MachineRegisterInfo &MRI) {
1813 Base = Root;
1814 Offset = 0;
1815
1816 Register NewBase;
1817 int64_t NewOffset;
1818 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1819 isShiftedInt<7, 3>(NewOffset)) {
1820 Base = NewBase;
1821 Offset = NewOffset;
1822 }
1823}
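// isShiftedInt<7, 3> accepts a signed 7-bit value scaled by 8, i.e. offsets
// in [-512, 504] that are multiples of 8 -- the immediate range of LDP/STP
// with 64-bit registers. So (hypothetical example) a root defined as
//   %root:_(p0) = G_PTR_ADD %base, 16
// folds to Base = %base, Offset = 16, emitted later as the scaled
// immediate 16 / 8 = 2.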
1824
1825// FIXME: This should be removed and replaced with the generic bitcast legalize
1826// action.
1827bool AArch64LegalizerInfo::legalizeLoadStore(
1828 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1829 GISelChangeObserver &Observer) const {
1830 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1831 MI.getOpcode() == TargetOpcode::G_LOAD);
1832 // Here we just try to handle vector loads/stores where our value type might
1833 // have pointer elements, which the SelectionDAG importer can't handle. To
1834 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1835 // the value to use s64 types.
1836
1837 // Custom legalization requires that the instruction, if not deleted, be
1838 // fully legalized. To allow further legalization of the instruction, we
1839 // create a new instruction and erase the existing one.
1840
1841 Register ValReg = MI.getOperand(0).getReg();
1842 const LLT ValTy = MRI.getType(ValReg);
1843
1844 if (ValTy == LLT::scalar(128)) {
1845
1846 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1847 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1848 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1849 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1850 bool IsRcpC3 =
1851 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1852
1853 LLT s64 = LLT::scalar(64);
1854
1855 unsigned Opcode;
1856 if (IsRcpC3) {
1857 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1858 } else {
1859 // For LSE2, loads/stores should have been converted to monotonic and had
1860 // a fence inserted after them.
1861 assert(Ordering == AtomicOrdering::Monotonic ||
1862 Ordering == AtomicOrdering::Unordered);
1863 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1864
1865 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1866 }
1867
1868 MachineInstrBuilder NewI;
1869 if (IsLoad) {
1870 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1871 MIRBuilder.buildMergeLikeInstr(
1872 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1873 } else {
1874 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1875 NewI = MIRBuilder.buildInstr(
1876 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1877 }
1878
1879 if (IsRcpC3) {
1880 NewI.addUse(MI.getOperand(1).getReg());
1881 } else {
1882 Register Base;
1883 int Offset;
1884 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1885 NewI.addUse(Base);
1886 NewI.addImm(Offset / 8);
1887 }
1888
1889 NewI.cloneMemRefs(MI);
1890 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
1891 *MRI.getTargetRegisterInfo(),
1892 *ST->getRegBankInfo());
1893 MI.eraseFromParent();
1894 return true;
1895 }
1896
1897 if (!ValTy.isPointerVector() ||
1898 ValTy.getElementType().getAddressSpace() != 0) {
1899 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1900 return false;
1901 }
1902
1903 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1904 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1905 auto &MMO = **MI.memoperands_begin();
1906 MMO.setType(NewTy);
1907
1908 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1909 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1910 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1911 } else {
1912 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
1913 MIRBuilder.buildBitcast(ValReg, NewLoad);
1914 }
1915 MI.eraseFromParent();
1916 return true;
1917}
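// Sketch of the pointer-vector path above (illustrative only): since the
// SelectionDAG importer has no patterns for p0 elements,
//   %v:_(<2 x p0>) = G_LOAD %addr(p0) :: (load (<2 x p0>))
// is replaced by an integer-vector load plus a bitcast:
//   %i:_(<2 x s64>) = G_LOAD %addr(p0) :: (load (<2 x s64>))
//   %v:_(<2 x p0>) = G_BITCAST %i(<2 x s64>)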
1918
1919bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1920 MachineRegisterInfo &MRI,
1921 MachineIRBuilder &MIRBuilder) const {
1922 MachineFunction &MF = MIRBuilder.getMF();
1923 Align Alignment(MI.getOperand(2).getImm());
1924 Register Dst = MI.getOperand(0).getReg();
1925 Register ListPtr = MI.getOperand(1).getReg();
1926
1927 LLT PtrTy = MRI.getType(ListPtr);
1928 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
1929
1930 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1931 const Align PtrAlign = Align(PtrSize);
1932 auto List = MIRBuilder.buildLoad(
1933 PtrTy, ListPtr,
1934 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1935 PtrTy, PtrAlign));
1936
1937 MachineInstrBuilder DstPtr;
1938 if (Alignment > PtrAlign) {
1939 // Realign the list to the actual required alignment.
1940 auto AlignMinus1 =
1941 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
1942 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
1943 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
1944 } else
1945 DstPtr = List;
1946
1947 LLT ValTy = MRI.getType(Dst);
1948 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1949 MIRBuilder.buildLoad(
1950 Dst, DstPtr,
1951 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1952 ValTy, std::max(Alignment, PtrAlign)));
1953
1954 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
1955
1956 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
1957
1958 MIRBuilder.buildStore(NewList, ListPtr,
1959 *MF.getMachineMemOperand(MachinePointerInfo(),
1960 MachineMemOperand::MOStore,
1961 PtrTy, PtrAlign));
1962
1963 MI.eraseFromParent();
1964 return true;
1965}
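// On targets where G_VAARG reaches this point, va_list is a single pointer,
// so the lowering above amounts to (pseudo-C sketch, hypothetical names):
//   char *p = *list;                     // load the current slot
//   p = align_up(p, max(align, 8));      // realign if over-aligned
//   result = *(T *)p;                    // load the argument value
//   *list = p + align_to(sizeof(T), 8);  // store the bumped position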
1966
1967bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1968 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1969 // Only legal if we can select immediate forms.
1970 // TODO: Lower this otherwise.
1971 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
1972 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
1973}
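// That is, G_SBFX/G_UBFX remain legal only when both the lsb and width
// operands are constants, matching the SBFX/UBFX immediate forms. Sketch:
//   %lsb:_(s64) = G_CONSTANT i64 8
//   %width:_(s64) = G_CONSTANT i64 4
//   %res:_(s64) = G_UBFX %x, %lsb(s64), %width(s64)
// With a non-constant lsb or width this returns false (lowering is the TODO
// above).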
1974
1975bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1976 MachineRegisterInfo &MRI,
1977 LegalizerHelper &Helper) const {
1978 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
1979 // it can be more efficiently lowered to the following sequence that uses
1980 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
1981 // registers are cheap.
1982 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
1983 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1984 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1985 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1986 //
1987 // For 128 bit vector popcounts, we lower to the following sequence:
1988 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
1989 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
1990 // uaddlp.4s v0, v0 // v4s32, v2s64
1991 // uaddlp.2d v0, v0 // v2s64
1992 //
1993 // For 64 bit vector popcounts, we lower to the following sequence:
1994 // cnt.8b v0, v0 // v4s16, v2s32
1995 // uaddlp.4h v0, v0 // v4s16, v2s32
1996 // uaddlp.2s v0, v0 // v2s32
1997
1998 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1999 Register Dst = MI.getOperand(0).getReg();
2000 Register Val = MI.getOperand(1).getReg();
2001 LLT Ty = MRI.getType(Val);
2002 unsigned Size = Ty.getSizeInBits();
2003
2004 assert(Ty == MRI.getType(Dst) &&
2005 "Expected src and dst to have the same type!");
2006
2007 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
2008 LLT s64 = LLT::scalar(64);
2009
2010 auto Split = MIRBuilder.buildUnmerge(s64, Val);
2011 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
2012 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
2013 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
2014
2015 MIRBuilder.buildZExt(Dst, Add);
2016 MI.eraseFromParent();
2017 return true;
2018 }
2019
2020 if (!ST->hasNEON() ||
2021 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
2022 // Use generic lowering when custom lowering is not possible.
2023 return Ty.isScalar() && (Size == 32 || Size == 64) &&
2024 Helper.lowerBitCount(MI) ==
2025 LegalizerHelper::LegalizeResult::Legalized;
2026 }
2027
2028 // Pre-conditioning: widen Val up to the nearest vector type.
2029 // s32,s64,v4s16,v2s32 -> v8i8
2030 // v8s16,v4s32,v2s64 -> v16i8
2031 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
2032 if (Ty.isScalar()) {
2033 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
2034 if (Size == 32) {
2035 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
2036 }
2037 }
2038 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
2039
2040 // Count bits in each byte-sized lane.
2041 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
2042
2043 // Sum across lanes.
2044
2045 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2046 Ty.getScalarSizeInBits() != 16) {
2047 LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
2048 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
2049 auto Ones = MIRBuilder.buildConstant(VTy, 1);
2050 MachineInstrBuilder Sum;
2051
2052 if (Ty == LLT::fixed_vector(2, 64)) {
2053 auto UDOT =
2054 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2055 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
2056 } else if (Ty == LLT::fixed_vector(4, 32)) {
2057 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2058 } else if (Ty == LLT::fixed_vector(2, 32)) {
2059 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2060 } else {
2061 llvm_unreachable("unexpected vector shape");
2062 }
2063
2064 Sum->getOperand(0).setReg(Dst);
2065 MI.eraseFromParent();
2066 return true;
2067 }
2068
2069 Register HSum = CTPOP.getReg(0);
2070 unsigned Opc;
2071 SmallVector<LLT> HAddTys;
2072 if (Ty.isScalar()) {
2073 Opc = Intrinsic::aarch64_neon_uaddlv;
2074 HAddTys.push_back(LLT::scalar(32));
2075 } else if (Ty == LLT::fixed_vector(8, 16)) {
2076 Opc = Intrinsic::aarch64_neon_uaddlp;
2077 HAddTys.push_back(LLT::fixed_vector(8, 16));
2078 } else if (Ty == LLT::fixed_vector(4, 32)) {
2079 Opc = Intrinsic::aarch64_neon_uaddlp;
2080 HAddTys.push_back(LLT::fixed_vector(8, 16));
2081 HAddTys.push_back(LLT::fixed_vector(4, 32));
2082 } else if (Ty == LLT::fixed_vector(2, 64)) {
2083 Opc = Intrinsic::aarch64_neon_uaddlp;
2084 HAddTys.push_back(LLT::fixed_vector(8, 16));
2085 HAddTys.push_back(LLT::fixed_vector(4, 32));
2086 HAddTys.push_back(LLT::fixed_vector(2, 64));
2087 } else if (Ty == LLT::fixed_vector(4, 16)) {
2088 Opc = Intrinsic::aarch64_neon_uaddlp;
2089 HAddTys.push_back(LLT::fixed_vector(4, 16));
2090 } else if (Ty == LLT::fixed_vector(2, 32)) {
2091 Opc = Intrinsic::aarch64_neon_uaddlp;
2092 HAddTys.push_back(LLT::fixed_vector(4, 16));
2093 HAddTys.push_back(LLT::fixed_vector(2, 32));
2094 } else
2095 llvm_unreachable("unexpected vector shape");
2096 MachineInstrBuilder UADD;
2097 for (LLT HTy : HAddTys) {
2098 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2099 HSum = UADD.getReg(0);
2100 }
2101
2102 // Post-conditioning.
2103 if (Ty.isScalar() && (Size == 64 || Size == 128))
2104 MIRBuilder.buildZExt(Dst, UADD);
2105 else
2106 UADD->getOperand(0).setReg(Dst);
2107 MI.eraseFromParent();
2108 return true;
2109}
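// End-to-end sketch of the scalar s64 path above (no CSSC, NEON available,
// illustrative MIR):
//   %b:_(<8 x s8>) = G_BITCAST %val(s64)
//   %c:_(<8 x s8>) = G_CTPOP %b
//   %s:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %c
//   %r:_(s64) = G_ZEXT %s(s32)
// i.e. CNT counts bits per byte lane and UADDLV sums the eight lanes.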
2110
2111bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2112 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2113 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2114 LLT s64 = LLT::scalar(64);
2115 auto Addr = MI.getOperand(1).getReg();
2116 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
2117 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
2118 auto DstLo = MRI.createGenericVirtualRegister(s64);
2119 auto DstHi = MRI.createGenericVirtualRegister(s64);
2120
2121 MachineInstrBuilder CAS;
2122 if (ST->hasLSE()) {
2123 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2124 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2125 // the rest of the MIR so we must reassemble the extracted registers into a
2126 // 128-bit known-regclass one with code like this:
2127 //
2128 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2129 // %out = CASP %in1, ...
2130 // %OldLo = G_EXTRACT %out, 0
2131 // %OldHi = G_EXTRACT %out, 64
2132 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2133 unsigned Opcode;
2134 switch (Ordering) {
2135 case AtomicOrdering::Acquire:
2136 Opcode = AArch64::CASPAX;
2137 break;
2138 case AtomicOrdering::Release:
2139 Opcode = AArch64::CASPLX;
2140 break;
2141 case AtomicOrdering::AcquireRelease:
2142 case AtomicOrdering::SequentiallyConsistent:
2143 Opcode = AArch64::CASPALX;
2144 break;
2145 default:
2146 Opcode = AArch64::CASPX;
2147 break;
2148 }
2149
2150 LLT s128 = LLT::scalar(128);
2151 auto CASDst = MRI.createGenericVirtualRegister(s128);
2152 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2153 auto CASNew = MRI.createGenericVirtualRegister(s128);
2154 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2155 .addUse(DesiredI->getOperand(0).getReg())
2156 .addImm(AArch64::sube64)
2157 .addUse(DesiredI->getOperand(1).getReg())
2158 .addImm(AArch64::subo64);
2159 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2160 .addUse(NewI->getOperand(0).getReg())
2161 .addImm(AArch64::sube64)
2162 .addUse(NewI->getOperand(1).getReg())
2163 .addImm(AArch64::subo64);
2164
2165 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2166
2167 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2168 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2169 } else {
2170 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2171 // can take arbitrary registers so it just has the normal GPR64 operands the
2172 // rest of AArch64 is expecting.
2173 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2174 unsigned Opcode;
2175 switch (Ordering) {
2176 case AtomicOrdering::Acquire:
2177 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2178 break;
2179 case AtomicOrdering::Release:
2180 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2181 break;
2182 case AtomicOrdering::AcquireRelease:
2183 case AtomicOrdering::SequentiallyConsistent:
2184 Opcode = AArch64::CMP_SWAP_128;
2185 break;
2186 default:
2187 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2188 break;
2189 }
2190
2191 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2192 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2193 {Addr, DesiredI->getOperand(0),
2194 DesiredI->getOperand(1), NewI->getOperand(0),
2195 NewI->getOperand(1)});
2196 }
2197
2198 CAS.cloneMemRefs(MI);
2199 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2200 *MRI.getTargetRegisterInfo(),
2201 *ST->getRegBankInfo());
2202
2203 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2204 MI.eraseFromParent();
2205 return true;
2206}
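// Sketch of the LSE path above (illustrative): the s64 halves are packed
// into s128 XSeqPair operands with REG_SEQUENCE (sube64/subo64 being the
// low/high subregister indices) before CASP, and unpacked afterwards:
//   %desired:_(s128) = REG_SEQUENCE %dlo(s64), sube64, %dhi(s64), subo64
//   %old:_(s128) = CASPALX %desired, %new, %addr(p0)
//   %oldlo:_(s64) = G_EXTRACT %old(s128), 0
//   %oldhi:_(s64) = G_EXTRACT %old(s128), 64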
2207
2208bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2209 LegalizerHelper &Helper) const {
2210 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2211 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2212 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
2213 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2214 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2215 MI.eraseFromParent();
2216 return true;
2217}
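// This uses the identity cttz(x) == ctlz(bitreverse(x)): reversing the bits
// turns trailing zeros into leading zeros, so the pair selects to RBIT + CLZ.
// For example, x = 0b...01000 has three trailing zeros, and bitreverse(x) =
// 0b00010... has three leading zeros.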
2218
2219bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2220 LegalizerHelper &Helper) const {
2221 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2222
2223 // The tagged version, MOPSMemorySetTagged, is legalized in legalizeIntrinsic.
2224 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2225 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2226 // the instruction).
2227 auto &Value = MI.getOperand(1);
2228 Register ExtValueReg =
2229 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
2230 Value.setReg(ExtValueReg);
2231 return true;
2232 }
2233
2234 return false;
2235}
2236
2237bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2238 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2239 const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
2240 auto VRegAndVal =
2241 getIConstantVRegValWithLookThrough(Element->getIndexReg(), MRI);
2242 if (VRegAndVal)
2243 return true;
2244 LLT VecTy = MRI.getType(Element->getVectorReg());
2245 if (VecTy.isScalableVector())
2246 return true;
2247 return Helper.lowerExtractInsertVectorElt(MI) !=
2248 LegalizerHelper::LegalizeResult::UnableToLegalize;
2249}
2250
2251bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2252 MachineInstr &MI, LegalizerHelper &Helper) const {
2253 MachineFunction &MF = *MI.getParent()->getParent();
2254 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2255 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2256
2257 // If stack probing is not enabled for this function, use the default
2258 // lowering.
2259 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2260 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2261 "inline-asm") {
2262 Helper.lowerDynStackAlloc(MI);
2263 return true;
2264 }
2265
2266 Register Dst = MI.getOperand(0).getReg();
2267 Register AllocSize = MI.getOperand(1).getReg();
2268 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2269
2270 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2271 "Unexpected type for dynamic alloca");
2272 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2273 "Unexpected type for dynamic alloca");
2274
2275 LLT PtrTy = MRI.getType(Dst);
2276 Register SPReg =
2277 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2278 Register SPTmp =
2279 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2280 auto NewMI =
2281 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2282 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2283 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2284 MIRBuilder.buildCopy(Dst, SPTmp);
2285
2286 MI.eraseFromParent();
2287 return true;
2288}
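// The probed path above is only taken for functions that opt in via the
// attribute shown in this IR sketch; otherwise the generic lowering simply
// adjusts SP:
//   define void @f(i64 %n) "probe-stack"="inline-asm" {
//     %p = alloca i8, i64 %n   ; becomes PROBED_STACKALLOC_DYN here
//     ret void
//   }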
2289
2290bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2291 LegalizerHelper &Helper) const {
2292 MachineIRBuilder &MIB = Helper.MIRBuilder;
2293 auto &AddrVal = MI.getOperand(0);
2294
2295 int64_t IsWrite = MI.getOperand(1).getImm();
2296 int64_t Locality = MI.getOperand(2).getImm();
2297 int64_t IsData = MI.getOperand(3).getImm();
2298
2299 bool IsStream = Locality == 0;
2300 if (Locality != 0) {
2301 assert(Locality <= 3 && "Prefetch locality out-of-range");
2302 // The IR locality degree runs opposite to the PRFM cache level: higher
2303 // locality means a faster (lower-numbered) cache, and the encoding
2304 // starts at 0 for level 1, so invert the value.
2305 Locality = 3 - Locality;
2306 }
2307
2308 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2309
2310 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2311 MI.eraseFromParent();
2312 return true;
2313}
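// Worked example (illustrative): @llvm.prefetch locality runs from 0 (no
// locality) to 3 (keep in all caches), while PRFM numbers cache levels from
// 0 for L1, so locality 3 maps to level 3 - 3 = 0:
//   @llvm.prefetch(%p, /*write=*/0, /*locality=*/3, /*data=*/1)
//   -> PrfOp = (0 << 4) | (0 << 3) | (0 << 1) | 0 = 0   (PLDL1KEEP)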