1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
27#include "llvm/IR/Intrinsics.h"
28#include "llvm/IR/IntrinsicsAArch64.h"
29#include "llvm/IR/Type.h"
31#include <initializer_list>
32
33#define DEBUG_TYPE "aarch64-legalinfo"
34
35using namespace llvm;
36using namespace LegalizeActions;
37using namespace LegalizeMutations;
38using namespace LegalityPredicates;
39using namespace MIPatternMatch;
40
41AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
42 : ST(&ST) {
43 using namespace TargetOpcode;
44 const LLT p0 = LLT::pointer(0, 64);
45 const LLT s8 = LLT::scalar(8);
46 const LLT s16 = LLT::scalar(16);
47 const LLT s32 = LLT::scalar(32);
48 const LLT s64 = LLT::scalar(64);
49 const LLT s128 = LLT::scalar(128);
50 const LLT v16s8 = LLT::fixed_vector(16, 8);
51 const LLT v8s8 = LLT::fixed_vector(8, 8);
52 const LLT v4s8 = LLT::fixed_vector(4, 8);
53 const LLT v2s8 = LLT::fixed_vector(2, 8);
54 const LLT v8s16 = LLT::fixed_vector(8, 16);
55 const LLT v4s16 = LLT::fixed_vector(4, 16);
56 const LLT v2s16 = LLT::fixed_vector(2, 16);
57 const LLT v2s32 = LLT::fixed_vector(2, 32);
58 const LLT v4s32 = LLT::fixed_vector(4, 32);
59 const LLT v2s64 = LLT::fixed_vector(2, 64);
60 const LLT v2p0 = LLT::fixed_vector(2, p0);
61
62 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
63 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
64 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
65 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
66
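  // Naming shorthand used above: sN is an N-bit scalar, vMsN is a fixed vector
  // of M N-bit elements (e.g. v4s32 is <4 x i32>), nxvMsN is a scalable vector
  // with M N-bit elements per vscale quantum, and p0 is a 64-bit pointer in
  // address space 0.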
67 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
68 v16s8, v8s16, v4s32,
69 v2s64, v2p0,
70 /* End 128bit types */
71 /* Begin 64bit types */
72 v8s8, v4s16, v2s32};
73 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
74 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
75 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
76
77 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
78
79 // FIXME: support subtargets which have neon/fp-armv8 disabled.
80 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
82 return;
83 }
84
85 // Some instructions only support s16 if the subtarget has full 16-bit FP
86 // support.
87 const bool HasFP16 = ST.hasFullFP16();
88 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
89
90 const bool HasCSSC = ST.hasCSSC();
91 const bool HasRCPC3 = ST.hasRCPC3();
92 const bool HasSVE = ST.hasSVE();
93
94 getActionDefinitionsBuilder(
95 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
96 .legalFor({p0, s8, s16, s32, s64})
97 .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
98 v2s64, v2p0})
99 .widenScalarToNextPow2(0)
100 .clampScalar(0, s8, s64)
103 .clampNumElements(0, v8s8, v16s8)
104 .clampNumElements(0, v4s16, v8s16)
105 .clampNumElements(0, v2s32, v4s32)
106 .clampMaxNumElements(0, s64, 2)
107 .clampMaxNumElements(0, p0, 2)
109
111 .legalFor({p0, s16, s32, s64})
112 .legalFor(PackedVectorAllTypeList)
116 .clampScalar(0, s16, s64)
117 .clampNumElements(0, v8s8, v16s8)
118 .clampNumElements(0, v4s16, v8s16)
119 .clampNumElements(0, v2s32, v4s32)
120 .clampMaxNumElements(0, s64, 2)
121 .clampMaxNumElements(0, p0, 2);
122
124 .legalIf(all(typeInSet(0, {s32, s64, p0}), typeInSet(1, {s8, s16, s32}),
125 smallerThan(1, 0)))
127 .clampScalar(0, s32, s64)
129 .minScalar(1, s8)
130 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
131 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
132
134 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
135 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
137 .clampScalar(1, s32, s128)
139 .minScalar(0, s16)
140 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
141 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
142 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
143
144 getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
145 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
146 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
147 .widenScalarToNextPow2(0)
148 .clampScalar(0, s32, s64)
149 .clampMaxNumElements(0, s8, 16)
150 .clampMaxNumElements(0, s16, 8)
151 .clampNumElements(0, v2s32, v4s32)
152 .clampNumElements(0, v2s64, v2s64)
154 [=](const LegalityQuery &Query) {
155 return Query.Types[0].getNumElements() <= 2;
156 },
157 0, s32)
158 .minScalarOrEltIf(
159 [=](const LegalityQuery &Query) {
160 return Query.Types[0].getNumElements() <= 4;
161 },
162 0, s16)
163 .minScalarOrEltIf(
164 [=](const LegalityQuery &Query) {
165 return Query.Types[0].getNumElements() <= 16;
166 },
167 0, s8)
168 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
170
172 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
173 .widenScalarToNextPow2(0)
174 .clampScalar(0, s32, s64)
175 .clampMaxNumElements(0, s8, 16)
176 .clampMaxNumElements(0, s16, 8)
177 .clampNumElements(0, v2s32, v4s32)
178 .clampNumElements(0, v2s64, v2s64)
180 [=](const LegalityQuery &Query) {
181 return Query.Types[0].getNumElements() <= 2;
182 },
183 0, s32)
184 .minScalarOrEltIf(
185 [=](const LegalityQuery &Query) {
186 return Query.Types[0].getNumElements() <= 4;
187 },
188 0, s16)
189 .minScalarOrEltIf(
190 [=](const LegalityQuery &Query) {
191 return Query.Types[0].getNumElements() <= 16;
192 },
193 0, s8)
194 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
196
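  // A 32-bit shift with a 32-bit amount is marked custom: legalizeShlAshrLshr
  // rewrites a constant shift amount as a 64-bit constant so the imported
  // selection patterns can match it.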
197 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
198 .customIf([=](const LegalityQuery &Query) {
199 const auto &SrcTy = Query.Types[0];
200 const auto &AmtTy = Query.Types[1];
201 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
202 AmtTy.getSizeInBits() == 32;
203 })
204 .legalFor({
205 {s32, s32},
206 {s32, s64},
207 {s64, s64},
208 {v8s8, v8s8},
209 {v16s8, v16s8},
210 {v4s16, v4s16},
211 {v8s16, v8s16},
212 {v2s32, v2s32},
213 {v4s32, v4s32},
214 {v2s64, v2s64},
215 })
216 .widenScalarToNextPow2(0)
217 .clampScalar(1, s32, s64)
218 .clampScalar(0, s32, s64)
219 .clampNumElements(0, v8s8, v16s8)
220 .clampNumElements(0, v4s16, v8s16)
221 .clampNumElements(0, v2s32, v4s32)
222 .clampNumElements(0, v2s64, v2s64)
224 .minScalarSameAs(1, 0)
226
228 .legalFor({{p0, s64}, {v2p0, v2s64}})
229 .clampScalarOrElt(1, s64, s64)
230 .clampNumElements(0, v2p0, v2p0);
231
232 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
233
234 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
235 .legalFor({s32, s64})
236 .libcallFor({s128})
237 .clampScalar(0, s32, s64)
239 .scalarize(0);
240
241 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
242 .lowerFor({s8, s16, s32, s64, v2s32, v4s32, v2s64})
243 .libcallFor({s128})
245 .minScalarOrElt(0, s32)
246 .clampNumElements(0, v2s32, v4s32)
247 .clampNumElements(0, v2s64, v2s64)
248 .scalarize(0);
249
250 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
251 .widenScalarToNextPow2(0, /*Min = */ 32)
252 .clampScalar(0, s32, s64)
253 .lower();
254
255 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
256 .legalFor({s64, v16s8, v8s16, v4s32})
257 .lower();
258
259 getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
260 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
261 .legalFor(HasCSSC, {s32, s64})
262 .minScalar(HasCSSC, 0, s32)
263 .clampNumElements(0, v8s8, v16s8)
264 .clampNumElements(0, v4s16, v8s16)
265 .clampNumElements(0, v2s32, v4s32)
266 .lower();
267
268 // FIXME: Legal vector types are only legal with NEON.
270 .legalFor(HasCSSC, {s32, s64})
271 .legalFor(PackedVectorAllTypeList)
272 .customIf([=](const LegalityQuery &Q) {
273 // TODO: Fix suboptimal codegen for 128+ bit types.
274 LLT SrcTy = Q.Types[0];
275 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
276 })
277 .widenScalarIf(
278 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
279 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
280 .widenScalarIf(
281 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
282 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
283 .clampNumElements(0, v8s8, v16s8)
284 .clampNumElements(0, v4s16, v8s16)
285 .clampNumElements(0, v2s32, v4s32)
286 .clampNumElements(0, v2s64, v2s64)
288 .lower();
289
290 getActionDefinitionsBuilder({G_ABDS, G_ABDU})
291 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
292 .lower();
293
294 getActionDefinitionsBuilder(
295 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
296 .legalFor({{s32, s32}, {s64, s32}})
297 .clampScalar(0, s32, s64)
298 .clampScalar(1, s32, s64)
300
301 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
302 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
303 .lower();
304
306 .legalFor({{s32, s64}, {s64, s64}})
307 .customIf([=](const LegalityQuery &Q) {
308 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
309 })
310 .lower();
312
313 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
314 .customFor({{s32, s32}, {s64, s64}});
315
316 auto always = [=](const LegalityQuery &Q) { return true; };
318 .legalFor(HasCSSC, {{s32, s32}, {s64, s64}})
319 .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
320 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
321 .customFor({{s128, s128},
322 {v4s16, v4s16},
323 {v8s16, v8s16},
324 {v2s32, v2s32},
325 {v4s32, v4s32},
326 {v2s64, v2s64}})
327 .clampScalar(0, s32, s128)
329 .minScalarEltSameAsIf(always, 1, 0)
330 .maxScalarEltSameAsIf(always, 1, 0)
331 .clampNumElements(0, v8s8, v16s8)
332 .clampNumElements(0, v4s16, v8s16)
333 .clampNumElements(0, v2s32, v4s32)
334 .clampNumElements(0, v2s64, v2s64)
337
339 .legalFor({{s32, s32},
340 {s64, s64},
341 {v8s8, v8s8},
342 {v16s8, v16s8},
343 {v4s16, v4s16},
344 {v8s16, v8s16},
345 {v2s32, v2s32},
346 {v4s32, v4s32}})
347 .widenScalarToNextPow2(1, /*Min=*/32)
348 .clampScalar(1, s32, s64)
349 .clampNumElements(0, v8s8, v16s8)
350 .clampNumElements(0, v4s16, v8s16)
351 .clampNumElements(0, v2s32, v4s32)
354 .scalarSameSizeAs(0, 1);
355
356 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
357
359 .lowerIf(isVector(0))
360 .widenScalarToNextPow2(1, /*Min=*/32)
361 .clampScalar(1, s32, s64)
362 .scalarSameSizeAs(0, 1)
363 .legalFor(HasCSSC, {s32, s64})
364 .customFor(!HasCSSC, {s32, s64});
365
366 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
367
368 getActionDefinitionsBuilder(G_BITREVERSE)
369 .legalFor({s32, s64, v8s8, v16s8})
370 .widenScalarToNextPow2(0, /*Min = */ 32)
372 .clampScalar(0, s32, s64)
373 .clampNumElements(0, v8s8, v16s8)
374 .clampNumElements(0, v4s16, v8s16)
375 .clampNumElements(0, v2s32, v4s32)
376 .clampNumElements(0, v2s64, v2s64)
379 .lower();
380
382 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
384 .clampScalar(0, s32, s64)
385 .clampNumElements(0, v4s16, v8s16)
386 .clampNumElements(0, v2s32, v4s32)
387 .clampNumElements(0, v2s64, v2s64)
388 .moreElementsToNextPow2(0);
389
390 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
391 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
392 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
393 .clampNumElements(0, v8s8, v16s8)
394 .clampNumElements(0, v4s16, v8s16)
395 .clampNumElements(0, v2s32, v4s32)
396 .clampMaxNumElements(0, s64, 2)
399 .lower();
400
401 getActionDefinitionsBuilder(
402 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
403 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
404 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
405 .legalFor({s32, s64, v2s32, v4s32, v2s64})
406 .legalFor(HasFP16, {s16, v4s16, v8s16})
407 .libcallFor({s128})
408 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
409 .minScalarOrElt(0, MinFPScalar)
410 .clampNumElements(0, v4s16, v8s16)
411 .clampNumElements(0, v2s32, v4s32)
412 .clampNumElements(0, v2s64, v2s64)
414
415 getActionDefinitionsBuilder({G_FABS, G_FNEG})
416 .legalFor({s32, s64, v2s32, v4s32, v2s64})
417 .legalFor(HasFP16, {s16, v4s16, v8s16})
418 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
420 .clampNumElements(0, v4s16, v8s16)
421 .clampNumElements(0, v2s32, v4s32)
422 .clampNumElements(0, v2s64, v2s64)
424 .lowerFor({s16, v4s16, v8s16});
425
427 .libcallFor({s32, s64, s128})
428 .minScalar(0, s32)
429 .scalarize(0);
430
431 getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
432 .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
433 .libcallFor({{s64, s128}})
434 .minScalarOrElt(1, MinFPScalar);
435
436 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
437 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
438 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
439 G_FSINH, G_FTANH})
440 // We need a call for these, so we always need to scalarize.
441 .scalarize(0)
442 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
443 .minScalar(0, s32)
444 .libcallFor({s32, s64, s128});
446 .scalarize(0)
447 .minScalar(0, s32)
448 .libcallFor({{s32, s32}, {s64, s32}, {s128, s32}});
449
450 // TODO: Libcall support for s128.
451 // TODO: s16 should be legal with full FP16 support.
452 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
453 .legalFor({{s64, s32}, {s64, s64}});
454
455 // TODO: Custom legalization for mismatched types.
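 // The rule below gives a scalar G_FCOPYSIGN vector operands (4 x s16, or
 // 2 x s32/s64) so that the subsequent lowering can use vector bitwise
 // operations on the FPR bank.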
456 getActionDefinitionsBuilder(G_FCOPYSIGN)
458 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
459 [=](const LegalityQuery &Query) {
460 const LLT Ty = Query.Types[0];
461 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
462 })
463 .lower();
464
466
467 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
468 auto &Actions = getActionDefinitionsBuilder(Op);
469
470 if (Op == G_SEXTLOAD)
472
473 // Atomics have zero extending behavior.
474 Actions
475 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
476 {s32, p0, s16, 8},
477 {s32, p0, s32, 8},
478 {s64, p0, s8, 2},
479 {s64, p0, s16, 2},
480 {s64, p0, s32, 4},
481 {s64, p0, s64, 8},
482 {p0, p0, s64, 8},
483 {v2s32, p0, s64, 8}})
484 .widenScalarToNextPow2(0)
485 .clampScalar(0, s32, s64)
486 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
487 // how to do that yet.
488 .unsupportedIfMemSizeNotPow2()
489 // Lower anything left over into G_*EXT and G_LOAD
490 .lower();
491 }
492
493 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
494 const LLT &ValTy = Query.Types[0];
495 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
496 };
497
499 .customIf([=](const LegalityQuery &Query) {
500 return HasRCPC3 && Query.Types[0] == s128 &&
501 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
502 })
503 .customIf([=](const LegalityQuery &Query) {
504 return Query.Types[0] == s128 &&
505 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
506 })
507 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
508 {s16, p0, s16, 8},
509 {s32, p0, s32, 8},
510 {s64, p0, s64, 8},
511 {p0, p0, s64, 8},
512 {s128, p0, s128, 8},
513 {v8s8, p0, s64, 8},
514 {v16s8, p0, s128, 8},
515 {v4s16, p0, s64, 8},
516 {v8s16, p0, s128, 8},
517 {v2s32, p0, s64, 8},
518 {v4s32, p0, s128, 8},
519 {v2s64, p0, s128, 8}})
520 // These extends are also legal
521 .legalForTypesWithMemDesc(
522 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
523 .legalForTypesWithMemDesc({
524 // SVE vscale x 128 bit base sizes
525 {nxv16s8, p0, nxv16s8, 8},
526 {nxv8s16, p0, nxv8s16, 8},
527 {nxv4s32, p0, nxv4s32, 8},
528 {nxv2s64, p0, nxv2s64, 8},
529 })
530 .widenScalarToNextPow2(0, /* MinSize = */ 8)
531 .clampMaxNumElements(0, s8, 16)
532 .clampMaxNumElements(0, s16, 8)
533 .clampMaxNumElements(0, s32, 4)
534 .clampMaxNumElements(0, s64, 2)
535 .clampMaxNumElements(0, p0, 2)
537 .clampScalar(0, s8, s64)
539 [=](const LegalityQuery &Query) {
540 // Clamp extending load results to 32-bits.
541 return Query.Types[0].isScalar() &&
542 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
543 Query.Types[0].getSizeInBits() > 32;
544 },
545 changeTo(0, s32))
546 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
547 .bitcastIf(typeInSet(0, {v4s8}),
548 [=](const LegalityQuery &Query) {
549 const LLT VecTy = Query.Types[0];
550 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
551 })
552 .customIf(IsPtrVecPred)
553 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
555
557 .customIf([=](const LegalityQuery &Query) {
558 return HasRCPC3 && Query.Types[0] == s128 &&
559 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
560 })
561 .customIf([=](const LegalityQuery &Query) {
562 return Query.Types[0] == s128 &&
563 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
564 })
565 .legalForTypesWithMemDesc(
566 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
567 {s32, p0, s8, 8}, // truncstorei8 from s32
568 {s64, p0, s8, 8}, // truncstorei8 from s64
569 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
570 {s64, p0, s16, 8}, // truncstorei16 from s64
571 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
572 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
573 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
574 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
575 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
576 .legalForTypesWithMemDesc({
577 // SVE vscale x 128 bit base sizes
578 // TODO: Add nxv2p0. Consider bitcastIf.
579 // See #92130
580 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
581 {nxv16s8, p0, nxv16s8, 8},
582 {nxv8s16, p0, nxv8s16, 8},
583 {nxv4s32, p0, nxv4s32, 8},
584 {nxv2s64, p0, nxv2s64, 8},
585 })
586 .clampScalar(0, s8, s64)
587 .minScalarOrElt(0, s8)
588 .lowerIf([=](const LegalityQuery &Query) {
589 return Query.Types[0].isScalar() &&
590 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
591 })
592 // Maximum: sN * k = 128
593 .clampMaxNumElements(0, s8, 16)
594 .clampMaxNumElements(0, s16, 8)
595 .clampMaxNumElements(0, s32, 4)
596 .clampMaxNumElements(0, s64, 2)
597 .clampMaxNumElements(0, p0, 2)
599 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
600 .bitcastIf(all(typeInSet(0, {v4s8}),
601 LegalityPredicate([=](const LegalityQuery &Query) {
602 return Query.Types[0].getSizeInBits() ==
603 Query.MMODescrs[0].MemoryTy.getSizeInBits();
604 })),
605 [=](const LegalityQuery &Query) {
606 const LLT VecTy = Query.Types[0];
607 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
608 })
609 .customIf(IsPtrVecPred)
610 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
612 .lower();
613
614 getActionDefinitionsBuilder(G_INDEXED_STORE)
615 // Idx 0 == Ptr, Idx 1 == Val
616 // TODO: we can implement legalizations but as of now these are
617 // generated in a very specific way.
619 {p0, s8, s8, 8},
620 {p0, s16, s16, 8},
621 {p0, s32, s8, 8},
622 {p0, s32, s16, 8},
623 {p0, s32, s32, 8},
624 {p0, s64, s64, 8},
625 {p0, p0, p0, 8},
626 {p0, v8s8, v8s8, 8},
627 {p0, v16s8, v16s8, 8},
628 {p0, v4s16, v4s16, 8},
629 {p0, v8s16, v8s16, 8},
630 {p0, v2s32, v2s32, 8},
631 {p0, v4s32, v4s32, 8},
632 {p0, v2s64, v2s64, 8},
633 {p0, v2p0, v2p0, 8},
634 {p0, s128, s128, 8},
635 })
636 .unsupported();
637
638 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
639 LLT LdTy = Query.Types[0];
640 LLT PtrTy = Query.Types[1];
641 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
642 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
643 return false;
644 if (PtrTy != p0)
645 return false;
646 return true;
647 };
648 getActionDefinitionsBuilder(G_INDEXED_LOAD)
651 .legalIf(IndexedLoadBasicPred)
652 .unsupported();
653 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
654 .unsupportedIf(
656 .legalIf(all(typeInSet(0, {s16, s32, s64}),
657 LegalityPredicate([=](const LegalityQuery &Q) {
658 LLT LdTy = Q.Types[0];
659 LLT PtrTy = Q.Types[1];
660 LLT MemTy = Q.MMODescrs[0].MemoryTy;
661 if (PtrTy != p0)
662 return false;
663 if (LdTy == s16)
664 return MemTy == s8;
665 if (LdTy == s32)
666 return MemTy == s8 || MemTy == s16;
667 if (LdTy == s64)
668 return MemTy == s8 || MemTy == s16 || MemTy == s32;
669 return false;
670 })))
671 .unsupported();
672
673 // Constants
675 .legalFor({p0, s8, s16, s32, s64})
676 .widenScalarToNextPow2(0)
677 .clampScalar(0, s8, s64);
678 getActionDefinitionsBuilder(G_FCONSTANT)
679 .legalFor({s32, s64, s128})
680 .legalFor(HasFP16, {s16})
681 .clampScalar(0, MinFPScalar, s128);
682
683 // FIXME: fix moreElementsToNextPow2
685 .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
687 .clampScalar(1, s32, s64)
688 .clampScalar(0, s32, s32)
689 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
690 .minScalarEltSameAsIf(
691 [=](const LegalityQuery &Query) {
692 const LLT &Ty = Query.Types[0];
693 const LLT &SrcTy = Query.Types[1];
694 return Ty.isVector() && !SrcTy.isPointerVector() &&
695 Ty.getElementType() != SrcTy.getElementType();
696 },
697 0, 1)
698 .minScalarOrEltIf(
699 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
700 1, s32)
701 .minScalarOrEltIf(
702 [=](const LegalityQuery &Query) {
703 return Query.Types[1].isPointerVector();
704 },
705 0, s64)
707 .clampNumElements(1, v8s8, v16s8)
708 .clampNumElements(1, v4s16, v8s16)
709 .clampNumElements(1, v2s32, v4s32)
710 .clampNumElements(1, v2s64, v2s64)
711 .clampNumElements(1, v2p0, v2p0)
712 .customIf(isVector(0));
713
715 .legalFor({{s32, s32},
716 {s32, s64},
717 {v4s32, v4s32},
718 {v2s32, v2s32},
719 {v2s64, v2s64}})
720 .legalFor(HasFP16, {{s32, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
722 .clampScalar(0, s32, s32)
723 .minScalarOrElt(1, MinFPScalar)
724 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
725 .minScalarEltSameAsIf(
726 [=](const LegalityQuery &Query) {
727 const LLT &Ty = Query.Types[0];
728 const LLT &SrcTy = Query.Types[1];
729 return Ty.isVector() && !SrcTy.isPointerVector() &&
730 Ty.getElementType() != SrcTy.getElementType();
731 },
732 0, 1)
733 .clampNumElements(1, v4s16, v8s16)
734 .clampNumElements(1, v2s32, v4s32)
735 .clampMaxNumElements(1, s64, 2)
736 .moreElementsToNextPow2(1)
737 .libcallFor({{s32, s128}});
738
739 // Extensions
740 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
741 unsigned DstSize = Query.Types[0].getSizeInBits();
742
743 // Handle legal vectors using legalFor
744 if (Query.Types[0].isVector())
745 return false;
746
747 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
748 return false; // Extending to a scalar s128 needs narrowing.
749
750 const LLT &SrcTy = Query.Types[1];
751
752 // Make sure we fit in a register otherwise. Don't bother checking that
753 // the source type is below 128 bits. We shouldn't be allowing anything
754 // through which is wider than the destination in the first place.
755 unsigned SrcSize = SrcTy.getSizeInBits();
756 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
757 return false;
758
759 return true;
760 };
761 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
762 .legalIf(ExtLegalFunc)
763 .legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
764 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
766 .clampMaxNumElements(1, s8, 8)
767 .clampMaxNumElements(1, s16, 4)
768 .clampMaxNumElements(1, s32, 2)
769 // Tries to convert a large EXTEND into two smaller EXTENDs
770 .lowerIf([=](const LegalityQuery &Query) {
771 return (Query.Types[0].getScalarSizeInBits() >
772 Query.Types[1].getScalarSizeInBits() * 2) &&
773 Query.Types[0].isVector() &&
774 (Query.Types[1].getScalarSizeInBits() == 8 ||
775 Query.Types[1].getScalarSizeInBits() == 16);
776 })
777 .clampMinNumElements(1, s8, 8)
778 .clampMinNumElements(1, s16, 4)
780
782 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
784 .clampMaxNumElements(0, s8, 8)
785 .clampMaxNumElements(0, s16, 4)
786 .clampMaxNumElements(0, s32, 2)
787 .minScalarOrEltIf(
788 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
789 0, s8)
790 .lowerIf([=](const LegalityQuery &Query) {
791 LLT DstTy = Query.Types[0];
792 LLT SrcTy = Query.Types[1];
793 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
794 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
795 })
796 .clampMinNumElements(0, s8, 8)
797 .clampMinNumElements(0, s16, 4)
798 .alwaysLegal();
799
800 getActionDefinitionsBuilder({G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})
801 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}});
802
803 getActionDefinitionsBuilder(G_SEXT_INREG)
804 .legalFor({s32, s64})
805 .legalFor(PackedVectorAllTypeList)
806 .maxScalar(0, s64)
807 .clampNumElements(0, v8s8, v16s8)
808 .clampNumElements(0, v4s16, v8s16)
809 .clampNumElements(0, v2s32, v4s32)
810 .clampMaxNumElements(0, s64, 2)
811 .lower();
812
813 // FP conversions
815 .legalFor(
816 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
817 .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
818 .clampNumElements(0, v4s16, v4s16)
819 .clampNumElements(0, v2s32, v2s32)
820 .scalarize(0);
821
823 .legalFor(
824 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
825 .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
826 .clampNumElements(0, v4s32, v4s32)
827 .clampNumElements(0, v2s64, v2s64)
828 .scalarize(0);
829
830 // Conversions
831 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
832 .legalFor({{s32, s32},
833 {s64, s32},
834 {s32, s64},
835 {s64, s64},
836 {v2s32, v2s32},
837 {v4s32, v4s32},
838 {v2s64, v2s64}})
839 .legalFor(HasFP16,
840 {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
841 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
843 // The range of an fp16 value fits into an i17, so we can narrow the result
844 // width down to i64.
846 [=](const LegalityQuery &Query) {
847 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
848 },
849 changeTo(0, s64))
851 .widenScalarOrEltToNextPow2OrMinSize(0)
852 .minScalar(0, s32)
853 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
854 .widenScalarIf(
855 [=](const LegalityQuery &Query) {
856 return Query.Types[0].getScalarSizeInBits() <= 64 &&
857 Query.Types[0].getScalarSizeInBits() >
858 Query.Types[1].getScalarSizeInBits();
859 },
861 .widenScalarIf(
862 [=](const LegalityQuery &Query) {
863 return Query.Types[1].getScalarSizeInBits() <= 64 &&
864 Query.Types[0].getScalarSizeInBits() <
865 Query.Types[1].getScalarSizeInBits();
866 },
868 .clampNumElements(0, v4s16, v8s16)
869 .clampNumElements(0, v2s32, v4s32)
870 .clampMaxNumElements(0, s64, 2)
871 .libcallFor(
872 {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
873
874 getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
875 .legalFor({{s32, s32},
876 {s64, s32},
877 {s32, s64},
878 {s64, s64},
879 {v2s32, v2s32},
880 {v4s32, v4s32},
881 {v2s64, v2s64}})
882 .legalFor(
883 HasFP16,
884 {{s16, s16}, {s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
885 // Handle types larger than i64 by scalarizing/lowering.
886 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
888 // The range of an fp16 value fits into an i17, so we can narrow the result
889 // width down to i64.
891 [=](const LegalityQuery &Query) {
892 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
893 },
894 changeTo(0, s64))
895 .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
897 .widenScalarToNextPow2(0, /*MinSize=*/32)
898 .minScalar(0, s32)
899 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
900 .widenScalarIf(
901 [=](const LegalityQuery &Query) {
902 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
903 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
904 ITySize > Query.Types[1].getScalarSizeInBits();
905 },
907 .widenScalarIf(
908 [=](const LegalityQuery &Query) {
909 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
910 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
911 Query.Types[0].getScalarSizeInBits() < FTySize;
912 },
915 .clampNumElements(0, v4s16, v8s16)
916 .clampNumElements(0, v2s32, v4s32)
917 .clampMaxNumElements(0, s64, 2);
918
919 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
920 .legalFor({{s32, s32},
921 {s64, s32},
922 {s32, s64},
923 {s64, s64},
924 {v2s32, v2s32},
925 {v4s32, v4s32},
926 {v2s64, v2s64}})
927 .legalFor(HasFP16,
928 {{s16, s32}, {s16, s64}, {v4s16, v4s16}, {v8s16, v8s16}})
929 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
933 .minScalar(1, s32)
934 .lowerIf([](const LegalityQuery &Query) {
935 return Query.Types[1].isVector() &&
936 Query.Types[1].getScalarSizeInBits() == 64 &&
937 Query.Types[0].getScalarSizeInBits() == 16;
938 })
939 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
941 // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
942 [](const LegalityQuery &Query) {
943 return Query.Types[0].getScalarSizeInBits() == 32 &&
944 Query.Types[1].getScalarSizeInBits() == 64;
945 },
946 0)
947 .widenScalarIf(
948 [](const LegalityQuery &Query) {
949 return Query.Types[1].getScalarSizeInBits() <= 64 &&
950 Query.Types[0].getScalarSizeInBits() <
951 Query.Types[1].getScalarSizeInBits();
952 },
954 .widenScalarIf(
955 [](const LegalityQuery &Query) {
956 return Query.Types[0].getScalarSizeInBits() <= 64 &&
957 Query.Types[0].getScalarSizeInBits() >
958 Query.Types[1].getScalarSizeInBits();
959 },
961 .clampNumElements(0, v4s16, v8s16)
962 .clampNumElements(0, v2s32, v4s32)
963 .clampMaxNumElements(0, s64, 2)
964 .libcallFor({{s16, s128},
965 {s32, s128},
966 {s64, s128},
967 {s128, s128},
968 {s128, s32},
969 {s128, s64}});
970
971 // Control-flow
974 .legalFor({s32})
975 .clampScalar(0, s32, s32);
976 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
977
979 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
980 .widenScalarToNextPow2(0)
981 .clampScalar(0, s32, s64)
982 .clampScalar(1, s32, s32)
985 .lowerIf(isVector(0));
986
987 // Pointer-handling
988 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
989
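 // In the small code model a global address is materialized as ADRP plus a
 // low-12-bit offset add; the custom action performs that split in
 // legalizeSmallCMGlobalValue. Other code models keep G_GLOBAL_VALUE legal
 // and leave the materialization to instruction selection.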
990 if (TM.getCodeModel() == CodeModel::Small)
991 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
992 else
993 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
994
995 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
996 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
997
999 .legalFor({{s64, p0}, {v2s64, v2p0}})
1000 .widenScalarToNextPow2(0, 64)
1001 .clampScalar(0, s64, s64)
1002 .clampMaxNumElements(0, s64, 2);
1003
1004 getActionDefinitionsBuilder(G_INTTOPTR)
1005 .unsupportedIf([&](const LegalityQuery &Query) {
1006 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
1007 })
1008 .legalFor({{p0, s64}, {v2p0, v2s64}})
1009 .clampMaxNumElements(1, s64, 2);
1010
1011 // Bitcasts between types of 32 or 64-bit width are just copies.
1012 // The same goes for 128-bit wide types, except those live on the FPR bank.
1014 // Keeping 32-bit instructions legal to prevent regression in some tests
1015 .legalForCartesianProduct({s32, v2s16, v4s8})
1016 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
1017 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
1018 .customIf([=](const LegalityQuery &Query) {
1019 // Handle casts from i1 vectors to scalars.
1020 LLT DstTy = Query.Types[0];
1021 LLT SrcTy = Query.Types[1];
1022 return DstTy.isScalar() && SrcTy.isVector() &&
1023 SrcTy.getScalarSizeInBits() == 1;
1024 })
1025 .lowerIf([=](const LegalityQuery &Query) {
1026 return Query.Types[0].isVector() != Query.Types[1].isVector();
1027 })
1029 .clampNumElements(0, v8s8, v16s8)
1030 .clampNumElements(0, v4s16, v8s16)
1031 .clampNumElements(0, v2s32, v4s32)
1032 .lower();
1033
1034 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
1035
1036 // va_list must be a pointer, but most sized types are pretty easy to handle
1037 // as the destination.
1039 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
1040 .clampScalar(0, s8, s64)
1041 .widenScalarToNextPow2(0, /*Min*/ 8);
1042
1043 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
1044 .lowerIf(
1045 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
1046
1047 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
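 // With LSE the atomic instructions (CAS, LD<op>, SWP) are selected directly.
 // Without LSE, -moutline-atomics routes these operations through the
 // __aarch64_* helper functions instead of inline LL/SC loops.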
1048
1049 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
1050 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1051 .customFor(!UseOutlineAtomics, {{s128, p0}})
1052 .libcallFor(UseOutlineAtomics,
1053 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
1054 .clampScalar(0, s32, s64);
1055
1056 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
1057 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
1058 G_ATOMICRMW_XOR})
1059 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1060 .libcallFor(UseOutlineAtomics,
1061 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
1062 .clampScalar(0, s32, s64);
1063
1064 // Do not outline these atomic operations, as per the comment in
1065 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
1066 getActionDefinitionsBuilder(
1067 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
1068 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
1069 .clampScalar(0, s32, s64);
1070
1071 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
1072
1073 // Merge/Unmerge
1074 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
1075 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
1076 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
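 // G_MERGE_VALUES defines the big type (type index 0) from several little
 // pieces; G_UNMERGE_VALUES is the inverse, so the two indices swap.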
1078 .widenScalarToNextPow2(LitTyIdx, 8)
1079 .widenScalarToNextPow2(BigTyIdx, 32)
1080 .clampScalar(LitTyIdx, s8, s64)
1081 .clampScalar(BigTyIdx, s32, s128)
1082 .legalIf([=](const LegalityQuery &Q) {
1083 switch (Q.Types[BigTyIdx].getSizeInBits()) {
1084 case 32:
1085 case 64:
1086 case 128:
1087 break;
1088 default:
1089 return false;
1090 }
1091 switch (Q.Types[LitTyIdx].getSizeInBits()) {
1092 case 8:
1093 case 16:
1094 case 32:
1095 case 64:
1096 return true;
1097 default:
1098 return false;
1099 }
1100 });
1101 }
1102
1103 // TODO : nxv4s16, nxv2s16, nxv2s32
1104 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
1105 .legalFor(HasSVE, {{s16, nxv16s8, s64},
1106 {s16, nxv8s16, s64},
1107 {s32, nxv4s32, s64},
1108 {s64, nxv2s64, s64}})
1109 .unsupportedIf([=](const LegalityQuery &Query) {
1110 const LLT &EltTy = Query.Types[1].getElementType();
1111 if (Query.Types[1].isScalableVector())
1112 return false;
1113 return Query.Types[0] != EltTy;
1114 })
1115 .minScalar(2, s64)
1116 .customIf([=](const LegalityQuery &Query) {
1117 const LLT &VecTy = Query.Types[1];
1118 return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
1119 VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
1120 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
1121 })
1122 .minScalarOrEltIf(
1123 [=](const LegalityQuery &Query) {
1124 // We want to promote <M x s1> to <M x s64> if that wouldn't
1125 // cause the total vec size to be > 128b.
1126 return Query.Types[1].isFixedVector() &&
1127 Query.Types[1].getNumElements() <= 2;
1128 },
1129 0, s64)
1130 .minScalarOrEltIf(
1131 [=](const LegalityQuery &Query) {
1132 return Query.Types[1].isFixedVector() &&
1133 Query.Types[1].getNumElements() <= 4;
1134 },
1135 0, s32)
1136 .minScalarOrEltIf(
1137 [=](const LegalityQuery &Query) {
1138 return Query.Types[1].isFixedVector() &&
1139 Query.Types[1].getNumElements() <= 8;
1140 },
1141 0, s16)
1142 .minScalarOrEltIf(
1143 [=](const LegalityQuery &Query) {
1144 return Query.Types[1].isFixedVector() &&
1145 Query.Types[1].getNumElements() <= 16;
1146 },
1147 0, s8)
1148 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
1150 .clampMaxNumElements(1, s64, 2)
1151 .clampMaxNumElements(1, s32, 4)
1152 .clampMaxNumElements(1, s16, 8)
1153 .clampMaxNumElements(1, s8, 16)
1154 .clampMaxNumElements(1, p0, 2)
1156
1157 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
1158 .legalIf(
1159 typeInSet(0, {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1160 .legalFor(HasSVE, {{nxv16s8, s32, s64},
1161 {nxv8s16, s32, s64},
1162 {nxv4s32, s32, s64},
1163 {nxv2s64, s64, s64}})
1165 .widenVectorEltsToVectorMinSize(0, 64)
1166 .clampNumElements(0, v8s8, v16s8)
1167 .clampNumElements(0, v4s16, v8s16)
1168 .clampNumElements(0, v2s32, v4s32)
1169 .clampMaxNumElements(0, s64, 2)
1170 .clampMaxNumElements(0, p0, 2)
1171 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
1172
1173 getActionDefinitionsBuilder(G_BUILD_VECTOR)
1174 .legalFor({{v8s8, s8},
1175 {v16s8, s8},
1176 {v4s16, s16},
1177 {v8s16, s16},
1178 {v2s32, s32},
1179 {v4s32, s32},
1180 {v2s64, s64},
1181 {v2p0, p0}})
1182 .clampNumElements(0, v4s32, v4s32)
1183 .clampNumElements(0, v2s64, v2s64)
1184 .minScalarOrElt(0, s8)
1187 .minScalarSameAs(1, 0);
1188
1189 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
1190
1191 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
1192 .legalIf([=](const LegalityQuery &Query) {
1193 const LLT &DstTy = Query.Types[0];
1194 const LLT &SrcTy = Query.Types[1];
1195 // For now just support the TBL2 variant which needs the source vectors
1196 // to be the same size as the dest.
1197 if (DstTy != SrcTy)
1198 return false;
1199 return llvm::is_contained(
1200 {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
1201 })
1202 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors) or scalar
1203 // destinations; we just want those lowered into G_BUILD_VECTOR or
1204 // G_EXTRACT_VECTOR_ELT.
1205 .lowerIf([=](const LegalityQuery &Query) {
1206 return !Query.Types[0].isVector() || !Query.Types[1].isVector();
1207 })
1208 .moreElementsIf(
1209 [](const LegalityQuery &Query) {
1210 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
1211 Query.Types[0].getNumElements() >
1212 Query.Types[1].getNumElements();
1213 },
1214 changeTo(1, 0))
1216 .moreElementsIf(
1217 [](const LegalityQuery &Query) {
1218 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
1219 Query.Types[0].getNumElements() <
1220 Query.Types[1].getNumElements();
1221 },
1222 changeTo(0, 1))
1223 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1224 .clampNumElements(0, v8s8, v16s8)
1225 .clampNumElements(0, v4s16, v8s16)
1226 .clampNumElements(0, v4s32, v4s32)
1227 .clampNumElements(0, v2s64, v2s64)
1228 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
1229 .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1230 // Bitcast pointer vectors to vectors of i64.
1231 const LLT DstTy = Query.Types[0];
1232 return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));
1233 });
1234
1235 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1236 .legalFor({{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
1237 .bitcastIf(
1238 [=](const LegalityQuery &Query) {
1239 return Query.Types[0].getSizeInBits() <= 128 &&
1240 Query.Types[1].getSizeInBits() <= 64;
1241 },
1242 [=](const LegalityQuery &Query) {
1243 const LLT DstTy = Query.Types[0];
1244 const LLT SrcTy = Query.Types[1];
1245 return std::pair(
1246 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1249 SrcTy.getNumElements())));
1250 });
1251
1252 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
1253 .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1255 .immIdx(0); // Inform verifier imm idx 0 is handled.
1256
1257 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1258 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1259 .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1260
1261 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1262
1263 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1264
1265 getActionDefinitionsBuilder({G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
1266
1267 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1268
1269 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1270
1271 if (ST.hasMOPS()) {
1272 // G_BZERO is not supported. Currently it is only emitted by
1273 // PreLegalizerCombiner for G_MEMSET with zero constant.
1275
1277 .legalForCartesianProduct({p0}, {s64}, {s64})
1278 .customForCartesianProduct({p0}, {s8}, {s64})
1279 .immIdx(0); // Inform verifier imm idx 0 is handled.
1280
1281 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1282 .legalForCartesianProduct({p0}, {p0}, {s64})
1283 .immIdx(0); // Inform verifier imm idx 0 is handled.
1284
1285 // G_MEMCPY_INLINE does not have a tailcall immediate
1286 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1287 .legalForCartesianProduct({p0}, {p0}, {s64});
1288
1289 } else {
1290 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1291 .libcall();
1292 }
1293
1294 // For fadd reductions we have pairwise operations available. We treat the
1295 // usual legal types as legal and handle the lowering to pairwise instructions
1296 // later.
1297 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1298 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1299 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1300 .minScalarOrElt(0, MinFPScalar)
1301 .clampMaxNumElements(1, s64, 2)
1302 .clampMaxNumElements(1, s32, 4)
1303 .clampMaxNumElements(1, s16, 8)
1305 .scalarize(1)
1306 .lower();
1307
1308 // For fmul reductions we need to split up into individual operations. We
1309 // clamp to 128-bit vectors and then to 64-bit vectors to produce a cascade of
1310 // smaller types, followed by scalarizing what remains.
1311 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1312 .minScalarOrElt(0, MinFPScalar)
1313 .clampMaxNumElements(1, s64, 2)
1314 .clampMaxNumElements(1, s32, 4)
1315 .clampMaxNumElements(1, s16, 8)
1316 .clampMaxNumElements(1, s32, 2)
1317 .clampMaxNumElements(1, s16, 4)
1318 .scalarize(1)
1319 .lower();
1320
1321 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1322 .scalarize(2)
1323 .lower();
1324
1325 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1326 .legalFor({{s8, v8s8},
1327 {s8, v16s8},
1328 {s16, v4s16},
1329 {s16, v8s16},
1330 {s32, v2s32},
1331 {s32, v4s32},
1332 {s64, v2s64}})
1334 .clampMaxNumElements(1, s64, 2)
1335 .clampMaxNumElements(1, s32, 4)
1336 .clampMaxNumElements(1, s16, 8)
1337 .clampMaxNumElements(1, s8, 16)
1338 .widenVectorEltsToVectorMinSize(1, 64)
1339 .scalarize(1);
1340
1341 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1342 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1343 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1344 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1345 .minScalarOrElt(0, MinFPScalar)
1346 .clampMaxNumElements(1, s64, 2)
1347 .clampMaxNumElements(1, s32, 4)
1348 .clampMaxNumElements(1, s16, 8)
1349 .lower();
1350
1351 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1352 .clampMaxNumElements(1, s32, 2)
1353 .clampMaxNumElements(1, s16, 4)
1354 .clampMaxNumElements(1, s8, 8)
1355 .scalarize(1)
1356 .lower();
1357
1358 getActionDefinitionsBuilder(
1359 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1360 .legalFor({{s8, v8s8},
1361 {s8, v16s8},
1362 {s16, v4s16},
1363 {s16, v8s16},
1364 {s32, v2s32},
1365 {s32, v4s32}})
1366 .moreElementsIf(
1367 [=](const LegalityQuery &Query) {
1368 return Query.Types[1].isVector() &&
1369 Query.Types[1].getElementType() != s8 &&
1370 Query.Types[1].getNumElements() & 1;
1371 },
1373 .clampMaxNumElements(1, s64, 2)
1374 .clampMaxNumElements(1, s32, 4)
1375 .clampMaxNumElements(1, s16, 8)
1376 .clampMaxNumElements(1, s8, 16)
1377 .scalarize(1)
1378 .lower();
1379
1380 getActionDefinitionsBuilder(
1381 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1382 // Try to break down into smaller vectors as long as they're at least 64
1383 // bits. This lets us use vector operations for some parts of the
1384 // reduction.
1385 .fewerElementsIf(
1386 [=](const LegalityQuery &Q) {
1387 LLT SrcTy = Q.Types[1];
1388 if (SrcTy.isScalar())
1389 return false;
1390 if (!isPowerOf2_32(SrcTy.getNumElements()))
1391 return false;
1392 // We can usually perform 64b vector operations.
1393 return SrcTy.getSizeInBits() > 64;
1394 },
1395 [=](const LegalityQuery &Q) {
1396 LLT SrcTy = Q.Types[1];
1397 return std::make_pair(1, SrcTy.divide(2));
1398 })
1399 .scalarize(1)
1400 .lower();
1401
1402 // TODO: Update this to correct handling when adding AArch64/SVE support.
1403 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1404
1405 // Access to floating-point environment.
1406 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1407 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1408 .libcall();
1409
1410 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1411
1412 getActionDefinitionsBuilder(G_PREFETCH).custom();
1413
1414 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1415
1417 verify(*ST.getInstrInfo());
1418}
1419
1422 LostDebugLocObserver &LocObserver) const {
1423 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1424 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1425 GISelChangeObserver &Observer = Helper.Observer;
1426 switch (MI.getOpcode()) {
1427 default:
1428 // No idea what to do.
1429 return false;
1430 case TargetOpcode::G_VAARG:
1431 return legalizeVaArg(MI, MRI, MIRBuilder);
1432 case TargetOpcode::G_LOAD:
1433 case TargetOpcode::G_STORE:
1434 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1435 case TargetOpcode::G_SHL:
1436 case TargetOpcode::G_ASHR:
1437 case TargetOpcode::G_LSHR:
1438 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1439 case TargetOpcode::G_GLOBAL_VALUE:
1440 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1441 case TargetOpcode::G_SBFX:
1442 case TargetOpcode::G_UBFX:
1443 return legalizeBitfieldExtract(MI, MRI, Helper);
1444 case TargetOpcode::G_FSHL:
1445 case TargetOpcode::G_FSHR:
1446 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1447 case TargetOpcode::G_ROTR:
1448 return legalizeRotate(MI, MRI, Helper);
1449 case TargetOpcode::G_CTPOP:
1450 return legalizeCTPOP(MI, MRI, Helper);
1451 case TargetOpcode::G_ATOMIC_CMPXCHG:
1452 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1453 case TargetOpcode::G_CTTZ:
1454 return legalizeCTTZ(MI, Helper);
1455 case TargetOpcode::G_BZERO:
1456 case TargetOpcode::G_MEMCPY:
1457 case TargetOpcode::G_MEMMOVE:
1458 case TargetOpcode::G_MEMSET:
1459 return legalizeMemOps(MI, Helper);
1460 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1461 return legalizeExtractVectorElt(MI, MRI, Helper);
1462 case TargetOpcode::G_DYN_STACKALLOC:
1463 return legalizeDynStackAlloc(MI, Helper);
1464 case TargetOpcode::G_PREFETCH:
1465 return legalizePrefetch(MI, Helper);
1466 case TargetOpcode::G_ABS:
1467 return Helper.lowerAbsToCNeg(MI);
1468 case TargetOpcode::G_ICMP:
1469 return legalizeICMP(MI, MRI, MIRBuilder);
1470 case TargetOpcode::G_BITCAST:
1471 return legalizeBitcast(MI, Helper);
1472 }
1473
1474 llvm_unreachable("expected switch to return");
1475}
1476
1477bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1478 LegalizerHelper &Helper) const {
1479 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1480 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1481 // We're trying to handle casts from i1 vectors to scalars by storing to the
1482 // stack and reloading the value as a scalar.
1483 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1484 SrcTy.getElementType() != LLT::scalar(1))
1485 return false;
1486
1487 Helper.createStackStoreLoad(DstReg, SrcReg);
1488 MI.eraseFromParent();
1489 return true;
1490}
1491
1492bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1494 MachineIRBuilder &MIRBuilder,
1495 GISelChangeObserver &Observer,
1496 LegalizerHelper &Helper) const {
1497 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1498 MI.getOpcode() == TargetOpcode::G_FSHR);
1499
1500 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1501 // lowering
1502 Register ShiftNo = MI.getOperand(3).getReg();
1503 LLT ShiftTy = MRI.getType(ShiftNo);
1504 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1505
1506 // Adjust shift amount according to Opcode (FSHL/FSHR)
1507 // Convert FSHL to FSHR
1508 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1509 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1510
1511 // Lower non-constant shifts and leave zero shifts to the optimizer.
1512 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1513 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1514 LegalizerHelper::LegalizeResult::Legalized);
1515
1516 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1517
1518 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
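  // For example, with 32-bit operands a G_FSHL by 8 selects the same bits of
  // the 64-bit concatenation as a G_FSHR by 32 - 8 = 24, so FSHL amounts are
  // rewritten as BitWidth - Amount.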
1519
1520 // If the instruction is a G_FSHR with a 64-bit G_CONSTANT shift amount in
1521 // the range [0, BitWidth), it is already legal.
1522 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1523 VRegAndVal->Value.ult(BitWidth))
1524 return true;
1525
1526 // Cast the ShiftNumber to a 64-bit type
1527 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1528
1529 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1530 Observer.changingInstr(MI);
1531 MI.getOperand(3).setReg(Cast64.getReg(0));
1532 Observer.changedInstr(MI);
1533 }
1534 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1535 // instruction
1536 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1537 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1538 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1539 Cast64.getReg(0)});
1540 MI.eraseFromParent();
1541 }
1542 return true;
1543}
1544
1545bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1547 MachineIRBuilder &MIRBuilder) const {
1548 Register DstReg = MI.getOperand(0).getReg();
1549 Register SrcReg1 = MI.getOperand(2).getReg();
1550 Register SrcReg2 = MI.getOperand(3).getReg();
1551 LLT DstTy = MRI.getType(DstReg);
1552 LLT SrcTy = MRI.getType(SrcReg1);
1553
1554 // Check the vector types are legal
1555 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1556 DstTy.getNumElements() != SrcTy.getNumElements() ||
1557 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1558 return false;
1559
1560 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1561 // following passes
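  // (AdvSIMD only has equality compares such as CMEQ, so a vector x != y is
  // selected as CMEQ followed by a NOT of the result.)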
1562 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1563 if (Pred != CmpInst::ICMP_NE)
1564 return true;
1565 Register CmpReg =
1566 MIRBuilder
1567 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1568 .getReg(0);
1569 MIRBuilder.buildNot(DstReg, CmpReg);
1570
1571 MI.eraseFromParent();
1572 return true;
1573}
1574
1575bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1577 LegalizerHelper &Helper) const {
1578 // To allow for imported patterns to match, we ensure that the rotate amount
1579 // is 64b with an extension.
1580 Register AmtReg = MI.getOperand(2).getReg();
1581 LLT AmtTy = MRI.getType(AmtReg);
1582 (void)AmtTy;
1583 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1584 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1585 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1586 Helper.Observer.changingInstr(MI);
1587 MI.getOperand(2).setReg(NewAmt.getReg(0));
1588 Helper.Observer.changedInstr(MI);
1589 return true;
1590}
1591
1592bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1594 GISelChangeObserver &Observer) const {
1595 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1596 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1597 // G_ADD_LOW instructions.
1598 // By splitting this here, we can optimize accesses in the small code model by
1599 // folding the G_ADD_LOW into the load/store offset.
1600 auto &GlobalOp = MI.getOperand(1);
1601 // Don't modify an intrinsic call.
1602 if (GlobalOp.isSymbol())
1603 return true;
1604 const auto* GV = GlobalOp.getGlobal();
1605 if (GV->isThreadLocal())
1606 return true; // Don't want to modify TLS vars.
1607
1608 auto &TM = ST->getTargetLowering()->getTargetMachine();
1609 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1610
1611 if (OpFlags & AArch64II::MO_GOT)
1612 return true;
1613
1614 auto Offset = GlobalOp.getOffset();
1615 Register DstReg = MI.getOperand(0).getReg();
1616 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1617 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1618 // Set the regclass on the dest reg too.
1619 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1620
1621 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1622 // by creating a MOVK that sets bits 48-63 of the register to (global address
1623 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1624 // prevent an incorrect tag being generated during relocation when the
1625 // global appears before the code section. Without the offset, a global at
1626 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1627 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1628 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1629 // instead of `0xf`.
1630 // This assumes that we're in the small code model so we can assume a binary
1631 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1632 // binary must also be loaded into address range [0, 2^48). Both of these
1633 // properties need to be ensured at runtime when using tagged addresses.
1634 if (OpFlags & AArch64II::MO_TAGGED) {
1635 assert(!Offset &&
1636 "Should not have folded in an offset for a tagged global!");
1637 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1638 .addGlobalAddress(GV, 0x100000000,
1640 .addImm(48);
1641 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1642 }
1643
1644 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1645 .addGlobalAddress(GV, Offset,
1647 MI.eraseFromParent();
1648 return true;
1649}
1650
1652 MachineInstr &MI) const {
1653 MachineIRBuilder &MIB = Helper.MIRBuilder;
1654 MachineRegisterInfo &MRI = *MIB.getMRI();
1655
1656 auto LowerUnaryOp = [&MI, &MIB](unsigned Opcode) {
1657 MIB.buildInstr(Opcode, {MI.getOperand(0)}, {MI.getOperand(2)});
1658 MI.eraseFromParent();
1659 return true;
1660 };
1661 auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
1662 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1663 {MI.getOperand(2), MI.getOperand(3)});
1664 MI.eraseFromParent();
1665 return true;
1666 };
1667 auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {
1668 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1669 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4)});
1670 MI.eraseFromParent();
1671 return true;
1672 };
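  // For a G_INTRINSIC, operand 0 is the result and operand 1 is the intrinsic
  // ID, so the value operands forwarded by these helpers start at index 2.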
1673
1674 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1675 switch (IntrinsicID) {
1676 case Intrinsic::vacopy: {
1677 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1678 unsigned VaListSize =
1679 (ST->isTargetDarwin() || ST->isTargetWindows())
1680 ? PtrSize
1681 : ST->isTargetILP32() ? 20 : 32;
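    // Darwin and Windows define va_list as a single pointer. The AAPCS64
    // va_list is a 32-byte struct (three pointers plus two 32-bit offsets),
    // or 20 bytes under ILP32, so copy the whole blob in one load/store pair.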
1682
1683 MachineFunction &MF = *MI.getMF();
1685 LLT::scalar(VaListSize * 8));
1686 MIB.buildLoad(Val, MI.getOperand(2),
1689 VaListSize, Align(PtrSize)));
1690 MIB.buildStore(Val, MI.getOperand(1),
1693 VaListSize, Align(PtrSize)));
1694 MI.eraseFromParent();
1695 return true;
1696 }
1697 case Intrinsic::get_dynamic_area_offset: {
1698 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1699 MI.eraseFromParent();
1700 return true;
1701 }
1702 case Intrinsic::aarch64_mops_memset_tag: {
1703 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1704 // Anyext the value being set to 64 bits (only the bottom 8 bits are read by
1705 // the instruction).
1706 auto &Value = MI.getOperand(3);
1707 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1708 Value.setReg(ExtValueReg);
1709 return true;
1710 }
1711 case Intrinsic::aarch64_prefetch: {
1712 auto &AddrVal = MI.getOperand(1);
1713
1714 int64_t IsWrite = MI.getOperand(2).getImm();
1715 int64_t Target = MI.getOperand(3).getImm();
1716 int64_t IsStream = MI.getOperand(4).getImm();
1717 int64_t IsData = MI.getOperand(5).getImm();
1718
1719 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1720 (!IsData << 3) | // IsDataCache bit
1721 (Target << 1) | // Cache level bits
1722 (unsigned)IsStream; // Stream bit
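    // For example, a temporal data-write prefetch into the L1 cache
    // (IsWrite=1, IsData=1, Target=0, IsStream=0) packs to 0b10000, which is
    // the PSTL1KEEP hint.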
1723
1724 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1725 MI.eraseFromParent();
1726 return true;
1727 }
1728 case Intrinsic::aarch64_neon_uaddv:
1729 case Intrinsic::aarch64_neon_saddv:
1730 case Intrinsic::aarch64_neon_umaxv:
1731 case Intrinsic::aarch64_neon_smaxv:
1732 case Intrinsic::aarch64_neon_uminv:
1733 case Intrinsic::aarch64_neon_sminv: {
1734 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1735 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1736 IntrinsicID == Intrinsic::aarch64_neon_sminv;
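    // These reduction intrinsics may be declared with a result wider than the
    // vector element (e.g. an i32 result for a v16i8 reduction); rewrite the
    // def to the element type and extend it back to the original width below.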
1737
1738 auto OldDst = MI.getOperand(0).getReg();
1739 auto OldDstTy = MRI.getType(OldDst);
1740 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1741 if (OldDstTy == NewDstTy)
1742 return true;
1743
1744 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1745
1746 Helper.Observer.changingInstr(MI);
1747 MI.getOperand(0).setReg(NewDst);
1748 Helper.Observer.changedInstr(MI);
1749
1750 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1751 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1752 OldDst, NewDst);
1753
1754 return true;
1755 }
1756 case Intrinsic::aarch64_neon_uaddlp:
1757 case Intrinsic::aarch64_neon_saddlp: {
1758 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1759 ? AArch64::G_UADDLP
1760 : AArch64::G_SADDLP;
1761 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1762 MI.eraseFromParent();
1763
1764 return true;
1765 }
1766 case Intrinsic::aarch64_neon_uaddlv:
1767 case Intrinsic::aarch64_neon_saddlv: {
1768 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1769 ? AArch64::G_UADDLV
1770 : AArch64::G_SADDLV;
1771 Register DstReg = MI.getOperand(0).getReg();
1772 Register SrcReg = MI.getOperand(2).getReg();
1773 LLT DstTy = MRI.getType(DstReg);
1774
1775 LLT MidTy, ExtTy;
1776 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1777 MidTy = LLT::fixed_vector(4, 32);
1778 ExtTy = LLT::scalar(32);
1779 } else {
1780 MidTy = LLT::fixed_vector(2, 64);
1781 ExtTy = LLT::scalar(64);
1782 }
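 // The G_UADDLV/G_SADDLV result is modeled as a vector, so build the reduction
 // with a vector result type, read the scalar out of lane 0, and truncate below
 // when the destination is narrower than 32 bits.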
1783
1784 Register MidReg =
1785 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1786 Register ZeroReg =
1787 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1788 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1789 {MidReg, ZeroReg})
1790 .getReg(0);
1791
1792 if (DstTy.getScalarSizeInBits() < 32)
1793 MIB.buildTrunc(DstReg, ExtReg);
1794 else
1795 MIB.buildCopy(DstReg, ExtReg);
1796
1797 MI.eraseFromParent();
1798
1799 return true;
1800 }
1801 case Intrinsic::aarch64_neon_smax:
1802 return LowerBinOp(TargetOpcode::G_SMAX);
1803 case Intrinsic::aarch64_neon_smin:
1804 return LowerBinOp(TargetOpcode::G_SMIN);
1805 case Intrinsic::aarch64_neon_umax:
1806 return LowerBinOp(TargetOpcode::G_UMAX);
1807 case Intrinsic::aarch64_neon_umin:
1808 return LowerBinOp(TargetOpcode::G_UMIN);
1809 case Intrinsic::aarch64_neon_fmax:
1810 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1811 case Intrinsic::aarch64_neon_fmin:
1812 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1813 case Intrinsic::aarch64_neon_fmaxnm:
1814 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1815 case Intrinsic::aarch64_neon_fminnm:
1816 return LowerBinOp(TargetOpcode::G_FMINNUM);
1817 case Intrinsic::aarch64_neon_smull:
1818 return LowerBinOp(AArch64::G_SMULL);
1819 case Intrinsic::aarch64_neon_umull:
1820 return LowerBinOp(AArch64::G_UMULL);
1821 case Intrinsic::aarch64_neon_sabd:
1822 return LowerBinOp(TargetOpcode::G_ABDS);
1823 case Intrinsic::aarch64_neon_uabd:
1824 return LowerBinOp(TargetOpcode::G_ABDU);
1825 case Intrinsic::aarch64_neon_abs: {
1826 // Lower the intrinsic to G_ABS.
1827 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
1828 MI.eraseFromParent();
1829 return true;
1830 }
1831 case Intrinsic::aarch64_neon_sqadd: {
1832 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1833 return LowerBinOp(TargetOpcode::G_SADDSAT);
1834 break;
1835 }
1836 case Intrinsic::aarch64_neon_sqsub: {
1837 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1838 return LowerBinOp(TargetOpcode::G_SSUBSAT);
1839 break;
1840 }
1841 case Intrinsic::aarch64_neon_uqadd: {
1842 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1843 return LowerBinOp(TargetOpcode::G_UADDSAT);
1844 break;
1845 }
1846 case Intrinsic::aarch64_neon_uqsub: {
1847 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1848 return LowerBinOp(TargetOpcode::G_USUBSAT);
1849 break;
1850 }
1851 case Intrinsic::aarch64_neon_udot:
1852 return LowerTriOp(AArch64::G_UDOT);
1853 case Intrinsic::aarch64_neon_sdot:
1854 return LowerTriOp(AArch64::G_SDOT);
1855 case Intrinsic::aarch64_neon_sqxtn:
1856 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
1857 case Intrinsic::aarch64_neon_sqxtun:
1858 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
1859 case Intrinsic::aarch64_neon_uqxtn:
1860 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
1861
1862 case Intrinsic::vector_reverse:
1863 // TODO: Add support for vector_reverse
1864 return false;
1865 }
1866
1867 return true;
1868}
1869
1870bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1871 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1872 GISelChangeObserver &Observer) const {
1873 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1874 MI.getOpcode() == TargetOpcode::G_LSHR ||
1875 MI.getOpcode() == TargetOpcode::G_SHL);
1876 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1877 // imported patterns can select it later. Either way, it will be legal.
1878 Register AmtReg = MI.getOperand(2).getReg();
1879 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1880 if (!VRegAndVal)
1881 return true;
1882 // Check the shift amount is in range for an immediate form.
1883 int64_t Amount = VRegAndVal->Value.getSExtValue();
1884 if (Amount > 31)
1885 return true; // This will have to remain a register variant.
1886 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1887 Observer.changingInstr(MI);
1888 MI.getOperand(2).setReg(ExtCst.getReg(0));
1889 Observer.changedInstr(MI);
1890 return true;
1891}
1892
1893static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
1894 MachineRegisterInfo &MRI) {
1895 Base = Root;
1896 Offset = 0;
1897
1898 Register NewBase;
1899 int64_t NewOffset;
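 // Only fold an offset that fits the LDP/STP immediate field: a signed 7-bit
 // value scaled by 8, i.e. a multiple of 8 in the range [-512, 504].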
1900 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1901 isShiftedInt<7, 3>(NewOffset)) {
1902 Base = NewBase;
1903 Offset = NewOffset;
1904 }
1905}
1906
1907// FIXME: This should be removed and replaced with the generic bitcast legalize
1908// action.
1909bool AArch64LegalizerInfo::legalizeLoadStore(
1910 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1911 GISelChangeObserver &Observer) const {
1912 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1913 MI.getOpcode() == TargetOpcode::G_LOAD);
1914 // Here we just try to handle vector loads/stores where our value type might
1915 // have pointer elements, which the SelectionDAG importer can't handle. To
1916 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1917 // the value to use s64 types.
1918
1919 // Custom legalization requires that the instruction, if not deleted, be
1920 // fully legalized. To allow further legalization of the instruction, we
1921 // create a new instruction and erase the existing one.
1922
1923 Register ValReg = MI.getOperand(0).getReg();
1924 const LLT ValTy = MRI.getType(ValReg);
1925
1926 if (ValTy == LLT::scalar(128)) {
1927
1928 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1929 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1930 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1931 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1932 bool IsRcpC3 =
1933 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1934
1935 LLT s64 = LLT::scalar(64);
1936
1937 unsigned Opcode;
1938 if (IsRcpC3) {
1939 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1940 } else {
1941 // For LSE2, loads/stores should have been converted to monotonic and had
1942 // a fence inserted after them.
1943 assert(Ordering == AtomicOrdering::Monotonic ||
1944 Ordering == AtomicOrdering::Unordered);
1945 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1946
1947 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1948 }
1949
1950 MachineInstrBuilder NewI;
1951 if (IsLoad) {
1952 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1953 MIRBuilder.buildMergeLikeInstr(
1954 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1955 } else {
1956 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1957 NewI = MIRBuilder.buildInstr(
1958 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1959 }
1960
1961 if (IsRcpC3) {
1962 NewI.addUse(MI.getOperand(1).getReg());
1963 } else {
1964 Register Base;
1965 int Offset;
1966 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1967 NewI.addUse(Base);
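 // The LDPXi/STPXi immediate is scaled by the 8-byte access size, hence the
 // division by 8 below.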
1968 NewI.addImm(Offset / 8);
1969 }
1970
1971 NewI.cloneMemRefs(MI);
1972 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
1973 *MRI.getTargetRegisterInfo(),
1974 *ST->getRegBankInfo());
1975 MI.eraseFromParent();
1976 return true;
1977 }
1978
1979 if (!ValTy.isPointerVector() ||
1980 ValTy.getElementType().getAddressSpace() != 0) {
1981 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1982 return false;
1983 }
1984
1985 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1986 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1987 auto &MMO = **MI.memoperands_begin();
1988 MMO.setType(NewTy);
1989
1990 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1991 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1992 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1993 } else {
1994 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
1995 MIRBuilder.buildBitcast(ValReg, NewLoad);
1996 }
1997 MI.eraseFromParent();
1998 return true;
1999}
2000
2001bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
2002 MachineRegisterInfo &MRI,
2003 MachineIRBuilder &MIRBuilder) const {
2004 MachineFunction &MF = MIRBuilder.getMF();
2005 Align Alignment(MI.getOperand(2).getImm());
2006 Register Dst = MI.getOperand(0).getReg();
2007 Register ListPtr = MI.getOperand(1).getReg();
2008
2009 LLT PtrTy = MRI.getType(ListPtr);
2010 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
2011
2012 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
2013 const Align PtrAlign = Align(PtrSize);
2014 auto List = MIRBuilder.buildLoad(
2015 PtrTy, ListPtr,
2016 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2017 PtrTy, PtrAlign));
2018
2019 MachineInstrBuilder DstPtr;
2020 if (Alignment > PtrAlign) {
2021 // Realign the list to the actual required alignment.
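 // i.e. DstPtr = (List + Alignment - 1) with the low Log2(Alignment) bits
 // cleared, emitted as a G_PTR_ADD followed by a G_PTRMASK.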
2022 auto AlignMinus1 =
2023 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
2024 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
2025 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
2026 } else
2027 DstPtr = List;
2028
2029 LLT ValTy = MRI.getType(Dst);
2030 uint64_t ValSize = ValTy.getSizeInBits() / 8;
2031 MIRBuilder.buildLoad(
2032 Dst, DstPtr,
2033 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2034 ValTy, std::max(Alignment, PtrAlign)));
2035
2036 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
2037
2038 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
2039
2040 MIRBuilder.buildStore(NewList, ListPtr,
2041 *MF.getMachineMemOperand(MachinePointerInfo(),
2042 MachineMemOperand::MOStore,
2043 PtrTy, PtrAlign));
2044
2045 MI.eraseFromParent();
2046 return true;
2047}
2048
2049bool AArch64LegalizerInfo::legalizeBitfieldExtract(
2050 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2051 // Only legal if we can select immediate forms.
2052 // TODO: Lower this otherwise.
2053 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
2054 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
2055}
2056
2057bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
2058 MachineRegisterInfo &MRI,
2059 LegalizerHelper &Helper) const {
2060 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
2061 // it can be more efficiently lowered to the following sequence that uses
2062 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
2063 // registers are cheap.
2064 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
2065 // CNT V0.8B, V0.8B // 8xbyte pop-counts
2066 // ADDV B0, V0.8B // sum 8xbyte pop-counts
2067 // UMOV X0, V0.B[0] // copy byte result back to integer reg
2068 //
2069 // For 128 bit vector popcounts, we lower to the following sequence:
2070 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
2071 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
2072 // uaddlp.4s v0, v0 // v4s32, v2s64
2073 // uaddlp.2d v0, v0 // v2s64
2074 //
2075 // For 64 bit vector popcounts, we lower to the following sequence:
2076 // cnt.8b v0, v0 // v4s16, v2s32
2077 // uaddlp.4h v0, v0 // v4s16, v2s32
2078 // uaddlp.2s v0, v0 // v2s32
2079
2080 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2081 Register Dst = MI.getOperand(0).getReg();
2082 Register Val = MI.getOperand(1).getReg();
2083 LLT Ty = MRI.getType(Val);
2084 unsigned Size = Ty.getSizeInBits();
2085
2086 assert(Ty == MRI.getType(Dst) &&
2087 "Expected src and dst to have the same type!");
2088
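 // With CSSC, scalar CNT handles 32/64-bit popcounts directly, so an s128
 // popcount is split into two 64-bit counts which are summed and zero-extended.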
2089 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
2090 LLT s64 = LLT::scalar(64);
2091
2092 auto Split = MIRBuilder.buildUnmerge(s64, Val);
2093 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
2094 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
2095 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
2096
2097 MIRBuilder.buildZExt(Dst, Add);
2098 MI.eraseFromParent();
2099 return true;
2100 }
2101
2102 if (!ST->hasNEON() ||
2103 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
2104 // Use generic lowering when custom lowering is not possible.
2105 return Ty.isScalar() && (Size == 32 || Size == 64) &&
2106 Helper.lowerBitCount(MI) ==
2107 LegalizerHelper::LegalizeResult::Legalized;
2108 }
2109
2110 // Pre-conditioning: widen Val up to the nearest vector type.
2111 // s32,s64,v4s16,v2s32 -> v8i8
2112 // v8s16,v4s32,v2s64 -> v16i8
2113 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
2114 if (Ty.isScalar()) {
2115 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
2116 if (Size == 32) {
2117 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
2118 }
2119 }
2120 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
2121
2122 // Count bits in each byte-sized lane.
2123 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
2124
2125 // Sum across lanes.
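 // With the dot-product instructions, a UDOT against an all-ones vector adds
 // each group of four adjacent byte counts into a 32-bit lane; a trailing
 // UADDLP pairs those lanes up for the v2s64 case.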
2126
2127 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2128 Ty.getScalarSizeInBits() != 16) {
2129 LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
2130 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
2131 auto Ones = MIRBuilder.buildConstant(VTy, 1);
2132 MachineInstrBuilder Sum;
2133
2134 if (Ty == LLT::fixed_vector(2, 64)) {
2135 auto UDOT =
2136 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2137 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
2138 } else if (Ty == LLT::fixed_vector(4, 32)) {
2139 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2140 } else if (Ty == LLT::fixed_vector(2, 32)) {
2141 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2142 } else {
2143 llvm_unreachable("unexpected vector shape");
2144 }
2145
2146 Sum->getOperand(0).setReg(Dst);
2147 MI.eraseFromParent();
2148 return true;
2149 }
2150
2151 Register HSum = CTPOP.getReg(0);
2152 unsigned Opc;
2153 SmallVector<LLT> HAddTys;
2154 if (Ty.isScalar()) {
2155 Opc = Intrinsic::aarch64_neon_uaddlv;
2156 HAddTys.push_back(LLT::scalar(32));
2157 } else if (Ty == LLT::fixed_vector(8, 16)) {
2158 Opc = Intrinsic::aarch64_neon_uaddlp;
2159 HAddTys.push_back(LLT::fixed_vector(8, 16));
2160 } else if (Ty == LLT::fixed_vector(4, 32)) {
2161 Opc = Intrinsic::aarch64_neon_uaddlp;
2162 HAddTys.push_back(LLT::fixed_vector(8, 16));
2163 HAddTys.push_back(LLT::fixed_vector(4, 32));
2164 } else if (Ty == LLT::fixed_vector(2, 64)) {
2165 Opc = Intrinsic::aarch64_neon_uaddlp;
2166 HAddTys.push_back(LLT::fixed_vector(8, 16));
2167 HAddTys.push_back(LLT::fixed_vector(4, 32));
2168 HAddTys.push_back(LLT::fixed_vector(2, 64));
2169 } else if (Ty == LLT::fixed_vector(4, 16)) {
2170 Opc = Intrinsic::aarch64_neon_uaddlp;
2171 HAddTys.push_back(LLT::fixed_vector(4, 16));
2172 } else if (Ty == LLT::fixed_vector(2, 32)) {
2173 Opc = Intrinsic::aarch64_neon_uaddlp;
2174 HAddTys.push_back(LLT::fixed_vector(4, 16));
2175 HAddTys.push_back(LLT::fixed_vector(2, 32));
2176 } else
2177 llvm_unreachable("unexpected vector shape");
2178 MachineInstrBuilder UADD;
2179 for (LLT HTy : HAddTys) {
2180 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2181 HSum = UADD.getReg(0);
2182 }
2183
2184 // Post-conditioning.
2185 if (Ty.isScalar() && (Size == 64 || Size == 128))
2186 MIRBuilder.buildZExt(Dst, UADD);
2187 else
2188 UADD->getOperand(0).setReg(Dst);
2189 MI.eraseFromParent();
2190 return true;
2191}
2192
2193bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2194 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2195 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2196 LLT s64 = LLT::scalar(64);
2197 auto Addr = MI.getOperand(1).getReg();
2198 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
2199 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
2200 auto DstLo = MRI.createGenericVirtualRegister(s64);
2201 auto DstHi = MRI.createGenericVirtualRegister(s64);
2202
2203 MachineInstrBuilder CAS;
2204 if (ST->hasLSE()) {
2205 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2206 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2207 // the rest of the MIR so we must reassemble the extracted registers into a
2208 // 128-bit known-regclass one with code like this:
2209 //
2210 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2211 // %out = CASP %in1, ...
2212 // %OldLo = G_EXTRACT %out, 0
2213 // %OldHi = G_EXTRACT %out, 64
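 // The sube64/subo64 indices used below select the even and odd 64-bit
 // registers of the XSeqPair built by REG_SEQUENCE.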
2214 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2215 unsigned Opcode;
2216 switch (Ordering) {
2217 case AtomicOrdering::Acquire:
2218 Opcode = AArch64::CASPAX;
2219 break;
2220 case AtomicOrdering::Release:
2221 Opcode = AArch64::CASPLX;
2222 break;
2223 case AtomicOrdering::AcquireRelease:
2224 case AtomicOrdering::SequentiallyConsistent:
2225 Opcode = AArch64::CASPALX;
2226 break;
2227 default:
2228 Opcode = AArch64::CASPX;
2229 break;
2230 }
2231
2232 LLT s128 = LLT::scalar(128);
2233 auto CASDst = MRI.createGenericVirtualRegister(s128);
2234 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2235 auto CASNew = MRI.createGenericVirtualRegister(s128);
2236 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2237 .addUse(DesiredI->getOperand(0).getReg())
2238 .addImm(AArch64::sube64)
2239 .addUse(DesiredI->getOperand(1).getReg())
2240 .addImm(AArch64::subo64);
2241 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2242 .addUse(NewI->getOperand(0).getReg())
2243 .addImm(AArch64::sube64)
2244 .addUse(NewI->getOperand(1).getReg())
2245 .addImm(AArch64::subo64);
2246
2247 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2248
2249 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2250 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2251 } else {
2252 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2253 // can take arbitrary registers, so it just has the normal GPR64 operands that
2254 // the rest of AArch64 is expecting.
2255 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2256 unsigned Opcode;
2257 switch (Ordering) {
2258 case AtomicOrdering::Acquire:
2259 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2260 break;
2261 case AtomicOrdering::Release:
2262 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2263 break;
2264 case AtomicOrdering::AcquireRelease:
2265 case AtomicOrdering::SequentiallyConsistent:
2266 Opcode = AArch64::CMP_SWAP_128;
2267 break;
2268 default:
2269 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2270 break;
2271 }
2272
2273 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2274 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2275 {Addr, DesiredI->getOperand(0),
2276 DesiredI->getOperand(1), NewI->getOperand(0),
2277 NewI->getOperand(1)});
2278 }
2279
2280 CAS.cloneMemRefs(MI);
2281 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2282 *MRI.getTargetRegisterInfo(),
2283 *ST->getRegBankInfo());
2284
2285 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2286 MI.eraseFromParent();
2287 return true;
2288}
2289
2290bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2291 LegalizerHelper &Helper) const {
2292 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2293 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
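 // Without a native count-trailing-zeros instruction, compute CTTZ(x) as
 // CTLZ(BITREVERSE(x)), which maps onto RBIT + CLZ.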
2294 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
2295 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2296 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2297 MI.eraseFromParent();
2298 return true;
2299}
2300
2301bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2302 LegalizerHelper &Helper) const {
2303 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2304
2305 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2306 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2307 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2308 // the instruction).
2309 auto &Value = MI.getOperand(1);
2310 Register ExtValueReg =
2311 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
2312 Value.setReg(ExtValueReg);
2313 return true;
2314 }
2315
2316 return false;
2317}
2318
2319bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2320 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2321 const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
2322 auto VRegAndVal =
2323 getIConstantVRegValWithLookThrough(Element->getIndexReg(), MRI);
2324 if (VRegAndVal)
2325 return true;
2326 LLT VecTy = MRI.getType(Element->getVectorReg());
2327 if (VecTy.isScalableVector())
2328 return true;
2329 return Helper.lowerExtractInsertVectorElt(MI) !=
2330 LegalizerHelper::LegalizeResult::UnableToLegalize;
2331}
2332
2333bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2334 MachineInstr &MI, LegalizerHelper &Helper) const {
2335 MachineFunction &MF = *MI.getParent()->getParent();
2336 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2337 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2338
2339 // If stack probing is not enabled for this function, use the default
2340 // lowering.
2341 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2342 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2343 "inline-asm") {
2344 Helper.lowerDynStackAlloc(MI);
2345 return true;
2346 }
2347
2348 Register Dst = MI.getOperand(0).getReg();
2349 Register AllocSize = MI.getOperand(1).getReg();
2350 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2351
2352 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2353 "Unexpected type for dynamic alloca");
2354 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2355 "Unexpected type for dynamic alloca");
2356
2357 LLT PtrTy = MRI.getType(Dst);
2358 Register SPReg =
2359 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2360 Register SPTmp =
2361 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
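 // PROBED_STACKALLOC_DYN is a pseudo that is expanded after selection into a
 // loop that moves SP down to SPTmp while probing the intervening stack pages.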
2362 auto NewMI =
2363 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2364 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2365 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2366 MIRBuilder.buildCopy(Dst, SPTmp);
2367
2368 MI.eraseFromParent();
2369 return true;
2370}
2371
2372bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2373 LegalizerHelper &Helper) const {
2374 MachineIRBuilder &MIB = Helper.MIRBuilder;
2375 auto &AddrVal = MI.getOperand(0);
2376
2377 int64_t IsWrite = MI.getOperand(1).getImm();
2378 int64_t Locality = MI.getOperand(2).getImm();
2379 int64_t IsData = MI.getOperand(3).getImm();
2380
2381 bool IsStream = Locality == 0;
2382 if (Locality != 0) {
2383 assert(Locality <= 3 && "Prefetch locality out-of-range");
2384 // The locality degree is the inverse of the cache level, so put the
2385 // number the other way around: the target encoding starts at 0 for
2386 // level 1 (L1).
2387 Locality = 3 - Locality;
2388 }
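 // e.g. a data read prefetch with locality 3 (IsWrite=0, IsData=1) becomes
 // Locality=0 and PrfOp=0b00000, i.e. PLDL1KEEP.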
2389
2390 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2391
2392 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2393 MI.eraseFromParent();
2394 return true;
2395}