RISCVLegalizerInfo.cpp
1//===-- RISCVLegalizerInfo.cpp ----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for RISC-V.
10/// \todo This should be generated by TableGen.
11//===----------------------------------------------------------------------===//
12
13#include "RISCVLegalizerInfo.h"
16#include "RISCVSubtarget.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/IntrinsicsRISCV.h"
31#include "llvm/IR/Type.h"
32
33using namespace llvm;
34using namespace LegalityPredicates;
35using namespace LegalizeMutations;
36
37static LegalityPredicate
38typeIsLegalIntOrFPVec(unsigned TypeIdx,
39 std::initializer_list<LLT> IntOrFPVecTys,
40 const RISCVSubtarget &ST) {
41 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
42 return ST.hasVInstructions() &&
43 (Query.Types[TypeIdx].getScalarSizeInBits() != 64 ||
44 ST.hasVInstructionsI64()) &&
45 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
46 ST.getELen() == 64);
47 };
48
49 return all(typeInSet(TypeIdx, IntOrFPVecTys), P);
50}
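// For example, nxv2s64 is only accepted when the subtarget has 64-bit vector
// arithmetic (hasVInstructionsI64), and the fractional nxv1* types are only
// accepted when ELEN is 64.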
51
52static LegalityPredicate
53typeIsLegalBoolVec(unsigned TypeIdx, std::initializer_list<LLT> BoolVecTys,
54 const RISCVSubtarget &ST) {
55 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
56 return ST.hasVInstructions() &&
57 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
58 ST.getELen() == 64);
59 };
60 return all(typeInSet(TypeIdx, BoolVecTys), P);
61}
62
63static LegalityPredicate typeIsLegalPtrVec(unsigned TypeIdx,
64 std::initializer_list<LLT> PtrVecTys,
65 const RISCVSubtarget &ST) {
66 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
67 return ST.hasVInstructions() &&
68 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
69 ST.getELen() == 64) &&
70 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 16 ||
71 Query.Types[TypeIdx].getScalarSizeInBits() == 32);
72 };
73 return all(typeInSet(TypeIdx, PtrVecTys), P);
74}
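// Pointer elements are XLen bits wide, so nxv16p0 is only accepted when the
// pointer scalar size is 32 (i.e. on RV32), and nxv1p0 additionally requires
// ELEN == 64.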
75
76RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
77 : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(XLen)) {
78 const LLT sDoubleXLen = LLT::scalar(2 * XLen);
79 const LLT p0 = LLT::pointer(0, XLen);
80 const LLT s1 = LLT::scalar(1);
81 const LLT s8 = LLT::scalar(8);
82 const LLT s16 = LLT::scalar(16);
83 const LLT s32 = LLT::scalar(32);
84 const LLT s64 = LLT::scalar(64);
85 const LLT s128 = LLT::scalar(128);
86
87 const LLT nxv1s1 = LLT::scalable_vector(1, s1);
88 const LLT nxv2s1 = LLT::scalable_vector(2, s1);
89 const LLT nxv4s1 = LLT::scalable_vector(4, s1);
90 const LLT nxv8s1 = LLT::scalable_vector(8, s1);
91 const LLT nxv16s1 = LLT::scalable_vector(16, s1);
92 const LLT nxv32s1 = LLT::scalable_vector(32, s1);
93 const LLT nxv64s1 = LLT::scalable_vector(64, s1);
94
95 const LLT nxv1s8 = LLT::scalable_vector(1, s8);
96 const LLT nxv2s8 = LLT::scalable_vector(2, s8);
97 const LLT nxv4s8 = LLT::scalable_vector(4, s8);
98 const LLT nxv8s8 = LLT::scalable_vector(8, s8);
99 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
100 const LLT nxv32s8 = LLT::scalable_vector(32, s8);
101 const LLT nxv64s8 = LLT::scalable_vector(64, s8);
102
103 const LLT nxv1s16 = LLT::scalable_vector(1, s16);
104 const LLT nxv2s16 = LLT::scalable_vector(2, s16);
105 const LLT nxv4s16 = LLT::scalable_vector(4, s16);
106 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
107 const LLT nxv16s16 = LLT::scalable_vector(16, s16);
108 const LLT nxv32s16 = LLT::scalable_vector(32, s16);
109
110 const LLT nxv1s32 = LLT::scalable_vector(1, s32);
111 const LLT nxv2s32 = LLT::scalable_vector(2, s32);
112 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
113 const LLT nxv8s32 = LLT::scalable_vector(8, s32);
114 const LLT nxv16s32 = LLT::scalable_vector(16, s32);
115
116 const LLT nxv1s64 = LLT::scalable_vector(1, s64);
117 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
118 const LLT nxv4s64 = LLT::scalable_vector(4, s64);
119 const LLT nxv8s64 = LLT::scalable_vector(8, s64);
120
121 const LLT nxv1p0 = LLT::scalable_vector(1, p0);
122 const LLT nxv2p0 = LLT::scalable_vector(2, p0);
123 const LLT nxv4p0 = LLT::scalable_vector(4, p0);
124 const LLT nxv8p0 = LLT::scalable_vector(8, p0);
125 const LLT nxv16p0 = LLT::scalable_vector(16, p0);
126
127 using namespace TargetOpcode;
128
129 auto BoolVecTys = {nxv1s1, nxv2s1, nxv4s1, nxv8s1, nxv16s1, nxv32s1, nxv64s1};
130
131 auto IntOrFPVecTys = {nxv1s8, nxv2s8, nxv4s8, nxv8s8, nxv16s8, nxv32s8,
132 nxv64s8, nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16,
133 nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32,
134 nxv1s64, nxv2s64, nxv4s64, nxv8s64};
135
136 auto PtrVecTys = {nxv1p0, nxv2p0, nxv4p0, nxv8p0, nxv16p0};
137
138 getActionDefinitionsBuilder({G_ADD, G_SUB})
139 .legalFor({sXLen})
140 .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
141 .customFor(ST.is64Bit(), {s32})
142 .widenScalarToNextPow2(0)
143 .clampScalar(0, sXLen, sXLen);
144
145 getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
146 .legalFor({sXLen})
147 .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
148 .widenScalarToNextPow2(0)
149 .clampScalar(0, sXLen, sXLen);
150
151 getActionDefinitionsBuilder(
152 {G_UADDE, G_UADDO, G_USUBE, G_USUBO}).lower();
153
154 getActionDefinitionsBuilder({G_SADDO, G_SSUBO}).minScalar(0, sXLen).lower();
155
156 // TODO: Use Vector Single-Width Saturating Instructions for vector types.
157 getActionDefinitionsBuilder(
158 {G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT, G_SSHLSAT, G_USHLSAT})
159 .lower();
160
161 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
162 .legalFor({{sXLen, sXLen}})
163 .customFor(ST.is64Bit(), {{s32, s32}})
164 .widenScalarToNextPow2(0)
165 .clampScalar(1, sXLen, sXLen)
166 .clampScalar(0, sXLen, sXLen);
167
168 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
169 .legalFor({{s32, s16}})
170 .legalFor(ST.is64Bit(), {{s64, s16}, {s64, s32}})
171 .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
172 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))
173 .customIf(typeIsLegalBoolVec(1, BoolVecTys, ST))
174 .maxScalar(0, sXLen);
175
176 getActionDefinitionsBuilder(G_SEXT_INREG)
177 .customFor({sXLen})
178 .clampScalar(0, sXLen, sXLen)
179 .lower();
180
181 // Merge/Unmerge
182 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
183 auto &MergeUnmergeActions = getActionDefinitionsBuilder(Op);
184 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
185 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
186 if (XLen == 32 && ST.hasStdExtD()) {
187 MergeUnmergeActions.legalIf(
188 all(typeIs(BigTyIdx, s64), typeIs(LitTyIdx, s32)));
189 }
190 MergeUnmergeActions.widenScalarToNextPow2(LitTyIdx, XLen)
191 .widenScalarToNextPow2(BigTyIdx, XLen)
192 .clampScalar(LitTyIdx, sXLen, sXLen)
193 .clampScalar(BigTyIdx, sXLen, sXLen);
194 }
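  // For example, on RV32 with the D extension a G_MERGE_VALUES forming an s64
  // from two s32 halves (and the matching G_UNMERGE_VALUES) stays legal;
  // everything else is clamped to sXLen.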
195
196 getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();
197
198 getActionDefinitionsBuilder({G_ROTR, G_ROTL})
199 .legalFor(ST.hasStdExtZbb() || ST.hasStdExtZbkb(), {{sXLen, sXLen}})
200 .customFor(ST.is64Bit() && (ST.hasStdExtZbb() || ST.hasStdExtZbkb()),
201 {{s32, s32}})
202 .lower();
203
204 getActionDefinitionsBuilder(G_BITREVERSE).maxScalar(0, sXLen).lower();
205
206 getActionDefinitionsBuilder(G_BITCAST).legalIf(
207 all(LegalityPredicates::any(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
208 typeIsLegalBoolVec(0, BoolVecTys, ST)),
209 LegalityPredicates::any(typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST),
210 typeIsLegalBoolVec(1, BoolVecTys, ST))));
211
212 auto &BSWAPActions = getActionDefinitionsBuilder(G_BSWAP);
213 if (ST.hasStdExtZbb() || ST.hasStdExtZbkb())
214 BSWAPActions.legalFor({sXLen}).clampScalar(0, sXLen, sXLen);
215 else
216 BSWAPActions.maxScalar(0, sXLen).lower();
217
218 auto &CountZerosActions = getActionDefinitionsBuilder({G_CTLZ, G_CTTZ});
219 auto &CountZerosUndefActions =
220 getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF});
221 if (ST.hasStdExtZbb()) {
222 CountZerosActions.legalFor({{sXLen, sXLen}})
223 .customFor({{s32, s32}})
224 .clampScalar(0, s32, sXLen)
225 .widenScalarToNextPow2(0)
226 .scalarSameSizeAs(1, 0);
227 } else {
228 CountZerosActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
229 CountZerosUndefActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0);
230 }
231 CountZerosUndefActions.lower();
232
233 auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
234 if (ST.hasStdExtZbb()) {
235 CTPOPActions.legalFor({{sXLen, sXLen}})
236 .clampScalar(0, sXLen, sXLen)
237 .scalarSameSizeAs(1, 0);
238 } else {
239 CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
240 }
241
242 getActionDefinitionsBuilder(G_CONSTANT)
243 .legalFor({p0})
244 .legalFor(!ST.is64Bit(), {s32})
245 .customFor(ST.is64Bit(), {s64})
246 .widenScalarToNextPow2(0)
247 .clampScalar(0, sXLen, sXLen);
248
249 // TODO: transform illegal vector types into legal vector type
250 getActionDefinitionsBuilder(G_FREEZE)
251 .legalFor({s16, s32, p0})
252 .legalFor(ST.is64Bit(), {s64})
253 .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
254 .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
255 .widenScalarToNextPow2(0)
256 .clampScalar(0, s16, sXLen);
257
258 // TODO: transform illegal vector types into legal vector type
259 // TODO: Merge with G_FREEZE?
260 getActionDefinitionsBuilder(
261 {G_IMPLICIT_DEF, G_CONSTANT_FOLD_BARRIER})
262 .legalFor({s32, sXLen, p0})
263 .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
264 .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
265 .widenScalarToNextPow2(0)
266 .clampScalar(0, s32, sXLen);
267
268 getActionDefinitionsBuilder(G_ICMP)
269 .legalFor({{sXLen, sXLen}, {sXLen, p0}})
270 .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
271 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))
272 .widenScalarOrEltToNextPow2OrMinSize(1, 8)
273 .clampScalar(1, sXLen, sXLen)
274 .clampScalar(0, sXLen, sXLen);
275
276 getActionDefinitionsBuilder(G_SELECT)
277 .legalFor({{s32, sXLen}, {p0, sXLen}})
278 .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
279 typeIsLegalBoolVec(1, BoolVecTys, ST)))
280 .legalFor(XLen == 64 || ST.hasStdExtD(), {{s64, sXLen}})
281 .widenScalarToNextPow2(0)
282 .clampScalar(0, s32, (XLen == 64 || ST.hasStdExtD()) ? s64 : s32)
283 .clampScalar(1, sXLen, sXLen);
284
285 auto &LoadActions = getActionDefinitionsBuilder(G_LOAD);
286 auto &StoreActions = getActionDefinitionsBuilder(G_STORE);
287 auto &ExtLoadActions = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD});
288
289 // Return the alignment needed for scalar memory ops. If unaligned scalar mem
290 // is supported, we only require byte alignment. Otherwise, we need the memory
291 // op to be natively aligned.
292 auto getScalarMemAlign = [&ST](unsigned Size) {
293 return ST.enableUnalignedScalarMem() ? 8 : Size;
294 };
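  // For example, getScalarMemAlign(32) is 8 (byte alignment is enough) when
  // unaligned scalar accesses are enabled, and 32 (natural alignment)
  // otherwise.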
295
296 LoadActions.legalForTypesWithMemDesc(
297 {{s16, p0, s8, getScalarMemAlign(8)},
298 {s32, p0, s8, getScalarMemAlign(8)},
299 {s16, p0, s16, getScalarMemAlign(16)},
300 {s32, p0, s16, getScalarMemAlign(16)},
301 {s32, p0, s32, getScalarMemAlign(32)},
302 {p0, p0, sXLen, getScalarMemAlign(XLen)}});
303 StoreActions.legalForTypesWithMemDesc(
304 {{s16, p0, s8, getScalarMemAlign(8)},
305 {s32, p0, s8, getScalarMemAlign(8)},
306 {s16, p0, s16, getScalarMemAlign(16)},
307 {s32, p0, s16, getScalarMemAlign(16)},
308 {s32, p0, s32, getScalarMemAlign(32)},
309 {p0, p0, sXLen, getScalarMemAlign(XLen)}});
310 ExtLoadActions.legalForTypesWithMemDesc(
311 {{sXLen, p0, s8, getScalarMemAlign(8)},
312 {sXLen, p0, s16, getScalarMemAlign(16)}});
313 if (XLen == 64) {
314 LoadActions.legalForTypesWithMemDesc(
315 {{s64, p0, s8, getScalarMemAlign(8)},
316 {s64, p0, s16, getScalarMemAlign(16)},
317 {s64, p0, s32, getScalarMemAlign(32)},
318 {s64, p0, s64, getScalarMemAlign(64)}});
319 StoreActions.legalForTypesWithMemDesc(
320 {{s64, p0, s8, getScalarMemAlign(8)},
321 {s64, p0, s16, getScalarMemAlign(16)},
322 {s64, p0, s32, getScalarMemAlign(32)},
323 {s64, p0, s64, getScalarMemAlign(64)}});
324 ExtLoadActions.legalForTypesWithMemDesc(
325 {{s64, p0, s32, getScalarMemAlign(32)}});
326 } else if (ST.hasStdExtD()) {
327 LoadActions.legalForTypesWithMemDesc(
328 {{s64, p0, s64, getScalarMemAlign(64)}});
329 StoreActions.legalForTypesWithMemDesc(
330 {{s64, p0, s64, getScalarMemAlign(64)}});
331 }
332
333 // Vector loads/stores.
334 if (ST.hasVInstructions()) {
335 LoadActions.legalForTypesWithMemDesc({{nxv2s8, p0, nxv2s8, 8},
336 {nxv4s8, p0, nxv4s8, 8},
337 {nxv8s8, p0, nxv8s8, 8},
338 {nxv16s8, p0, nxv16s8, 8},
339 {nxv32s8, p0, nxv32s8, 8},
340 {nxv64s8, p0, nxv64s8, 8},
341 {nxv2s16, p0, nxv2s16, 16},
342 {nxv4s16, p0, nxv4s16, 16},
343 {nxv8s16, p0, nxv8s16, 16},
344 {nxv16s16, p0, nxv16s16, 16},
345 {nxv32s16, p0, nxv32s16, 16},
346 {nxv2s32, p0, nxv2s32, 32},
347 {nxv4s32, p0, nxv4s32, 32},
348 {nxv8s32, p0, nxv8s32, 32},
349 {nxv16s32, p0, nxv16s32, 32}});
350 StoreActions.legalForTypesWithMemDesc({{nxv2s8, p0, nxv2s8, 8},
351 {nxv4s8, p0, nxv4s8, 8},
352 {nxv8s8, p0, nxv8s8, 8},
353 {nxv16s8, p0, nxv16s8, 8},
354 {nxv32s8, p0, nxv32s8, 8},
355 {nxv64s8, p0, nxv64s8, 8},
356 {nxv2s16, p0, nxv2s16, 16},
357 {nxv4s16, p0, nxv4s16, 16},
358 {nxv8s16, p0, nxv8s16, 16},
359 {nxv16s16, p0, nxv16s16, 16},
360 {nxv32s16, p0, nxv32s16, 16},
361 {nxv2s32, p0, nxv2s32, 32},
362 {nxv4s32, p0, nxv4s32, 32},
363 {nxv8s32, p0, nxv8s32, 32},
364 {nxv16s32, p0, nxv16s32, 32}});
365
366 if (ST.getELen() == 64) {
367 LoadActions.legalForTypesWithMemDesc({{nxv1s8, p0, nxv1s8, 8},
368 {nxv1s16, p0, nxv1s16, 16},
369 {nxv1s32, p0, nxv1s32, 32}});
370 StoreActions.legalForTypesWithMemDesc({{nxv1s8, p0, nxv1s8, 8},
371 {nxv1s16, p0, nxv1s16, 16},
372 {nxv1s32, p0, nxv1s32, 32}});
373 }
374
375 if (ST.hasVInstructionsI64()) {
376 LoadActions.legalForTypesWithMemDesc({{nxv1s64, p0, nxv1s64, 64},
377 {nxv2s64, p0, nxv2s64, 64},
378 {nxv4s64, p0, nxv4s64, 64},
379 {nxv8s64, p0, nxv8s64, 64}});
380 StoreActions.legalForTypesWithMemDesc({{nxv1s64, p0, nxv1s64, 64},
381 {nxv2s64, p0, nxv2s64, 64},
382 {nxv4s64, p0, nxv4s64, 64},
383 {nxv8s64, p0, nxv8s64, 64}});
384 }
385
386 // We will take the custom lowering path if we have scalable vector types
387 // with non-standard alignments.
388 LoadActions.customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));
389 StoreActions.customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));
390
391 // Pointers require that XLen sized elements are legal.
392 if (XLen <= ST.getELen()) {
393 LoadActions.customIf(typeIsLegalPtrVec(0, PtrVecTys, ST));
394 StoreActions.customIf(typeIsLegalPtrVec(0, PtrVecTys, ST));
395 }
396 }
397
398 LoadActions.widenScalarToNextPow2(0, /* MinSize = */ 8)
399 .lowerIfMemSizeNotByteSizePow2()
400 .clampScalar(0, s16, sXLen)
401 .lower();
402 StoreActions
403 .clampScalar(0, s16, sXLen)
404 .lowerIfMemSizeNotByteSizePow2()
405 .lower();
406
407 ExtLoadActions.widenScalarToNextPow2(0).clampScalar(0, sXLen, sXLen).lower();
408
409 getActionDefinitionsBuilder({G_PTR_ADD, G_PTRMASK}).legalFor({{p0, sXLen}});
410
411 getActionDefinitionsBuilder(G_PTRTOINT)
412 .legalFor({{sXLen, p0}})
413 .clampScalar(0, sXLen, sXLen);
414
415 getActionDefinitionsBuilder(G_INTTOPTR)
416 .legalFor({{p0, sXLen}})
417 .clampScalar(1, sXLen, sXLen);
418
419 getActionDefinitionsBuilder(G_BRCOND).legalFor({sXLen}).minScalar(0, sXLen);
420
421 getActionDefinitionsBuilder(G_BRJT).customFor({{p0, sXLen}});
422
423 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
424
425 getActionDefinitionsBuilder(G_PHI)
426 .legalFor({p0, s32, sXLen})
427 .widenScalarToNextPow2(0)
428 .clampScalar(0, s32, sXLen);
429
430 getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE, G_CONSTANT_POOL})
431 .legalFor({p0});
432
433 if (ST.hasStdExtZmmul()) {
434 getActionDefinitionsBuilder(G_MUL)
435 .legalFor({sXLen})
436 .widenScalarToNextPow2(0)
437 .clampScalar(0, sXLen, sXLen);
438
439 // clang-format off
440 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
441 .legalFor({sXLen})
442 .lower();
443 // clang-format on
444
445 getActionDefinitionsBuilder({G_SMULO, G_UMULO}).minScalar(0, sXLen).lower();
446 } else {
447 getActionDefinitionsBuilder(G_MUL)
448 .libcallFor({sXLen, sDoubleXLen})
449 .widenScalarToNextPow2(0)
450 .clampScalar(0, sXLen, sDoubleXLen);
451
452 getActionDefinitionsBuilder({G_SMULH, G_UMULH}).lowerFor({sXLen});
453
454 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
455 .minScalar(0, sXLen)
456 // Widen sXLen to sDoubleXLen so we can use a single libcall to get
457 // the low bits for the mul result and high bits to do the overflow
458 // check.
459 .widenScalarIf(typeIs(0, sXLen),
460 LegalizeMutations::changeTo(0, sDoubleXLen))
461 .lower();
462 }
463
464 if (ST.hasStdExtM()) {
465 getActionDefinitionsBuilder({G_SDIV, G_UDIV, G_UREM})
466 .legalFor({sXLen})
467 .customFor({s32})
468 .libcallFor({sDoubleXLen})
469 .clampScalar(0, s32, sDoubleXLen)
470 .widenScalarToNextPow2(0);
471 getActionDefinitionsBuilder(G_SREM)
472 .legalFor({sXLen})
473 .libcallFor({sDoubleXLen})
474 .clampScalar(0, sXLen, sDoubleXLen)
475 .widenScalarToNextPow2(0);
476 } else {
477 getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM})
478 .libcallFor({sXLen, sDoubleXLen})
479 .clampScalar(0, sXLen, sDoubleXLen)
480 .widenScalarToNextPow2(0);
481 }
482
483 // TODO: Use libcall for sDoubleXLen.
484 getActionDefinitionsBuilder({G_SDIVREM, G_UDIVREM}).lower();
485
486 getActionDefinitionsBuilder(G_ABS)
487 .customFor(ST.hasStdExtZbb(), {sXLen})
488 .minScalar(ST.hasStdExtZbb(), 0, sXLen)
489 .lower();
490
491 getActionDefinitionsBuilder({G_UMAX, G_UMIN, G_SMAX, G_SMIN})
492 .legalFor(ST.hasStdExtZbb(), {sXLen})
493 .minScalar(ST.hasStdExtZbb(), 0, sXLen)
494 .lower();
495
496 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
497
498 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
499
500 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
501
502 getActionDefinitionsBuilder({G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
503 .lower();
504
505 // FP Operations
506
507 // FIXME: Support s128 for rv32 when libcall handling is able to use sret.
508 getActionDefinitionsBuilder(
509 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM})
510 .legalFor(ST.hasStdExtF(), {s32})
511 .legalFor(ST.hasStdExtD(), {s64})
512 .legalFor(ST.hasStdExtZfh(), {s16})
513 .libcallFor({s32, s64})
514 .libcallFor(ST.is64Bit(), {s128});
515
516 getActionDefinitionsBuilder({G_FNEG, G_FABS})
517 .legalFor(ST.hasStdExtF(), {s32})
518 .legalFor(ST.hasStdExtD(), {s64})
519 .legalFor(ST.hasStdExtZfh(), {s16})
520 .lowerFor({s32, s64, s128});
521
522 getActionDefinitionsBuilder(G_FREM)
523 .libcallFor({s32, s64})
524 .libcallFor(ST.is64Bit(), {s128})
525 .minScalar(0, s32)
526 .scalarize(0);
527
528 getActionDefinitionsBuilder(G_FCOPYSIGN)
529 .legalFor(ST.hasStdExtF(), {{s32, s32}})
530 .legalFor(ST.hasStdExtD(), {{s64, s64}, {s32, s64}, {s64, s32}})
531 .legalFor(ST.hasStdExtZfh(), {{s16, s16}, {s16, s32}, {s32, s16}})
532 .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}, {s64, s16}})
533 .lower();
534
535 // FIXME: Use Zfhmin.
536 getActionDefinitionsBuilder(G_FPTRUNC)
537 .legalFor(ST.hasStdExtD(), {{s32, s64}})
538 .legalFor(ST.hasStdExtZfh(), {{s16, s32}})
539 .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}})
540 .libcallFor({{s32, s64}})
541 .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}});
542 getActionDefinitionsBuilder(G_FPEXT)
543 .legalFor(ST.hasStdExtD(), {{s64, s32}})
544 .legalFor(ST.hasStdExtZfh(), {{s32, s16}})
545 .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s64, s16}})
546 .libcallFor({{s64, s32}})
547 .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}});
548
549 getActionDefinitionsBuilder(G_FCMP)
550 .legalFor(ST.hasStdExtF(), {{sXLen, s32}})
551 .legalFor(ST.hasStdExtD(), {{sXLen, s64}})
552 .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
553 .clampScalar(0, sXLen, sXLen)
554 .libcallFor({{sXLen, s32}, {sXLen, s64}})
555 .libcallFor(ST.is64Bit(), {{sXLen, s128}});
556
557 // TODO: Support vector version of G_IS_FPCLASS.
558 getActionDefinitionsBuilder(G_IS_FPCLASS)
559 .customFor(ST.hasStdExtF(), {{s1, s32}})
560 .customFor(ST.hasStdExtD(), {{s1, s64}})
561 .customFor(ST.hasStdExtZfh(), {{s1, s16}})
562 .lowerFor({{s1, s32}, {s1, s64}});
563
564 getActionDefinitionsBuilder(G_FCONSTANT)
565 .legalFor(ST.hasStdExtF(), {s32})
566 .legalFor(ST.hasStdExtD(), {s64})
567 .legalFor(ST.hasStdExtZfh(), {s16})
568 .lowerFor({s32, s64, s128});
569
570 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
571 .legalFor(ST.hasStdExtF(), {{sXLen, s32}})
572 .legalFor(ST.hasStdExtD(), {{sXLen, s64}})
573 .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
574 .customFor(ST.is64Bit() && ST.hasStdExtF(), {{s32, s32}})
575 .customFor(ST.is64Bit() && ST.hasStdExtD(), {{s32, s64}})
576 .customFor(ST.is64Bit() && ST.hasStdExtZfh(), {{s32, s16}})
577 .widenScalarToNextPow2(0)
578 .minScalar(0, s32)
579 .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
580 .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}}) // FIXME RV32.
581 .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}, {s128, s128}});
582
583 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
584 .legalFor(ST.hasStdExtF(), {{s32, sXLen}})
585 .legalFor(ST.hasStdExtD(), {{s64, sXLen}})
586 .legalFor(ST.hasStdExtZfh(), {{s16, sXLen}})
587 .widenScalarToNextPow2(1)
588 // Promote to XLen if the operation is legal.
589 .widenScalarIf(
590 [=, &ST](const LegalityQuery &Query) {
591 return Query.Types[0].isScalar() && Query.Types[1].isScalar() &&
592 (Query.Types[1].getSizeInBits() < ST.getXLen()) &&
593 ((ST.hasStdExtF() && Query.Types[0].getSizeInBits() == 32) ||
594 (ST.hasStdExtD() && Query.Types[0].getSizeInBits() == 64) ||
595 (ST.hasStdExtZfh() &&
596 Query.Types[0].getSizeInBits() == 16));
597 },
598 LegalizeMutations::changeTo(1, sXLen))
599 // Otherwise only promote to s32 since we have si libcalls.
600 .minScalar(1, s32)
601 .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
602 .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}}) // FIXME RV32.
603 .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}, {s128, s128}});
604
605 // FIXME: We can do custom inline expansion like SelectionDAG.
606 getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
607 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
608 G_INTRINSIC_ROUNDEVEN})
609 .legalFor(ST.hasStdExtZfa(), {s32})
610 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtD(), {s64})
611 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtZfh(), {s16})
612 .libcallFor({s32, s64})
613 .libcallFor(ST.is64Bit(), {s128});
614
615 getActionDefinitionsBuilder({G_FMAXIMUM, G_FMINIMUM})
616 .legalFor(ST.hasStdExtZfa(), {s32})
617 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtD(), {s64})
618 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtZfh(), {s16});
619
620 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FTAN, G_FPOW, G_FLOG, G_FLOG2,
621 G_FLOG10, G_FEXP, G_FEXP2, G_FEXP10, G_FACOS,
622 G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH,
623 G_FTANH})
624 .libcallFor({s32, s64})
625 .libcallFor(ST.is64Bit(), {s128});
626 getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
627 .libcallFor({{s32, s32}, {s64, s32}})
628 .libcallFor(ST.is64Bit(), {{s128, s32}});
629
630 getActionDefinitionsBuilder(G_VASTART).customFor({p0});
631
632 // va_list must be a pointer, but most sized types are pretty easy to handle
633 // as the destination.
634 getActionDefinitionsBuilder(G_VAARG)
635 // TODO: Implement narrowScalar and widenScalar for G_VAARG for types
636 // other than sXLen.
637 .clampScalar(0, sXLen, sXLen)
638 .lowerForCartesianProduct({sXLen, p0}, {p0});
639
640 getActionDefinitionsBuilder(G_VSCALE)
641 .clampScalar(0, sXLen, sXLen)
642 .customFor({sXLen});
643
644 auto &SplatActions =
645 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
646 .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
647 typeIs(1, sXLen)))
648 .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), typeIs(1, s1)));
649 // Handle case of s64 element vectors on RV32. If the subtarget does not have
650 // f64, then try to lower it to G_SPLAT_VECTOR_SPLIT_64_VL. If the subtarget
651 // does have f64, then we don't know whether the type is an f64 or an i64,
652 // so mark the G_SPLAT_VECTOR as legal and decide later what to do with it,
653 // depending on how the instructions it consumes are legalized. They are not
654 // legalized yet since legalization is in reverse postorder, so we cannot
655 // make the decision at this moment.
656 if (XLen == 32) {
657 if (ST.hasVInstructionsF64() && ST.hasStdExtD())
658 SplatActions.legalIf(all(
659 typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
660 else if (ST.hasVInstructionsI64())
661 SplatActions.customIf(all(
662 typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
663 }
664
665 SplatActions.clampScalar(1, sXLen, sXLen);
666
667 LegalityPredicate ExtractSubvecBitcastPred = [=](const LegalityQuery &Query) {
668 LLT DstTy = Query.Types[0];
669 LLT SrcTy = Query.Types[1];
670 return DstTy.getElementType() == LLT::scalar(1) &&
671 DstTy.getElementCount().getKnownMinValue() >= 8 &&
672 SrcTy.getElementCount().getKnownMinValue() >= 8;
673 };
674 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
675 // We don't have the ability to slide mask vectors down indexed by their
676 // i1 elements; the smallest we can do is i8. Often we are able to bitcast
677 // to equivalent i8 vectors.
678 .bitcastIf(
679 all(typeIsLegalBoolVec(0, BoolVecTys, ST),
680 typeIsLegalBoolVec(1, BoolVecTys, ST), ExtractSubvecBitcastPred),
681 [=](const LegalityQuery &Query) {
682 LLT CastTy = LLT::vector(
683 Query.Types[0].getElementCount().divideCoefficientBy(8), 8);
684 return std::pair(0, CastTy);
685 })
686 .customIf(LegalityPredicates::any(
687 all(typeIsLegalBoolVec(0, BoolVecTys, ST),
688 typeIsLegalBoolVec(1, BoolVecTys, ST)),
689 all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
690 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))));
691
692 getActionDefinitionsBuilder(G_INSERT_SUBVECTOR)
693 .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
694 typeIsLegalBoolVec(1, BoolVecTys, ST)))
695 .customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
696 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
697
698 getActionDefinitionsBuilder(G_ATOMICRMW_ADD)
699 .legalFor(ST.hasStdExtA(), {{sXLen, p0}})
700 .libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
701 .clampScalar(0, sXLen, sXLen);
702
703 getLegacyLegalizerInfo().computeTables();
704 verify(*ST.getInstrInfo());
705}
706
707bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
708 MachineInstr &MI) const {
709 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
710 switch (IntrinsicID) {
711 default:
712 return false;
713 case Intrinsic::vacopy: {
714 // vacopy arguments must be legal because of the intrinsic signature.
715 // No need to check here.
716
717 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
718 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
719 MachineFunction &MF = *MI.getMF();
720 const DataLayout &DL = MIRBuilder.getDataLayout();
721 LLVMContext &Ctx = MF.getFunction().getContext();
722
723 Register DstLst = MI.getOperand(1).getReg();
724 LLT PtrTy = MRI.getType(DstLst);
725
726 // Load the source va_list
727 Align Alignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
728 MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
729 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, Alignment);
730 auto Tmp = MIRBuilder.buildLoad(PtrTy, MI.getOperand(2), *LoadMMO);
731
732 // Store the result in the destination va_list
733 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
734 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, Alignment);
735 MIRBuilder.buildStore(Tmp, DstLst, *StoreMMO);
736
737 MI.eraseFromParent();
738 return true;
739 }
740 case Intrinsic::riscv_masked_atomicrmw_add:
741 return true;
742 }
743}
744
745bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI,
746 MachineIRBuilder &MIRBuilder) const {
747 // Stores the address of the VarArgsFrameIndex slot into the memory location
748 assert(MI.getOpcode() == TargetOpcode::G_VASTART);
749 MachineFunction *MF = MI.getParent()->getParent();
750 RISCVMachineFunctionInfo *FuncInfo = MF->getInfo<RISCVMachineFunctionInfo>();
751 int FI = FuncInfo->getVarArgsFrameIndex();
752 LLT AddrTy = MIRBuilder.getMRI()->getType(MI.getOperand(0).getReg());
753 auto FINAddr = MIRBuilder.buildFrameIndex(AddrTy, FI);
754 assert(MI.hasOneMemOperand());
755 MIRBuilder.buildStore(FINAddr, MI.getOperand(0).getReg(),
756 *MI.memoperands()[0]);
757 MI.eraseFromParent();
758 return true;
759}
760
761bool RISCVLegalizerInfo::legalizeBRJT(MachineInstr &MI,
762 MachineIRBuilder &MIRBuilder) const {
763 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
764 auto &MF = *MI.getParent()->getParent();
765 const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
766 unsigned EntrySize = MJTI->getEntrySize(MF.getDataLayout());
767
768 Register PtrReg = MI.getOperand(0).getReg();
769 LLT PtrTy = MRI.getType(PtrReg);
770 Register IndexReg = MI.getOperand(2).getReg();
771 LLT IndexTy = MRI.getType(IndexReg);
772
773 if (!isPowerOf2_32(EntrySize))
774 return false;
775
776 auto ShiftAmt = MIRBuilder.buildConstant(IndexTy, Log2_32(EntrySize));
777 IndexReg = MIRBuilder.buildShl(IndexTy, IndexReg, ShiftAmt).getReg(0);
778
779 auto Addr = MIRBuilder.buildPtrAdd(PtrTy, PtrReg, IndexReg);
780
781 MachineMemOperand *MMO = MF.getMachineMemOperand(
782 MachinePointerInfo::getJumpTable(MF), MachineMemOperand::MOLoad,
783 EntrySize, Align(MJTI->getEntryAlignment(MF.getDataLayout())));
784
785 Register TargetReg;
786 switch (MJTI->getEntryKind()) {
787 default:
788 return false;
789 case MachineJumpTableInfo::EK_LabelDifference32: {
790 // For PIC, the sequence is:
791 // BRIND(load(Jumptable + index) + RelocBase)
792 // RelocBase can be JumpTable, GOT or some sort of global base.
793 unsigned LoadOpc =
794 STI.is64Bit() ? TargetOpcode::G_SEXTLOAD : TargetOpcode::G_LOAD;
795 auto Load = MIRBuilder.buildLoadInstr(LoadOpc, IndexTy, Addr, *MMO);
796 TargetReg = MIRBuilder.buildPtrAdd(PtrTy, PtrReg, Load).getReg(0);
797 break;
798 }
799 case MachineJumpTableInfo::EK_Custom32: {
800 auto Load = MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, IndexTy,
801 Addr, *MMO);
802 TargetReg = MIRBuilder.buildIntToPtr(PtrTy, Load).getReg(0);
803 break;
804 }
805 case MachineJumpTableInfo::EK_BlockAddress:
806 TargetReg = MIRBuilder.buildLoad(PtrTy, Addr, *MMO).getReg(0);
807 break;
808 }
809
810 MIRBuilder.buildBrIndirect(TargetReg);
811
812 MI.eraseFromParent();
813 return true;
814}
815
816bool RISCVLegalizerInfo::shouldBeInConstantPool(const APInt &APImm,
817 bool ShouldOptForSize) const {
818 assert(APImm.getBitWidth() == 32 || APImm.getBitWidth() == 64);
819 int64_t Imm = APImm.getSExtValue();
820 // All simm32 constants should be handled by isel.
821 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
822 // this check redundant, but small immediates are common so this check
823 // should have better compile time.
824 if (isInt<32>(Imm))
825 return false;
826
827 // We only need to cost the immediate, if constant pool lowering is enabled.
828 if (!STI.useConstantPoolForLargeInts())
829 return false;
830
831 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, STI);
832 if (Seq.size() <= STI.getMaxBuildIntsCost())
833 return false;
834
835 // Optimizations below are disabled for opt size. If we're optimizing for
836 // size, use a constant pool.
837 if (ShouldOptForSize)
838 return true;
839 //
840 // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
841 // do that if it will avoid a constant pool.
842 // It will require an extra temporary register though.
843 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
844 // the low and high 32 bits are the same and bits 31 and 63 are set.
845 unsigned ShiftAmt, AddOpc;
846 RISCVMatInt::InstSeq SeqLo =
847 RISCVMatInt::generateTwoRegInstSeq(Imm, STI, ShiftAmt, AddOpc);
848 return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost());
849}
850
851bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI,
852 MachineIRBuilder &MIB) const {
853 const LLT XLenTy(STI.getXLenVT());
854 Register Dst = MI.getOperand(0).getReg();
855
856 // We define our scalable vector types for lmul=1 to use a 64 bit known
857 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
858 // vscale as VLENB / 8.
859 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
860 if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock)
861 // Support for VLEN==32 is incomplete.
862 return false;
863
864 // We assume VLENB is a multiple of 8. We manually choose the best shift
865 // here because SimplifyDemandedBits isn't always able to simplify it.
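  // For example, a multiplier of 4 becomes (VLENB >> 1), 8 becomes VLENB
  // itself, 16 becomes (VLENB << 1), 24 becomes (VLENB * 3), and 5 becomes
  // ((VLENB >> 3) * 5).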
866 uint64_t Val = MI.getOperand(1).getCImm()->getZExtValue();
867 if (isPowerOf2_64(Val)) {
868 uint64_t Log2 = Log2_64(Val);
869 if (Log2 < 3) {
870 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
871 MIB.buildLShr(Dst, VLENB, MIB.buildConstant(XLenTy, 3 - Log2));
872 } else if (Log2 > 3) {
873 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
874 MIB.buildShl(Dst, VLENB, MIB.buildConstant(XLenTy, Log2 - 3));
875 } else {
876 MIB.buildInstr(RISCV::G_READ_VLENB, {Dst}, {});
877 }
878 } else if ((Val % 8) == 0) {
879 // If the multiplier is a multiple of 8, scale it down to avoid needing
880 // to shift the VLENB value.
881 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
882 MIB.buildMul(Dst, VLENB, MIB.buildConstant(XLenTy, Val / 8));
883 } else {
884 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
885 auto VScale = MIB.buildLShr(XLenTy, VLENB, MIB.buildConstant(XLenTy, 3));
886 MIB.buildMul(Dst, VScale, MIB.buildConstant(XLenTy, Val));
887 }
888 MI.eraseFromParent();
889 return true;
890}
891
892// Custom-lower extensions from mask vectors by using a vselect either with 1
893// for zero/any-extension or -1 for sign-extension:
894// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
895// Note that any-extension is lowered identically to zero-extension.
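// For example (illustrative G-MIR, assuming a legal nxv4s1 mask type):
//   %e:_(<vscale x 4 x s16>) = G_SEXT %m:_(<vscale x 4 x s1>)
// is rewritten to roughly:
//   %ones:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR -1
//   %zero:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR 0
//   %e:_(<vscale x 4 x s16>)    = G_SELECT %m, %ones, %zero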
896bool RISCVLegalizerInfo::legalizeExt(MachineInstr &MI,
897 MachineIRBuilder &MIB) const {
898
899 unsigned Opc = MI.getOpcode();
900 assert(Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_SEXT ||
901 Opc == TargetOpcode::G_ANYEXT);
902
903 MachineRegisterInfo &MRI = *MIB.getMRI();
904 Register Dst = MI.getOperand(0).getReg();
905 Register Src = MI.getOperand(1).getReg();
906
907 LLT DstTy = MRI.getType(Dst);
908 int64_t ExtTrueVal = Opc == TargetOpcode::G_SEXT ? -1 : 1;
909 LLT DstEltTy = DstTy.getElementType();
910 auto SplatZero = MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, 0));
911 auto SplatTrue =
912 MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, ExtTrueVal));
913 MIB.buildSelect(Dst, Src, SplatTrue, SplatZero);
914
915 MI.eraseFromParent();
916 return true;
917}
918
919bool RISCVLegalizerInfo::legalizeLoadStore(MachineInstr &MI,
920 LegalizerHelper &Helper,
921 MachineIRBuilder &MIB) const {
922 assert((isa<GLoad>(MI) || isa<GStore>(MI)) &&
923 "Machine instructions must be Load/Store.");
924 MachineRegisterInfo &MRI = *MIB.getMRI();
925 MachineFunction *MF = MI.getMF();
926 const DataLayout &DL = MIB.getDataLayout();
927 LLVMContext &Ctx = MF->getFunction().getContext();
928
929 Register DstReg = MI.getOperand(0).getReg();
930 LLT DataTy = MRI.getType(DstReg);
931 if (!DataTy.isVector())
932 return false;
933
934 if (!MI.hasOneMemOperand())
935 return false;
936
937 MachineMemOperand *MMO = *MI.memoperands_begin();
938
939 const auto *TLI = STI.getTargetLowering();
940 EVT VT = EVT::getEVT(getTypeForLLT(DataTy, Ctx));
941
942 if (TLI->allowsMemoryAccessForAlignment(Ctx, DL, VT, *MMO))
943 return true;
944
945 unsigned EltSizeBits = DataTy.getScalarSizeInBits();
946 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
947 "Unexpected unaligned RVV load type");
948
949 // Calculate the new vector type with i8 elements
950 unsigned NumElements =
951 DataTy.getElementCount().getKnownMinValue() * (EltSizeBits / 8);
952 LLT NewDataTy = LLT::scalable_vector(NumElements, 8);
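  // For example (illustrative), an under-aligned nxv2s32 access is rewritten
  // as an nxv8s8 access covering the same bytes, which only needs byte
  // alignment.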
953
954 Helper.bitcast(MI, 0, NewDataTy);
955
956 return true;
957}
958
959/// Return the mask type suitable for masking the provided
960/// vector type. This is simply an i1 element type vector of the same
961/// (possibly scalable) length.
962static LLT getMaskTypeFor(LLT VecTy) {
963 assert(VecTy.isVector());
964 ElementCount EC = VecTy.getElementCount();
965 return LLT::vector(EC, LLT::scalar(1));
966}
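// For example, getMaskTypeFor(nxv4s32) is nxv4s1.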
967
968/// Creates an all ones mask suitable for masking a vector of type VecTy with
969/// vector length VL.
970static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
971 MachineIRBuilder &MIB,
972 MachineRegisterInfo &MRI) {
973 LLT MaskTy = getMaskTypeFor(VecTy);
974 return MIB.buildInstr(RISCV::G_VMSET_VL, {MaskTy}, {VL});
975}
976
977/// Gets the two common "VL" operands: an all-ones mask and the vector length.
978/// VecTy is a scalable vector type.
979static std::pair<MachineInstrBuilder, MachineInstrBuilder>
980buildDefaultVLOps(LLT VecTy, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
981 assert(VecTy.isScalableVector() && "Expecting scalable container type");
982 const RISCVSubtarget &STI = MIB.getMF().getSubtarget<RISCVSubtarget>();
983 LLT XLenTy(STI.getXLenVT());
984 auto VL = MIB.buildConstant(XLenTy, -1);
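  // A VL of -1 (all ones in XLen) acts as the VLMAX sentinel here, i.e. the
  // whole register group is operated on.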
985 auto Mask = buildAllOnesMask(VecTy, VL, MIB, MRI);
986 return {Mask, VL};
987}
988
989static MachineInstrBuilder
990buildSplatPartsS64WithVL(const DstOp &Dst, const SrcOp &Passthru, Register Lo,
991 Register Hi, const SrcOp &VL, MachineIRBuilder &MIB,
992 MachineRegisterInfo &MRI) {
993 // TODO: If the Hi bits of the splat are undefined, then it's fine to just
994 // splat Lo even if it might be sign extended. I don't think we have
995 // introduced a case where we build an s64 whose upper bits are undef
996 // yet.
997
998 // Fall back to a stack store and stride x0 vector load.
999 // TODO: need to lower G_SPLAT_VECTOR_SPLIT_I64. This is done in
1000 // preprocessDAG in SDAG.
1001 return MIB.buildInstr(RISCV::G_SPLAT_VECTOR_SPLIT_I64_VL, {Dst},
1002 {Passthru, Lo, Hi, VL});
1003}
1004
1005static MachineInstrBuilder
1006buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru,
1007 const SrcOp &Scalar, const SrcOp &VL,
1008 MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
1009 assert(Scalar.getLLTTy(MRI) == LLT::scalar(64) && "Unexpected VecTy!");
1010 auto Unmerge = MIB.buildUnmerge(LLT::scalar(32), Scalar);
1011 return buildSplatPartsS64WithVL(Dst, Passthru, Unmerge.getReg(0),
1012 Unmerge.getReg(1), VL, MIB, MRI);
1013}
1014
1015// Lower splats of s1 types to G_ICMP. For each mask vector type, we have a
1016// legal equivalently-sized i8 type, so we can use that as a go-between.
1017// Splats of s1 types that have constant value can be legalized as VMSET_VL or
1018// VMCLR_VL.
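// For a non-constant s1 splat the code below ends up building, roughly
// (illustrative):
//   %b:_(s8)                  = G_AND (G_ZEXT %val), 1
//   %lhs:_(<vscale x N x s8>) = G_SPLAT_VECTOR %b
//   %dst:_(<vscale x N x s1>) = G_ICMP intpred(ne), %lhs, (splat 0)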
1019bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
1020 MachineIRBuilder &MIB) const {
1021 assert(MI.getOpcode() == TargetOpcode::G_SPLAT_VECTOR);
1022
1023 MachineRegisterInfo &MRI = *MIB.getMRI();
1024
1025 Register Dst = MI.getOperand(0).getReg();
1026 Register SplatVal = MI.getOperand(1).getReg();
1027
1028 LLT VecTy = MRI.getType(Dst);
1029 LLT XLenTy(STI.getXLenVT());
1030
1031 // Handle case of s64 element vectors on rv32
1032 if (XLenTy.getSizeInBits() == 32 &&
1033 VecTy.getElementType().getSizeInBits() == 64) {
1034 auto [_, VL] = buildDefaultVLOps(MRI.getType(Dst), MIB, MRI);
1035 buildSplatSplitS64WithVL(Dst, MIB.buildUndef(VecTy), SplatVal, VL, MIB,
1036 MRI);
1037 MI.eraseFromParent();
1038 return true;
1039 }
1040
1041 // All-zeros or all-ones splats are handled specially.
1042 MachineInstr &SplatValMI = *MRI.getVRegDef(SplatVal);
1043 if (isAllOnesOrAllOnesSplat(SplatValMI, MRI)) {
1044 auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1045 MIB.buildInstr(RISCV::G_VMSET_VL, {Dst}, {VL});
1046 MI.eraseFromParent();
1047 return true;
1048 }
1049 if (isNullOrNullSplat(SplatValMI, MRI)) {
1050 auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1051 MIB.buildInstr(RISCV::G_VMCLR_VL, {Dst}, {VL});
1052 MI.eraseFromParent();
1053 return true;
1054 }
1055
1056 // Handle non-constant mask splat (i.e. not sure if it's all zeros or all
1057 // ones) by promoting it to an s8 splat.
1058 LLT InterEltTy = LLT::scalar(8);
1059 LLT InterTy = VecTy.changeElementType(InterEltTy);
1060 auto ZExtSplatVal = MIB.buildZExt(InterEltTy, SplatVal);
1061 auto And =
1062 MIB.buildAnd(InterEltTy, ZExtSplatVal, MIB.buildConstant(InterEltTy, 1));
1063 auto LHS = MIB.buildSplatVector(InterTy, And);
1064 auto ZeroSplat =
1065 MIB.buildSplatVector(InterTy, MIB.buildConstant(InterEltTy, 0));
1066 MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, LHS, ZeroSplat);
1067 MI.eraseFromParent();
1068 return true;
1069}
1070
1071static LLT getLMUL1Ty(LLT VecTy) {
1072 assert(VecTy.getElementType().getSizeInBits() <= 64 &&
1073 "Unexpected vector LLT");
1074 return LLT::scalable_vector(RISCV::RVVBitsPerBlock /
1075 VecTy.getElementType().getSizeInBits(),
1076 VecTy.getElementType());
1077}
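// For example, getLMUL1Ty(nxv8s32) is nxv2s32: with RVVBitsPerBlock == 64, an
// LMUL=1 register holds 64 / 32 == 2 elements per vscale.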
1078
1079bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
1080 MachineIRBuilder &MIB) const {
1081 GExtractSubvector &ES = cast<GExtractSubvector>(MI);
1082
1083 MachineRegisterInfo &MRI = *MIB.getMRI();
1084
1085 Register Dst = ES.getReg(0);
1086 Register Src = ES.getSrcVec();
1087 uint64_t Idx = ES.getIndexImm();
1088
1089 // With an index of 0 this is a cast-like subvector, which can be performed
1090 // with subregister operations.
1091 if (Idx == 0)
1092 return true;
1093
1094 LLT LitTy = MRI.getType(Dst);
1095 LLT BigTy = MRI.getType(Src);
1096
1097 if (LitTy.getElementType() == LLT::scalar(1)) {
1098 // We can't slide this mask vector up indexed by its i1 elements.
1099 // This poses a problem when we wish to insert a scalable vector which
1100 // can't be re-expressed as a larger type. Just choose the slow path and
1101 // extend to a larger type, then truncate back down.
1102 LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
1103 LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
1104 auto BigZExt = MIB.buildZExt(ExtBigTy, Src);
1105 auto ExtractZExt = MIB.buildExtractSubvector(ExtLitTy, BigZExt, Idx);
1106 auto SplatZero = MIB.buildSplatVector(
1107 ExtLitTy, MIB.buildConstant(ExtLitTy.getElementType(), 0));
1108 MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, ExtractZExt, SplatZero);
1109 MI.eraseFromParent();
1110 return true;
1111 }
1112
1113 // extract_subvector scales the index by vscale if the subvector is scalable,
1114 // and decomposeSubvectorInsertExtractToSubRegs takes this into account.
1115 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1116 MVT LitTyMVT = getMVTForLLT(LitTy);
1117 auto Decompose =
1118 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1119 getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);
1120 unsigned RemIdx = Decompose.second;
1121
1122 // If the Idx has been completely eliminated then this is a subvector extract
1123 // which naturally aligns to a vector register. These can easily be handled
1124 // using subregister manipulation.
1125 if (RemIdx == 0)
1126 return true;
1127
1128 // Else LitTy is M1 or smaller and may need to be slid down: if LitTy
1129 // was > M1 then the index would need to be a multiple of VLMAX, and so would
1130 // divide exactly.
1131 assert(
1134
1135 // If the vector type is an LMUL-group type, extract a subvector equal to the
1136 // nearest full vector register type.
1137 LLT InterLitTy = BigTy;
1138 Register Vec = Src;
1139 if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
1140 getLMUL1Ty(BigTy).getSizeInBits())) {
1141 // If BigTy has an LMUL > 1, then LitTy should have a smaller LMUL, and
1142 // we should have successfully decomposed the extract into a subregister.
1143 assert(Decompose.first != RISCV::NoSubRegister);
1144 InterLitTy = getLMUL1Ty(BigTy);
1145 // SDAG builds a TargetExtractSubreg. We cannot create a Copy with SubReg
1146 // specified on the source Register (the equivalent) since generic virtual
1147 // register does not allow subregister index.
1148 Vec = MIB.buildExtractSubvector(InterLitTy, Src, Idx - RemIdx).getReg(0);
1149 }
1150
1151 // Slide this vector register down by the desired number of elements in order
1152 // to place the desired subvector starting at element 0.
1153 const LLT XLenTy(STI.getXLenVT());
1154 auto SlidedownAmt = MIB.buildVScale(XLenTy, RemIdx);
1155 auto [Mask, VL] = buildDefaultVLOps(LitTy, MIB, MRI);
1157 auto Slidedown = MIB.buildInstr(
1158 RISCV::G_VSLIDEDOWN_VL, {InterLitTy},
1159 {MIB.buildUndef(InterLitTy), Vec, SlidedownAmt, Mask, VL, Policy});
1160
1161 // Now the vector is in the right position, extract our final subvector. This
1162 // should resolve to a COPY.
1163 MIB.buildExtractSubvector(Dst, Slidedown, 0);
1164
1165 MI.eraseFromParent();
1166 return true;
1167}
1168
1169bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
1170 LegalizerHelper &Helper,
1171 MachineIRBuilder &MIB) const {
1172 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
1173
1174 MachineRegisterInfo &MRI = *MIB.getMRI();
1175
1176 Register Dst = IS.getReg(0);
1177 Register BigVec = IS.getBigVec();
1178 Register LitVec = IS.getSubVec();
1179 uint64_t Idx = IS.getIndexImm();
1180
1181 LLT BigTy = MRI.getType(BigVec);
1182 LLT LitTy = MRI.getType(LitVec);
1183
1184 if (Idx == 0 ||
1185 MRI.getVRegDef(BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
1186 return true;
1187
1188 // We don't have the ability to slide mask vectors up indexed by their i1
1189 // elements; the smallest we can do is i8. Often we are able to bitcast to
1190 // equivalent i8 vectors. Otherwise, we must zero-extend to equivalent i8
1191 // vectors and truncate down after the insert.
1192 if (LitTy.getElementType() == LLT::scalar(1)) {
1193 auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
1194 auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
1195 if (BigTyMinElts >= 8 && LitTyMinElts >= 8)
1196 return Helper.bitcast(
1197 IS, 0,
1198 LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8));
1199
1200 // We can't slide this mask vector up indexed by its i1 elements.
1201 // This poses a problem when we wish to insert a scalable vector which
1202 // can't be re-expressed as a larger type. Just choose the slow path and
1203 // extend to a larger type, then truncate back down.
1204 LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
1205 return Helper.widenScalar(IS, 0, ExtBigTy);
1206 }
1207
1208 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1209 unsigned SubRegIdx, RemIdx;
1210 std::tie(SubRegIdx, RemIdx) =
1211 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1212 getMVTForLLT(BigTy), getMVTForLLT(LitTy), Idx, TRI);
1213
1214 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
1215 assert(isPowerOf2_64(
1216 STI.expandVScale(LitTy.getSizeInBits()).getKnownMinValue()));
1217 bool ExactlyVecRegSized =
1218 STI.expandVScale(LitTy.getSizeInBits())
1219 .isKnownMultipleOf(STI.expandVScale(VecRegSize));
1220
1221 // If the Idx has been completely eliminated and this subvector's size is a
1222 // vector register or a multiple thereof, or the surrounding elements are
1223 // undef, then this is a subvector insert which naturally aligns to a vector
1224 // register. These can easily be handled using subregister manipulation.
1225 if (RemIdx == 0 && ExactlyVecRegSized)
1226 return true;
1227
1228 // If the subvector is smaller than a vector register, then the insertion
1229 // must preserve the undisturbed elements of the register. We do this by
1230 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
1231 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
1232 // subvector within the vector register, and an INSERT_SUBVECTOR of that
1233 // LMUL=1 type back into the larger vector (resolving to another subregister
1234 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
1235 // to avoid allocating a large register group to hold our subvector.
1236
1237 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
1238 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
1239 // (in our case undisturbed). This means we can set up a subvector insertion
1240 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
1241 // size of the subvector.
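  // For example (illustrative): inserting an nxv1s32 subvector into an
  // nxv2s32 vector at index 1 gives OFFSET = vscale x 1 and VL = vscale x 2;
  // since that VL covers the whole LMUL=1 container, the tail-agnostic policy
  // can be used.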
1242 const LLT XLenTy(STI.getXLenVT());
1243 LLT InterLitTy = BigTy;
1244 Register AlignedExtract = BigVec;
1245 unsigned AlignedIdx = Idx - RemIdx;
1246 if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
1247 getLMUL1Ty(BigTy).getSizeInBits())) {
1248 InterLitTy = getLMUL1Ty(BigTy);
1249 // Extract a subvector equal to the nearest full vector register type. This
1250 // should resolve to a G_EXTRACT on a subreg.
1251 AlignedExtract =
1252 MIB.buildExtractSubvector(InterLitTy, BigVec, AlignedIdx).getReg(0);
1253 }
1254
1255 auto Insert = MIB.buildInsertSubvector(InterLitTy, MIB.buildUndef(InterLitTy),
1256 LitVec, 0);
1257
1258 auto [Mask, _] = buildDefaultVLOps(BigTy, MIB, MRI);
1259 auto VL = MIB.buildVScale(XLenTy, LitTy.getElementCount().getKnownMinValue());
1260
1261 // If we're inserting into the lowest elements, use a tail undisturbed
1262 // vmv.v.v.
1263 MachineInstrBuilder Inserted;
1264 bool NeedInsertSubvec =
1265 TypeSize::isKnownGT(BigTy.getSizeInBits(), InterLitTy.getSizeInBits());
1266 Register InsertedDst =
1267 NeedInsertSubvec ? MRI.createGenericVirtualRegister(InterLitTy) : Dst;
1268 if (RemIdx == 0) {
1269 Inserted = MIB.buildInstr(RISCV::G_VMV_V_V_VL, {InsertedDst},
1270 {AlignedExtract, Insert, VL});
1271 } else {
1272 auto SlideupAmt = MIB.buildVScale(XLenTy, RemIdx);
1273 // Construct the vector length corresponding to RemIdx + length(LitTy).
1274 VL = MIB.buildAdd(XLenTy, SlideupAmt, VL);
1275 // Use tail agnostic policy if we're inserting over InterLitTy's tail.
1276 ElementCount EndIndex =
1279 if (STI.expandVScale(EndIndex) ==
1280 STI.expandVScale(InterLitTy.getElementCount()))
1282
1283 Inserted =
1284 MIB.buildInstr(RISCV::G_VSLIDEUP_VL, {InsertedDst},
1285 {AlignedExtract, Insert, SlideupAmt, Mask, VL, Policy});
1286 }
1287
1288 // If required, insert this subvector back into the correct vector register.
1289 // This should resolve to an INSERT_SUBREG instruction.
1290 if (NeedInsertSubvec)
1291 MIB.buildInsertSubvector(Dst, BigVec, Inserted, AlignedIdx);
1292
1293 MI.eraseFromParent();
1294 return true;
1295}
1296
1297static unsigned getRISCVWOpcode(unsigned Opcode) {
1298 switch (Opcode) {
1299 default:
1300 llvm_unreachable("Unexpected opcode");
1301 case TargetOpcode::G_ASHR:
1302 return RISCV::G_SRAW;
1303 case TargetOpcode::G_LSHR:
1304 return RISCV::G_SRLW;
1305 case TargetOpcode::G_SHL:
1306 return RISCV::G_SLLW;
1307 case TargetOpcode::G_SDIV:
1308 return RISCV::G_DIVW;
1309 case TargetOpcode::G_UDIV:
1310 return RISCV::G_DIVUW;
1311 case TargetOpcode::G_UREM:
1312 return RISCV::G_REMUW;
1313 case TargetOpcode::G_ROTL:
1314 return RISCV::G_ROLW;
1315 case TargetOpcode::G_ROTR:
1316 return RISCV::G_RORW;
1317 case TargetOpcode::G_CTLZ:
1318 return RISCV::G_CLZW;
1319 case TargetOpcode::G_CTTZ:
1320 return RISCV::G_CTZW;
1321 case TargetOpcode::G_FPTOSI:
1322 return RISCV::G_FCVT_W_RV64;
1323 case TargetOpcode::G_FPTOUI:
1324 return RISCV::G_FCVT_WU_RV64;
1325 }
1326}
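// These are the RV64 *W forms: they operate on the low 32 bits and sign-extend
// the 32-bit result to 64 bits, which is why the custom legalization below
// widens s32 operations to sXLen before swapping in the W opcode.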
1327
1328bool RISCVLegalizerInfo::legalizeCustom(
1329 LegalizerHelper &Helper, MachineInstr &MI,
1330 LostDebugLocObserver &LocObserver) const {
1331 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1332 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1333 MachineFunction &MF = *MI.getParent()->getParent();
1334 switch (MI.getOpcode()) {
1335 default:
1336 // No idea what to do.
1337 return false;
1338 case TargetOpcode::G_ABS:
1339 return Helper.lowerAbsToMaxNeg(MI);
1340 // TODO: G_FCONSTANT
1341 case TargetOpcode::G_CONSTANT: {
1342 const Function &F = MF.getFunction();
1343 // TODO: if PSI and BFI are present, add " ||
1344 // llvm::shouldOptForSize(*CurMBB, PSI, BFI)".
1345 bool ShouldOptForSize = F.hasOptSize();
1346 const ConstantInt *ConstVal = MI.getOperand(1).getCImm();
1347 if (!shouldBeInConstantPool(ConstVal->getValue(), ShouldOptForSize))
1348 return true;
1349 return Helper.lowerConstant(MI);
1350 }
1351 case TargetOpcode::G_SUB:
1352 case TargetOpcode::G_ADD: {
1353 Helper.Observer.changingInstr(MI);
1354 Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1355 Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
1356
1357 Register DstALU = MRI.createGenericVirtualRegister(sXLen);
1358
1359 MachineOperand &MO = MI.getOperand(0);
1360 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1361 auto DstSext = MIRBuilder.buildSExtInReg(sXLen, DstALU, 32);
1362
1363 MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {MO}, {DstSext});
1364 MO.setReg(DstALU);
1365
1366 Helper.Observer.changedInstr(MI);
1367 return true;
1368 }
1369 case TargetOpcode::G_SEXT_INREG: {
1370 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1371 int64_t SizeInBits = MI.getOperand(2).getImm();
1372 // Source size of 32 is sext.w.
1373 if (DstTy.getSizeInBits() == 64 && SizeInBits == 32)
1374 return true;
1375
1376 if (STI.hasStdExtZbb() && (SizeInBits == 8 || SizeInBits == 16))
1377 return true;
1378
1379 return Helper.lower(MI, 0, /* Unused hint type */ LLT()) ==
1380 LegalizerHelper::Legalized;
1381 }
1382 case TargetOpcode::G_ASHR:
1383 case TargetOpcode::G_LSHR:
1384 case TargetOpcode::G_SHL: {
1385 if (getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
1386 // We don't need a custom node for shift by constant. Just widen the
1387 // source and the shift amount.
1388 unsigned ExtOpc = TargetOpcode::G_ANYEXT;
1389 if (MI.getOpcode() == TargetOpcode::G_ASHR)
1390 ExtOpc = TargetOpcode::G_SEXT;
1391 else if (MI.getOpcode() == TargetOpcode::G_LSHR)
1392 ExtOpc = TargetOpcode::G_ZEXT;
1393
1394 Helper.Observer.changingInstr(MI);
1395 Helper.widenScalarSrc(MI, sXLen, 1, ExtOpc);
1396 Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ZEXT);
1397 Helper.widenScalarDst(MI, sXLen);
1398 Helper.Observer.changedInstr(MI);
1399 return true;
1400 }
1401
1402 Helper.Observer.changingInstr(MI);
1403 Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1404 Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
1405 Helper.widenScalarDst(MI, sXLen);
1406 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1407 Helper.Observer.changedInstr(MI);
1408 return true;
1409 }
1410 case TargetOpcode::G_SDIV:
1411 case TargetOpcode::G_UDIV:
1412 case TargetOpcode::G_UREM:
1413 case TargetOpcode::G_ROTL:
1414 case TargetOpcode::G_ROTR: {
1415 Helper.Observer.changingInstr(MI);
1416 Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1417 Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
1418 Helper.widenScalarDst(MI, sXLen);
1419 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1420 Helper.Observer.changedInstr(MI);
1421 return true;
1422 }
1423 case TargetOpcode::G_CTLZ:
1424 case TargetOpcode::G_CTTZ: {
1425 Helper.Observer.changingInstr(MI);
1426 Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1427 Helper.widenScalarDst(MI, sXLen);
1428 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1429 Helper.Observer.changedInstr(MI);
1430 return true;
1431 }
1432 case TargetOpcode::G_FPTOSI:
1433 case TargetOpcode::G_FPTOUI: {
1434 Helper.Observer.changingInstr(MI);
1435 Helper.widenScalarDst(MI, sXLen);
1436 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1437 MI.addOperand(MachineOperand::CreateImm(RISCVFPRndMode::RTZ));
1438 Helper.Observer.changedInstr(MI);
1439 return true;
1440 }
1441 case TargetOpcode::G_IS_FPCLASS: {
1442 Register GISFPCLASS = MI.getOperand(0).getReg();
1443 Register Src = MI.getOperand(1).getReg();
1444 const MachineOperand &ImmOp = MI.getOperand(2);
1445 MachineIRBuilder MIB(MI);
1446
1447 // Turn LLVM IR's floating-point class mask into the RISC-V fclass encoding
1448 // by simply rotating the 10-bit immediate right by two bits.
1449 APInt GFpClassImm(10, static_cast<uint64_t>(ImmOp.getImm()));
1450 auto FClassMask = MIB.buildConstant(sXLen, GFpClassImm.rotr(2).zext(XLen));
1451 auto ConstZero = MIB.buildConstant(sXLen, 0);
1452
1453 auto GFClass = MIB.buildInstr(RISCV::G_FCLASS, {sXLen}, {Src});
1454 auto And = MIB.buildAnd(sXLen, GFClass, FClassMask);
1455 MIB.buildICmp(CmpInst::ICMP_NE, GISFPCLASS, And, ConstZero);
1456
1457 MI.eraseFromParent();
1458 return true;
1459 }
1460 case TargetOpcode::G_BRJT:
1461 return legalizeBRJT(MI, MIRBuilder);
1462 case TargetOpcode::G_VASTART:
1463 return legalizeVAStart(MI, MIRBuilder);
1464 case TargetOpcode::G_VSCALE:
1465 return legalizeVScale(MI, MIRBuilder);
1466 case TargetOpcode::G_ZEXT:
1467 case TargetOpcode::G_SEXT:
1468 case TargetOpcode::G_ANYEXT:
1469 return legalizeExt(MI, MIRBuilder);
1470 case TargetOpcode::G_SPLAT_VECTOR:
1471 return legalizeSplatVector(MI, MIRBuilder);
1472 case TargetOpcode::G_EXTRACT_SUBVECTOR:
1473 return legalizeExtractSubvector(MI, MIRBuilder);
1474 case TargetOpcode::G_INSERT_SUBVECTOR:
1475 return legalizeInsertSubvector(MI, Helper, MIRBuilder);
1476 case TargetOpcode::G_LOAD:
1477 case TargetOpcode::G_STORE:
1478 return legalizeLoadStore(MI, Helper, MIRBuilder);
1479 }
1480
1481 llvm_unreachable("expected switch to return");
1482}
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:191
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:278
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
LegalizeRuleSet & maxScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at most as wide as Ty.
LegalizeRuleSet & lower()
The instruction is lowered.
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
LegalizeRuleSet & customFor(std::initializer_list< LLT > Types)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:64
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildInsertSubvector(const DstOp &Res, const SrcOp &Src0, const SrcOp &Src1, unsigned Index)
Build and insert Res = G_INSERT_SUBVECTOR Src0, Src1, Idx.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineFunction & getMF()
Getter for the function we currently build.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildExtractSubvector(const DstOp &Res, const SrcOp &Src, unsigned Index)
Build and insert Res = G_EXTRACT_SUBVECTOR Src, Idx0.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildBrIndirect(Register Tgt)
Build and insert G_BRINDIRECT Tgt.
MachineInstrBuilder buildSplatVector(const DstOp &Res, const SrcOp &Val)
Build and insert Res = G_SPLAT_VECTOR Val.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
Definition: MachineInstr.h:72
LLVM_ABI unsigned getEntrySize(const DataLayout &TD) const
getEntrySize - Return the size of each entry in the jump table.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
LLVM_ABI unsigned getEntryAlignment(const DataLayout &TD) const
getEntryAlignment - Return the alignment of each entry in the jump table.
JTEntryKind getEntryKind() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const override
Called for instructions with the Custom LegalizationAction.
bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override
RISCVLegalizerInfo(const RISCVSubtarget &ST)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
unsigned getRealMinVLen() const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
unsigned getMaxBuildIntsCost() const
bool useConstantPoolForLargeInts() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
static RISCVVType::VLMUL getLMUL(MVT VT)
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
Register getReg() const
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:349
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:169
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:226
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:255
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
LLVM_ABI LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LLVM_ABI LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
LLVM_ABI LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
static constexpr unsigned RVVBitsPerBlock
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition: Utils.cpp:2029
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1605
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:293
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1587
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:342
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:336
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:433
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
std::function< bool(const LegalityQuery &)> LegalityPredicate
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:299
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
ArrayRef< LLT > Types
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.