LLVM 22.0.0git
RISCVLegalizerInfo.cpp
Go to the documentation of this file.
1//===-- RISCVLegalizerInfo.cpp ----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for RISC-V.
10/// \todo This should be generated by TableGen.
11//===----------------------------------------------------------------------===//
12
13#include "RISCVLegalizerInfo.h"
16#include "RISCVSubtarget.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/IntrinsicsRISCV.h"
31#include "llvm/IR/Type.h"
32
33using namespace llvm;
34using namespace LegalityPredicates;
35using namespace LegalizeMutations;
36
/// Builds a predicate that accepts the type at index \p TypeIdx only when it
/// is a member of \p IntOrFPVecTys and the subtarget \p ST supports it:
///  - vector instructions must be available,
///  - 64-bit elements additionally require ST.hasVInstructionsI64(),
///  - types whose known minimum element count is 1 require ELEN == 64.
// NOTE(review): the line carrying this function's return type / linkage is
// not visible in this listing.
38typeIsLegalIntOrFPVec(unsigned TypeIdx,
39 std::initializer_list<LLT> IntOrFPVecTys,
40 const RISCVSubtarget &ST) {
// TypeIdx is captured by copy and ST by reference.
41 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
42 return ST.hasVInstructions() &&
43 (Query.Types[TypeIdx].getScalarSizeInBits() != 64 ||
44 ST.hasVInstructionsI64()) &&
45 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
46 ST.getELen() == 64);
47 };
48
// The set-membership test is listed first, ahead of the subtarget check P.
49 return all(typeInSet(TypeIdx, IntOrFPVecTys), P);
50}
51
/// Builds a predicate that accepts the type at index \p TypeIdx only when it
/// is a member of \p BoolVecTys, the subtarget \p ST has vector instructions,
/// and (for types whose known minimum element count is 1) ELEN == 64.
// NOTE(review): the line carrying this function's return type / linkage is
// not visible in this listing.
53typeIsLegalBoolVec(unsigned TypeIdx, std::initializer_list<LLT> BoolVecTys,
54 const RISCVSubtarget &ST) {
// TypeIdx is captured by copy and ST by reference.
55 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
56 return ST.hasVInstructions() &&
57 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
58 ST.getELen() == 64);
59 };
60 return all(typeInSet(TypeIdx, BoolVecTys), P);
61}
62
63static LegalityPredicate typeIsLegalPtrVec(unsigned TypeIdx,
64 std::initializer_list<LLT> PtrVecTys,
65 const RISCVSubtarget &ST) {
66 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
67 return ST.hasVInstructions() &&
68 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
69 ST.getELen() == 64) &&
70 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 16 ||
71 Query.Types[TypeIdx].getScalarSizeInBits() == 32);
72 };
73 return all(typeInSet(TypeIdx, PtrVecTys), P);
74}
75
// Constructor: records the subtarget and XLen, then registers the GlobalISel
// legalization rules for each generic opcode handled below. Rules are chosen
// from XLen and from the extensions the subtarget reports.
// NOTE(review): the line declaring the constructor's name and its `ST`
// parameter is not visible in this listing, and a few interior lines are
// missing as well (marked with NOTE(review) comments below).
77 : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(XLen)) {
// Scalar and pointer types referenced by the rules below.
78 const LLT sDoubleXLen = LLT::scalar(2 * XLen);
79 const LLT p0 = LLT::pointer(0, XLen);
80 const LLT s1 = LLT::scalar(1);
81 const LLT s8 = LLT::scalar(8);
82 const LLT s16 = LLT::scalar(16);
83 const LLT s32 = LLT::scalar(32);
84 const LLT s64 = LLT::scalar(64);
85 const LLT s128 = LLT::scalar(128);
86
// Scalable vector types, named nxv<known minimum element count><element type>.
87 const LLT nxv1s1 = LLT::scalable_vector(1, s1);
88 const LLT nxv2s1 = LLT::scalable_vector(2, s1);
89 const LLT nxv4s1 = LLT::scalable_vector(4, s1);
90 const LLT nxv8s1 = LLT::scalable_vector(8, s1);
91 const LLT nxv16s1 = LLT::scalable_vector(16, s1);
92 const LLT nxv32s1 = LLT::scalable_vector(32, s1);
93 const LLT nxv64s1 = LLT::scalable_vector(64, s1);
94
95 const LLT nxv1s8 = LLT::scalable_vector(1, s8);
96 const LLT nxv2s8 = LLT::scalable_vector(2, s8);
97 const LLT nxv4s8 = LLT::scalable_vector(4, s8);
98 const LLT nxv8s8 = LLT::scalable_vector(8, s8);
99 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
100 const LLT nxv32s8 = LLT::scalable_vector(32, s8);
101 const LLT nxv64s8 = LLT::scalable_vector(64, s8);
102
103 const LLT nxv1s16 = LLT::scalable_vector(1, s16);
104 const LLT nxv2s16 = LLT::scalable_vector(2, s16);
105 const LLT nxv4s16 = LLT::scalable_vector(4, s16);
106 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
107 const LLT nxv16s16 = LLT::scalable_vector(16, s16);
108 const LLT nxv32s16 = LLT::scalable_vector(32, s16);
109
110 const LLT nxv1s32 = LLT::scalable_vector(1, s32);
111 const LLT nxv2s32 = LLT::scalable_vector(2, s32);
112 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
113 const LLT nxv8s32 = LLT::scalable_vector(8, s32);
114 const LLT nxv16s32 = LLT::scalable_vector(16, s32);
115
116 const LLT nxv1s64 = LLT::scalable_vector(1, s64);
117 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
118 const LLT nxv4s64 = LLT::scalable_vector(4, s64);
119 const LLT nxv8s64 = LLT::scalable_vector(8, s64);
120
121 const LLT nxv1p0 = LLT::scalable_vector(1, p0);
122 const LLT nxv2p0 = LLT::scalable_vector(2, p0);
123 const LLT nxv4p0 = LLT::scalable_vector(4, p0);
124 const LLT nxv8p0 = LLT::scalable_vector(8, p0);
125 const LLT nxv16p0 = LLT::scalable_vector(16, p0);
126
127 using namespace TargetOpcode;
128
// Candidate type sets handed to the typeIsLegal*Vec predicate builders.
129 auto BoolVecTys = {nxv1s1, nxv2s1, nxv4s1, nxv8s1, nxv16s1, nxv32s1, nxv64s1};
130
131 auto IntOrFPVecTys = {nxv1s8, nxv2s8, nxv4s8, nxv8s8, nxv16s8, nxv32s8,
132 nxv64s8, nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16,
133 nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32,
134 nxv1s64, nxv2s64, nxv4s64, nxv8s64};
135
136 auto PtrVecTys = {nxv1p0, nxv2p0, nxv4p0, nxv8p0, nxv16p0};
137
// Integer add/sub: legal on XLen scalars and supported vectors; s32 is
// custom-legalized on 64-bit targets; remaining scalars are clamped to XLen.
138 getActionDefinitionsBuilder({G_ADD, G_SUB})
139 .legalFor({sXLen})
140 .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
141 .customFor(ST.is64Bit(), {s32})
143 .clampScalar(0, sXLen, sXLen);
144
// Bitwise logic: legal on XLen scalars and supported vectors.
145 getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
146 .legalFor({sXLen})
147 .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
149 .clampScalar(0, sXLen, sXLen);
150
// NOTE(review): the opening line of the next statement is not visible in this
// listing.
152 {G_UADDE, G_UADDO, G_USUBE, G_USUBO}).lower();
153
154 getActionDefinitionsBuilder({G_SADDO, G_SADDE, G_SSUBO})
155 .minScalar(0, sXLen)
156 .lower();
157
158 // TODO: Use Vector Single-Width Saturating Instructions for vector types.
// NOTE(review): the opening line of the next statement is not visible in this
// listing.
160 {G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT, G_SSHLSAT, G_USHLSAT})
161 .lower();
162
// Shifts: legal when value and shift amount are both XLen; {s32, s32} is
// custom-legalized on 64-bit targets.
163 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
164 .legalFor({{sXLen, sXLen}})
165 .customFor(ST.is64Bit(), {{s32, s32}})
166 .widenScalarToNextPow2(0)
167 .clampScalar(1, sXLen, sXLen)
168 .clampScalar(0, sXLen, sXLen);
169
// Integer extensions; extensions whose source is a legal mask vector are
// custom-legalized.
170 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
171 .legalFor({{s32, s16}})
172 .legalFor(ST.is64Bit(), {{s64, s16}, {s64, s32}})
173 .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
174 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))
175 .customIf(typeIsLegalBoolVec(1, BoolVecTys, ST))
176 .maxScalar(0, sXLen);
177
178 getActionDefinitionsBuilder(G_SEXT_INREG)
179 .customFor({sXLen})
180 .clampScalar(0, sXLen, sXLen)
181 .lower();
182
183 // Merge/Unmerge
184 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
185 auto &MergeUnmergeActions = getActionDefinitionsBuilder(Op);
// The wide type is type index 0 for a merge and type index 1 for an unmerge.
186 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
187 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
// With XLen == 32 and the D extension, s64 <-> s32 merge/unmerge is legal.
188 if (XLen == 32 && ST.hasStdExtD()) {
189 MergeUnmergeActions.legalIf(
190 all(typeIs(BigTyIdx, s64), typeIs(LitTyIdx, s32)));
191 }
192 MergeUnmergeActions.widenScalarToNextPow2(LitTyIdx, XLen)
193 .widenScalarToNextPow2(BigTyIdx, XLen)
194 .clampScalar(LitTyIdx, sXLen, sXLen)
195 .clampScalar(BigTyIdx, sXLen, sXLen);
196 }
197
198 getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();
199
// Rotates are legal at XLen only when Zbb or Zbkb is present; otherwise lower.
200 getActionDefinitionsBuilder({G_ROTR, G_ROTL})
201 .legalFor(ST.hasStdExtZbb() || ST.hasStdExtZbkb(), {{sXLen, sXLen}})
202 .customFor(ST.is64Bit() && (ST.hasStdExtZbb() || ST.hasStdExtZbkb()),
203 {{s32, s32}})
204 .lower();
205
206 getActionDefinitionsBuilder(G_BITREVERSE).maxScalar(0, sXLen).lower();
207
208 getActionDefinitionsBuilder(G_BITCAST).legalIf(
// NOTE(review): parts of this predicate expression are not visible in this
// listing.
210 typeIsLegalBoolVec(0, BoolVecTys, ST)),
212 typeIsLegalBoolVec(1, BoolVecTys, ST))));
213
// Byte swap is legal at XLen with Zbb or Zbkb; otherwise it is lowered.
214 auto &BSWAPActions = getActionDefinitionsBuilder(G_BSWAP);
215 if (ST.hasStdExtZbb() || ST.hasStdExtZbkb())
216 BSWAPActions.legalFor({sXLen}).clampScalar(0, sXLen, sXLen);
217 else
218 BSWAPActions.maxScalar(0, sXLen).lower();
219
// Count leading/trailing zeros: legal at XLen with Zbb; otherwise lowered.
// The *_ZERO_UNDEF variants are always lowered.
220 auto &CountZerosActions = getActionDefinitionsBuilder({G_CTLZ, G_CTTZ});
221 auto &CountZerosUndefActions =
222 getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF});
223 if (ST.hasStdExtZbb()) {
224 CountZerosActions.legalFor({{sXLen, sXLen}})
225 .customFor({{s32, s32}})
226 .clampScalar(0, s32, sXLen)
227 .widenScalarToNextPow2(0)
228 .scalarSameSizeAs(1, 0);
229 } else {
230 CountZerosActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
231 CountZerosUndefActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0);
232 }
233 CountZerosUndefActions.lower();
234
// Population count: legal at XLen with Zbb; otherwise lowered.
235 auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
236 if (ST.hasStdExtZbb()) {
237 CTPOPActions.legalFor({{sXLen, sXLen}})
238 .clampScalar(0, sXLen, sXLen)
239 .scalarSameSizeAs(1, 0);
240 } else {
241 CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
242 }
243
// Constants: p0 is legal; s32 is legal on 32-bit targets, while s64 is
// custom-legalized on 64-bit targets.
244 getActionDefinitionsBuilder(G_CONSTANT)
245 .legalFor({p0})
246 .legalFor(!ST.is64Bit(), {s32})
247 .customFor(ST.is64Bit(), {s64})
248 .widenScalarToNextPow2(0)
249 .clampScalar(0, sXLen, sXLen);
250
251 // TODO: transform illegal vector types into legal vector type
252 getActionDefinitionsBuilder(G_FREEZE)
253 .legalFor({s16, s32, p0})
254 .legalFor(ST.is64Bit(), {s64})
255 .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
256 .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
257 .widenScalarToNextPow2(0)
258 .clampScalar(0, s16, sXLen);
259
260 // TODO: transform illegal vector types into legal vector type
261 // TODO: Merge with G_FREEZE?
262 getActionDefinitionsBuilder(
263 {G_IMPLICIT_DEF, G_CONSTANT_FOLD_BARRIER})
264 .legalFor({s32, sXLen, p0})
265 .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
266 .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
267 .widenScalarToNextPow2(0)
268 .clampScalar(0, s32, sXLen);
269
// Integer compare: XLen result from XLen or pointer operands, or a legal mask
// vector result from legal integer vector operands.
270 getActionDefinitionsBuilder(G_ICMP)
271 .legalFor({{sXLen, sXLen}, {sXLen, p0}})
272 .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
273 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))
274 .widenScalarOrEltToNextPow2OrMinSize(1, 8)
275 .clampScalar(1, sXLen, sXLen)
276 .clampScalar(0, sXLen, sXLen);
277
// Select: s64 values are legal when XLen == 64 or the D extension is present.
278 getActionDefinitionsBuilder(G_SELECT)
279 .legalFor({{s32, sXLen}, {p0, sXLen}})
280 .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
281 typeIsLegalBoolVec(1, BoolVecTys, ST)))
282 .legalFor(XLen == 64 || ST.hasStdExtD(), {{s64, sXLen}})
283 .widenScalarToNextPow2(0)
284 .clampScalar(0, s32, (XLen == 64 || ST.hasStdExtD()) ? s64 : s32)
285 .clampScalar(1, sXLen, sXLen);
286
// Load/store rule sets are built up incrementally over the following
// sections and finished with the fallback rules after the vector cases.
287 auto &LoadActions = getActionDefinitionsBuilder(G_LOAD);
288 auto &StoreActions = getActionDefinitionsBuilder(G_STORE);
289 auto &ExtLoadActions = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD});
290
291 // Return the alignment needed for scalar memory ops. If unaligned scalar mem
292 // is supported, we only require byte alignment. Otherwise, we need the memory
293 // op to be natively aligned.
294 auto getScalarMemAlign = [&ST](unsigned Size) {
295 return ST.enableUnalignedScalarMem() ? 8 : Size;
296 };
297
// Scalar memory ops available on every configuration.
298 LoadActions.legalForTypesWithMemDesc(
299 {{s16, p0, s8, getScalarMemAlign(8)},
300 {s32, p0, s8, getScalarMemAlign(8)},
301 {s16, p0, s16, getScalarMemAlign(16)},
302 {s32, p0, s16, getScalarMemAlign(16)},
303 {s32, p0, s32, getScalarMemAlign(32)},
304 {p0, p0, sXLen, getScalarMemAlign(XLen)}});
305 StoreActions.legalForTypesWithMemDesc(
306 {{s16, p0, s8, getScalarMemAlign(8)},
307 {s32, p0, s8, getScalarMemAlign(8)},
308 {s16, p0, s16, getScalarMemAlign(16)},
309 {s32, p0, s16, getScalarMemAlign(16)},
310 {s32, p0, s32, getScalarMemAlign(32)},
311 {p0, p0, sXLen, getScalarMemAlign(XLen)}});
312 ExtLoadActions.legalForTypesWithMemDesc(
313 {{sXLen, p0, s8, getScalarMemAlign(8)},
314 {sXLen, p0, s16, getScalarMemAlign(16)}});
// s64 memory ops: all sizes when XLen == 64; only full s64 accesses when
// XLen != 64 but the D extension is present.
315 if (XLen == 64) {
316 LoadActions.legalForTypesWithMemDesc(
317 {{s64, p0, s8, getScalarMemAlign(8)},
318 {s64, p0, s16, getScalarMemAlign(16)},
319 {s64, p0, s32, getScalarMemAlign(32)},
320 {s64, p0, s64, getScalarMemAlign(64)}});
321 StoreActions.legalForTypesWithMemDesc(
322 {{s64, p0, s8, getScalarMemAlign(8)},
323 {s64, p0, s16, getScalarMemAlign(16)},
324 {s64, p0, s32, getScalarMemAlign(32)},
325 {s64, p0, s64, getScalarMemAlign(64)}});
326 ExtLoadActions.legalForTypesWithMemDesc(
327 {{s64, p0, s32, getScalarMemAlign(32)}});
328 } else if (ST.hasStdExtD()) {
329 LoadActions.legalForTypesWithMemDesc(
330 {{s64, p0, s64, getScalarMemAlign(64)}});
331 StoreActions.legalForTypesWithMemDesc(
332 {{s64, p0, s64, getScalarMemAlign(64)}});
333 }
334
335 // Vector loads/stores.
336 if (ST.hasVInstructions()) {
337 LoadActions.legalForTypesWithMemDesc({{nxv2s8, p0, nxv2s8, 8},
338 {nxv4s8, p0, nxv4s8, 8},
339 {nxv8s8, p0, nxv8s8, 8},
340 {nxv16s8, p0, nxv16s8, 8},
341 {nxv32s8, p0, nxv32s8, 8},
342 {nxv64s8, p0, nxv64s8, 8},
343 {nxv2s16, p0, nxv2s16, 16},
344 {nxv4s16, p0, nxv4s16, 16},
345 {nxv8s16, p0, nxv8s16, 16},
346 {nxv16s16, p0, nxv16s16, 16},
347 {nxv32s16, p0, nxv32s16, 16},
348 {nxv2s32, p0, nxv2s32, 32},
349 {nxv4s32, p0, nxv4s32, 32},
350 {nxv8s32, p0, nxv8s32, 32},
351 {nxv16s32, p0, nxv16s32, 32}});
352 StoreActions.legalForTypesWithMemDesc({{nxv2s8, p0, nxv2s8, 8},
353 {nxv4s8, p0, nxv4s8, 8},
354 {nxv8s8, p0, nxv8s8, 8},
355 {nxv16s8, p0, nxv16s8, 8},
356 {nxv32s8, p0, nxv32s8, 8},
357 {nxv64s8, p0, nxv64s8, 8},
358 {nxv2s16, p0, nxv2s16, 16},
359 {nxv4s16, p0, nxv4s16, 16},
360 {nxv8s16, p0, nxv8s16, 16},
361 {nxv16s16, p0, nxv16s16, 16},
362 {nxv32s16, p0, nxv32s16, 16},
363 {nxv2s32, p0, nxv2s32, 32},
364 {nxv4s32, p0, nxv4s32, 32},
365 {nxv8s32, p0, nxv8s32, 32},
366 {nxv16s32, p0, nxv16s32, 32}});
367
// Single-element (nxv1) vector memory ops are only added when ELEN == 64.
368 if (ST.getELen() == 64) {
369 LoadActions.legalForTypesWithMemDesc({{nxv1s8, p0, nxv1s8, 8},
370 {nxv1s16, p0, nxv1s16, 16},
371 {nxv1s32, p0, nxv1s32, 32}});
372 StoreActions.legalForTypesWithMemDesc({{nxv1s8, p0, nxv1s8, 8},
373 {nxv1s16, p0, nxv1s16, 16},
374 {nxv1s32, p0, nxv1s32, 32}});
375 }
376
// 64-bit element vector memory ops need hasVInstructionsI64().
377 if (ST.hasVInstructionsI64()) {
378 LoadActions.legalForTypesWithMemDesc({{nxv1s64, p0, nxv1s64, 64},
379 {nxv2s64, p0, nxv2s64, 64},
380 {nxv4s64, p0, nxv4s64, 64},
381 {nxv8s64, p0, nxv8s64, 64}});
382 StoreActions.legalForTypesWithMemDesc({{nxv1s64, p0, nxv1s64, 64},
383 {nxv2s64, p0, nxv2s64, 64},
384 {nxv4s64, p0, nxv4s64, 64},
385 {nxv8s64, p0, nxv8s64, 64}});
386 }
387
388 // we will take the custom lowering logic if we have scalable vector types
389 // with non-standard alignments
390 LoadActions.customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));
391 StoreActions.customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));
392
393 // Pointers require that XLen sized elements are legal.
394 if (XLen <= ST.getELen()) {
395 LoadActions.customIf(typeIsLegalPtrVec(0, PtrVecTys, ST));
396 StoreActions.customIf(typeIsLegalPtrVec(0, PtrVecTys, ST));
397 }
398 }
399
// Fallback rules for loads/stores not matched by anything above.
400 LoadActions.widenScalarToNextPow2(0, /* MinSize = */ 8)
401 .lowerIfMemSizeNotByteSizePow2()
402 .clampScalar(0, s16, sXLen)
403 .lower();
404 StoreActions
405 .clampScalar(0, s16, sXLen)
406 .lowerIfMemSizeNotByteSizePow2()
407 .lower();
408
409 ExtLoadActions.widenScalarToNextPow2(0).clampScalar(0, sXLen, sXLen).lower();
410
// Pointer arithmetic and pointer/integer conversions operate on {p0, sXLen}.
411 getActionDefinitionsBuilder({G_PTR_ADD, G_PTRMASK}).legalFor({{p0, sXLen}});
412
413 getActionDefinitionsBuilder(G_PTRTOINT)
414 .legalFor({{sXLen, p0}})
415 .clampScalar(0, sXLen, sXLen);
416
417 getActionDefinitionsBuilder(G_INTTOPTR)
418 .legalFor({{p0, sXLen}})
419 .clampScalar(1, sXLen, sXLen);
420
// Control flow.
421 getActionDefinitionsBuilder(G_BRCOND).legalFor({sXLen}).minScalar(0, sXLen);
422
423 getActionDefinitionsBuilder(G_BRJT).customFor({{p0, sXLen}});
424
425 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
426
427 getActionDefinitionsBuilder(G_PHI)
428 .legalFor({p0, s32, sXLen})
429 .widenScalarToNextPow2(0)
430 .clampScalar(0, s32, sXLen);
431
432 getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE, G_CONSTANT_POOL})
433 .legalFor({p0});
434
// Multiplication: legal at XLen with Zmmul; otherwise G_MUL becomes a libcall
// and the high-half multiplies are lowered.
435 if (ST.hasStdExtZmmul()) {
436 getActionDefinitionsBuilder(G_MUL)
437 .legalFor({sXLen})
438 .widenScalarToNextPow2(0)
439 .clampScalar(0, sXLen, sXLen);
440
441 // clang-format off
442 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
443 .legalFor({sXLen})
444 .lower();
445 // clang-format on
446
447 getActionDefinitionsBuilder({G_SMULO, G_UMULO}).minScalar(0, sXLen).lower();
448 } else {
449 getActionDefinitionsBuilder(G_MUL)
450 .libcallFor({sXLen, sDoubleXLen})
451 .widenScalarToNextPow2(0)
452 .clampScalar(0, sXLen, sDoubleXLen);
453
454 getActionDefinitionsBuilder({G_SMULH, G_UMULH}).lowerFor({sXLen});
455
456 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
457 .minScalar(0, sXLen)
458 // Widen sXLen to sDoubleXLen so we can use a single libcall to get
459 // the low bits for the mul result and high bits to do the overflow
460 // check.
461 .widenScalarIf(typeIs(0, sXLen),
462 LegalizeMutations::changeTo(0, sDoubleXLen))
463 .lower();
464 }
465
// Division/remainder: legal at XLen with the M extension, libcalls otherwise.
// With M, s32 G_SDIV/G_UDIV/G_UREM are custom-legalized, while G_SREM has no
// s32 rule and is clamped to at least XLen.
466 if (ST.hasStdExtM()) {
467 getActionDefinitionsBuilder({G_SDIV, G_UDIV, G_UREM})
468 .legalFor({sXLen})
469 .customFor({s32})
470 .libcallFor({sDoubleXLen})
471 .clampScalar(0, s32, sDoubleXLen)
472 .widenScalarToNextPow2(0);
473 getActionDefinitionsBuilder(G_SREM)
474 .legalFor({sXLen})
475 .libcallFor({sDoubleXLen})
476 .clampScalar(0, sXLen, sDoubleXLen)
477 .widenScalarToNextPow2(0);
478 } else {
479 getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM})
480 .libcallFor({sXLen, sDoubleXLen})
481 .clampScalar(0, sXLen, sDoubleXLen)
482 .widenScalarToNextPow2(0);
483 }
484
485 // TODO: Use libcall for sDoubleXLen.
486 getActionDefinitionsBuilder({G_SDIVREM, G_UDIVREM}).lower();
487
// The conditional rules below only take effect when Zbb is present; without
// it these opcodes go straight to lower().
488 getActionDefinitionsBuilder(G_ABS)
489 .customFor(ST.hasStdExtZbb(), {sXLen})
490 .minScalar(ST.hasStdExtZbb(), 0, sXLen)
491 .lower();
492
493 getActionDefinitionsBuilder({G_ABDS, G_ABDU})
494 .minScalar(ST.hasStdExtZbb(), 0, sXLen)
495 .lower();
496
497 getActionDefinitionsBuilder({G_UMAX, G_UMIN, G_SMAX, G_SMIN})
498 .legalFor(ST.hasStdExtZbb(), {sXLen})
499 .minScalar(ST.hasStdExtZbb(), 0, sXLen)
500 .lower();
501
502 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
503
504 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
505
506 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
507
508 getActionDefinitionsBuilder({G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
509 .lower();
510
511 // FP Operations
512
// Pattern used throughout this section: a type is legal when the matching
// extension is present (F -> s32, D -> s64, Zfh -> s16); the later libcall /
// lower rules act as the fallback.
513 // FIXME: Support s128 for rv32 when libcall handling is able to use sret.
514 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT,
515 G_FMAXNUM, G_FMINNUM, G_FMAXIMUMNUM,
516 G_FMINIMUMNUM})
517 .legalFor(ST.hasStdExtF(), {s32})
518 .legalFor(ST.hasStdExtD(), {s64})
519 .legalFor(ST.hasStdExtZfh(), {s16})
520 .libcallFor({s32, s64})
521 .libcallFor(ST.is64Bit(), {s128});
522
523 getActionDefinitionsBuilder({G_FNEG, G_FABS})
524 .legalFor(ST.hasStdExtF(), {s32})
525 .legalFor(ST.hasStdExtD(), {s64})
526 .legalFor(ST.hasStdExtZfh(), {s16})
527 .lowerFor({s32, s64, s128});
528
529 getActionDefinitionsBuilder(G_FREM)
530 .libcallFor({s32, s64})
531 .libcallFor(ST.is64Bit(), {s128})
532 .minScalar(0, s32)
533 .scalarize(0);
534
535 getActionDefinitionsBuilder(G_FCOPYSIGN)
536 .legalFor(ST.hasStdExtF(), {{s32, s32}})
537 .legalFor(ST.hasStdExtD(), {{s64, s64}, {s32, s64}, {s64, s32}})
538 .legalFor(ST.hasStdExtZfh(), {{s16, s16}, {s16, s32}, {s32, s16}})
539 .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}, {s64, s16}})
540 .lower();
541
542 // FIXME: Use Zfhmin.
543 getActionDefinitionsBuilder(G_FPTRUNC)
544 .legalFor(ST.hasStdExtD(), {{s32, s64}})
545 .legalFor(ST.hasStdExtZfh(), {{s16, s32}})
546 .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}})
547 .libcallFor({{s32, s64}})
548 .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}});
549 getActionDefinitionsBuilder(G_FPEXT)
550 .legalFor(ST.hasStdExtD(), {{s64, s32}})
551 .legalFor(ST.hasStdExtZfh(), {{s32, s16}})
552 .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s64, s16}})
553 .libcallFor({{s64, s32}})
554 .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}});
555
556 getActionDefinitionsBuilder(G_FCMP)
557 .legalFor(ST.hasStdExtF(), {{sXLen, s32}})
558 .legalFor(ST.hasStdExtD(), {{sXLen, s64}})
559 .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
560 .clampScalar(0, sXLen, sXLen)
561 .libcallFor({{sXLen, s32}, {sXLen, s64}})
562 .libcallFor(ST.is64Bit(), {{sXLen, s128}});
563
564 // TODO: Support vector version of G_IS_FPCLASS.
565 getActionDefinitionsBuilder(G_IS_FPCLASS)
566 .customFor(ST.hasStdExtF(), {{s1, s32}})
567 .customFor(ST.hasStdExtD(), {{s1, s64}})
568 .customFor(ST.hasStdExtZfh(), {{s1, s16}})
569 .lowerFor({{s1, s32}, {s1, s64}});
570
571 getActionDefinitionsBuilder(G_FCONSTANT)
572 .legalFor(ST.hasStdExtF(), {s32})
573 .legalFor(ST.hasStdExtD(), {s64})
574 .legalFor(ST.hasStdExtZfh(), {s16})
575 .customFor(!ST.is64Bit(), {s32})
576 .customFor(ST.is64Bit(), {s32, s64})
577 .lowerFor({s64, s128});
578
// FP -> integer conversions.
579 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
580 .legalFor(ST.hasStdExtF(), {{sXLen, s32}})
581 .legalFor(ST.hasStdExtD(), {{sXLen, s64}})
582 .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
583 .customFor(ST.is64Bit() && ST.hasStdExtF(), {{s32, s32}})
584 .customFor(ST.is64Bit() && ST.hasStdExtD(), {{s32, s64}})
585 .customFor(ST.is64Bit() && ST.hasStdExtZfh(), {{s32, s16}})
586 .widenScalarToNextPow2(0)
587 .minScalar(0, s32)
588 .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
589 .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}}) // FIXME RV32.
590 .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}, {s128, s128}});
591
// Integer -> FP conversions.
592 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
593 .legalFor(ST.hasStdExtF(), {{s32, sXLen}})
594 .legalFor(ST.hasStdExtD(), {{s64, sXLen}})
595 .legalFor(ST.hasStdExtZfh(), {{s16, sXLen}})
596 .widenScalarToNextPow2(1)
597 // Promote to XLen if the operation is legal.
598 .widenScalarIf(
599 [=, &ST](const LegalityQuery &Query) {
600 return Query.Types[0].isScalar() && Query.Types[1].isScalar() &&
601 (Query.Types[1].getSizeInBits() < ST.getXLen()) &&
602 ((ST.hasStdExtF() && Query.Types[0].getSizeInBits() == 32) ||
603 (ST.hasStdExtD() && Query.Types[0].getSizeInBits() == 64) ||
604 (ST.hasStdExtZfh() &&
605 Query.Types[0].getSizeInBits() == 16));
606 },
// NOTE(review): the second argument of this widenScalarIf call (the mutation)
// and its closing parenthesis are not visible in this listing.
608 // Otherwise only promote to s32 since we have si libcalls.
609 .minScalar(1, s32)
610 .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
611 .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}}) // FIXME RV32.
612 .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}, {s128, s128}});
613
614 // FIXME: We can do custom inline expansion like SelectionDAG.
615 getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
616 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
617 G_INTRINSIC_ROUNDEVEN})
618 .legalFor(ST.hasStdExtZfa(), {s32})
619 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtD(), {s64})
620 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtZfh(), {s16})
621 .libcallFor({s32, s64})
622 .libcallFor(ST.is64Bit(), {s128});
623
624 getActionDefinitionsBuilder({G_FMAXIMUM, G_FMINIMUM})
625 .legalFor(ST.hasStdExtZfa(), {s32})
626 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtD(), {s64})
627 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtZfh(), {s16});
628
// Math functions: libcalls only.
629 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FTAN, G_FPOW, G_FLOG, G_FLOG2,
630 G_FLOG10, G_FEXP, G_FEXP2, G_FEXP10, G_FACOS,
631 G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH,
632 G_FTANH})
633 .libcallFor({s32, s64})
634 .libcallFor(ST.is64Bit(), {s128});
// NOTE(review): `{s128, s32}` below is a flat list of two types, whereas the
// preceding rule uses a list of {type0, type1} pairs. Confirm whether
// `{{s128, s32}}` was intended.
635 getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
636 .libcallFor({{s32, s32}, {s64, s32}})
637 .libcallFor(ST.is64Bit(), {s128, s32});
638
639 getActionDefinitionsBuilder(G_VASTART).customFor({p0});
640
641 // va_list must be a pointer, but most sized types are pretty easy to handle
642 // as the destination.
643 getActionDefinitionsBuilder(G_VAARG)
644 // TODO: Implement narrowScalar and widenScalar for G_VAARG for types
645 // other than sXLen.
646 .clampScalar(0, sXLen, sXLen)
647 .lowerForCartesianProduct({sXLen, p0}, {p0});
648
649 getActionDefinitionsBuilder(G_VSCALE)
650 .clampScalar(0, sXLen, sXLen)
651 .customFor({sXLen});
652
// Splat: legal for supported integer/FP vectors with an XLen scalar operand;
// mask-vector splats of s1 are custom-legalized.
653 auto &SplatActions =
654 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
655 .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
656 typeIs(1, sXLen)))
657 .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), typeIs(1, s1)));
658 // Handle case of s64 element vectors on RV32. If the subtarget does not have
659 // f64, then try to lower it to G_SPLAT_VECTOR_SPLIT_64_VL. If the subtarget
660 // does have f64, then we don't know whether the type is an f64 or an i64,
661 // so mark the G_SPLAT_VECTOR as legal and decide later what to do with it,
662 // depending on how the instructions it consumes are legalized. They are not
663 // legalized yet since legalization is in reverse postorder, so we cannot
664 // make the decision at this moment.
665 if (XLen == 32) {
666 if (ST.hasVInstructionsF64() && ST.hasStdExtD())
667 SplatActions.legalIf(all(
668 typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
669 else if (ST.hasVInstructionsI64())
670 SplatActions.customIf(all(
671 typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
672 }
673
674 SplatActions.clampScalar(1, sXLen, sXLen);
675
// True when the destination is an s1-element vector and both destination and
// source have a known minimum element count of at least 8.
676 LegalityPredicate ExtractSubvecBitcastPred = [=](const LegalityQuery &Query) {
677 LLT DstTy = Query.Types[0];
678 LLT SrcTy = Query.Types[1];
679 return DstTy.getElementType() == LLT::scalar(1) &&
680 DstTy.getElementCount().getKnownMinValue() >= 8 &&
681 SrcTy.getElementCount().getKnownMinValue() >= 8;
682 };
683 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
684 // We don't have the ability to slide mask vectors down indexed by their
685 // i1 elements; the smallest we can do is i8. Often we are able to bitcast
686 // to equivalent i8 vectors.
687 .bitcastIf(
688 all(typeIsLegalBoolVec(0, BoolVecTys, ST),
689 typeIsLegalBoolVec(1, BoolVecTys, ST), ExtractSubvecBitcastPred),
690 [=](const LegalityQuery &Query) {
// Cast type index 0 to a vector with one eighth as many elements.
691 LLT CastTy = LLT::vector(
692 Query.Types[0].getElementCount().divideCoefficientBy(8), 8);
693 return std::pair(0, CastTy);
694 })
695 .customIf(LegalityPredicates::any(
696 all(typeIsLegalBoolVec(0, BoolVecTys, ST),
697 typeIsLegalBoolVec(1, BoolVecTys, ST)),
698 all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
699 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))));
700
701 getActionDefinitionsBuilder(G_INSERT_SUBVECTOR)
702 .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
703 typeIsLegalBoolVec(1, BoolVecTys, ST)))
704 .customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
705 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
706
// Atomics: legal at XLen with the A extension, libcalls without it.
707 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
708 .lowerIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(2, p0)));
709
710 getActionDefinitionsBuilder({G_ATOMIC_CMPXCHG, G_ATOMICRMW_ADD})
711 .legalFor(ST.hasStdExtA(), {{sXLen, p0}})
712 .libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
713 .clampScalar(0, sXLen, sXLen);
714
715 getActionDefinitionsBuilder(G_ATOMICRMW_SUB)
716 .libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
717 .clampScalar(0, sXLen, sXLen)
718 .lower();
719
// Finalize the tables and verify the rule set against the instruction info.
720 getLegacyLegalizerInfo().computeTables();
721 verify(*ST.getInstrInfo());
722}
723
// Intrinsic legalization hook. Returns true without touching MI for any
// intrinsic found in the RVV intrinsic table and for the masked atomic
// intrinsics listed at the end of the switch. Expands `vacopy` into a load of
// the source va_list pointer followed by a store into the destination, then
// erases MI. Returns false for every other intrinsic.
// NOTE(review): the first line of this function's signature is not visible in
// this listing; `Helper` used below is presumably one of its parameters.
725 MachineInstr &MI) const {
726 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
727
728 if (RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntrinsicID))
729 return true;
730
731 switch (IntrinsicID) {
732 default:
733 return false;
734 case Intrinsic::vacopy: {
735 // vacopy arguments must be legal because of the intrinsic signature.
736 // No need to check here.
737
738 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
739 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
740 MachineFunction &MF = *MI.getMF();
741 const DataLayout &DL = MIRBuilder.getDataLayout();
742 LLVMContext &Ctx = MF.getFunction().getContext();
743
// Operand 1 is the destination va_list; operand 2 (used below) is the source.
744 Register DstLst = MI.getOperand(1).getReg();
745 LLT PtrTy = MRI.getType(DstLst);
746
747 // Load the source va_list
748 Align Alignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
// NOTE(review): the line that begins the definition of `LoadMMO` is not
// visible in this listing.
750 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, Alignment);
751 auto Tmp = MIRBuilder.buildLoad(PtrTy, MI.getOperand(2), *LoadMMO);
752
753 // Store the result in the destination va_list
// NOTE(review): the lines defining `StoreMMO` are not visible in this listing.
756 MIRBuilder.buildStore(Tmp, DstLst, *StoreMMO);
757
758 MI.eraseFromParent();
759 return true;
760 }
// These intrinsics are accepted as-is.
761 case Intrinsic::riscv_masked_atomicrmw_add:
762 case Intrinsic::riscv_masked_atomicrmw_sub:
763 case Intrinsic::riscv_masked_cmpxchg:
764 return true;
765 }
766}
767
/// Custom legalization of G_VASTART: materializes the address of the varargs
/// frame index and stores it through the pointer in operand 0, reusing MI's
/// single memory operand, then erases MI. Always returns true.
768bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI,
769 MachineIRBuilder &MIRBuilder) const {
770 // Stores the address of the VarArgsFrameIndex slot into the memory location
771 assert(MI.getOpcode() == TargetOpcode::G_VASTART);
772 MachineFunction *MF = MI.getParent()->getParent();
// NOTE(review): the line declaring `FuncInfo` is not visible in this listing.
774 int FI = FuncInfo->getVarArgsFrameIndex();
// The frame-index address is built with the type of the destination pointer.
775 LLT AddrTy = MIRBuilder.getMRI()->getType(MI.getOperand(0).getReg());
776 auto FINAddr = MIRBuilder.buildFrameIndex(AddrTy, FI);
777 assert(MI.hasOneMemOperand());
778 MIRBuilder.buildStore(FINAddr, MI.getOperand(0).getReg(),
779 *MI.memoperands()[0]);
780 MI.eraseFromParent();
781 return true;
782}
783
/// Custom legalization of G_BRJT: scales the index (operand 2) by the jump
/// table entry size, adds it to the table pointer (operand 0), loads the
/// entry, turns it into a branch target according to the entry kind, and
/// emits an indirect branch before erasing MI.
///
/// Returns false, leaving MI in place, when the entry size is not a power of
/// two or the entry kind is not handled by the switch.
784bool RISCVLegalizerInfo::legalizeBRJT(MachineInstr &MI,
785 MachineIRBuilder &MIRBuilder) const {
786 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
787 auto &MF = *MI.getParent()->getParent();
788 const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
789 unsigned EntrySize = MJTI->getEntrySize(MF.getDataLayout());
790
791 Register PtrReg = MI.getOperand(0).getReg();
792 LLT PtrTy = MRI.getType(PtrReg);
793 Register IndexReg = MI.getOperand(2).getReg();
794 LLT IndexTy = MRI.getType(IndexReg);
795
// The index is scaled with a shift, so the entry size must be a power of two.
796 if (!isPowerOf2_32(EntrySize))
797 return false;
798
799 auto ShiftAmt = MIRBuilder.buildConstant(IndexTy, Log2_32(EntrySize));
800 IndexReg = MIRBuilder.buildShl(IndexTy, IndexReg, ShiftAmt).getReg(0);
801
802 auto Addr = MIRBuilder.buildPtrAdd(PtrTy, PtrReg, IndexReg);
803
// NOTE(review): the lines that begin the definition of `MMO` are not visible
// in this listing.
806 EntrySize, Align(MJTI->getEntryAlignment(MF.getDataLayout())));
807
808 Register TargetReg;
809 switch (MJTI->getEntryKind()) {
810 default:
811 return false;
// NOTE(review): the `case` labels of this switch are not visible in this
// listing; the three arms below are identified only by their bodies.
813 // For PIC, the sequence is:
814 // BRIND(load(Jumptable + index) + RelocBase)
815 // RelocBase can be JumpTable, GOT or some sort of global base.
816 unsigned LoadOpc =
817 STI.is64Bit() ? TargetOpcode::G_SEXTLOAD : TargetOpcode::G_LOAD;
818 auto Load = MIRBuilder.buildLoadInstr(LoadOpc, IndexTy, Addr, *MMO);
819 TargetReg = MIRBuilder.buildPtrAdd(PtrTy, PtrReg, Load).getReg(0);
820 break;
821 }
// Arm that sign-extend-loads the entry and converts it to a pointer.
823 auto Load = MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, IndexTy,
824 Addr, *MMO);
825 TargetReg = MIRBuilder.buildIntToPtr(PtrTy, Load).getReg(0);
826 break;
827 }
// Arm that loads the entry directly as a pointer.
829 TargetReg = MIRBuilder.buildLoad(PtrTy, Addr, *MMO).getReg(0);
830 break;
831 }
832
833 MIRBuilder.buildBrIndirect(TargetReg);
834
835 MI.eraseFromParent();
836 return true;
837}
838
/// Decides whether the 32- or 64-bit immediate \p APImm should come from the
/// constant pool instead of being built by an instruction sequence.
///
/// Returns false when the value fits in a signed 32-bit immediate, when the
/// subtarget does not use the constant pool for large integers, or when the
/// materialization sequence is within STI.getMaxBuildIntsCost(). Otherwise
/// returns true if \p ShouldOptForSize is set, or if the two-register sequence
/// (plus two extra instructions) still exceeds the cost limit.
839bool RISCVLegalizerInfo::shouldBeInConstantPool(const APInt &APImm,
840 bool ShouldOptForSize) const {
841 assert(APImm.getBitWidth() == 32 || APImm.getBitWidth() == 64);
842 int64_t Imm = APImm.getSExtValue();
843 // All simm32 constants should be handled by isel.
844 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
845 // this check redundant, but small immediates are common so this check
846 // should have better compile time.
847 if (isInt<32>(Imm))
848 return false;
849
850 // We only need to cost the immediate, if constant pool lowering is enabled.
851 if (!STI.useConstantPoolForLargeInts())
852 return false;
853
// NOTE(review): the line declaring `Seq` is not visible in this listing.
855 if (Seq.size() <= STI.getMaxBuildIntsCost())
856 return false;
857
858 // Optimizations below are disabled for opt size. If we're optimizing for
859 // size, use a constant pool.
860 if (ShouldOptForSize)
861 return true;
862 //
863 // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do
864 // that if it will avoid a constant pool.
865 // It will require an extra temporary register though.
866 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
867 // low and high 32 bits are the same and bit 31 and 63 are set.
868 unsigned ShiftAmt, AddOpc;
// NOTE(review): the line that begins the declaration of `SeqLo` is not
// visible in this listing.
870 RISCVMatInt::generateTwoRegInstSeq(Imm, STI, ShiftAmt, AddOpc);
// Use the constant pool unless a non-empty two-register sequence, plus two
// further instructions, fits within the cost limit.
871 return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost());
872}
873
874bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI,
875 MachineIRBuilder &MIB) const {
876 const LLT XLenTy(STI.getXLenVT());
877 Register Dst = MI.getOperand(0).getReg();
878
879 // We define our scalable vector types for lmul=1 to use a 64 bit known
880 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
881 // vscale as VLENB / 8.
882 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
883 if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock)
884 // Support for VLEN==32 is incomplete.
885 return false;
886
887 // We assume VLENB is a multiple of 8. We manually choose the best shift
888 // here because SimplifyDemandedBits isn't always able to simplify it.
889 uint64_t Val = MI.getOperand(1).getCImm()->getZExtValue();
890 if (isPowerOf2_64(Val)) {
891 uint64_t Log2 = Log2_64(Val);
892 if (Log2 < 3) {
893 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
894 MIB.buildLShr(Dst, VLENB, MIB.buildConstant(XLenTy, 3 - Log2));
895 } else if (Log2 > 3) {
896 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
897 MIB.buildShl(Dst, VLENB, MIB.buildConstant(XLenTy, Log2 - 3));
898 } else {
899 MIB.buildInstr(RISCV::G_READ_VLENB, {Dst}, {});
900 }
901 } else if ((Val % 8) == 0) {
902 // If the multiplier is a multiple of 8, scale it down to avoid needing
903 // to shift the VLENB value.
904 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
905 MIB.buildMul(Dst, VLENB, MIB.buildConstant(XLenTy, Val / 8));
906 } else {
907 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
908 auto VScale = MIB.buildLShr(XLenTy, VLENB, MIB.buildConstant(XLenTy, 3));
909 MIB.buildMul(Dst, VScale, MIB.buildConstant(XLenTy, Val));
910 }
911 MI.eraseFromParent();
912 return true;
913}
914
915// Custom-lower extensions from mask vectors by using a vselect either with 1
916// for zero/any-extension or -1 for sign-extension:
917// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
918// Note that any-extension is lowered identically to zero-extension.
919bool RISCVLegalizerInfo::legalizeExt(MachineInstr &MI,
920 MachineIRBuilder &MIB) const {
921
922 unsigned Opc = MI.getOpcode();
923 assert(Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_SEXT ||
924 Opc == TargetOpcode::G_ANYEXT);
925
926 MachineRegisterInfo &MRI = *MIB.getMRI();
927 Register Dst = MI.getOperand(0).getReg();
928 Register Src = MI.getOperand(1).getReg();
929
930 LLT DstTy = MRI.getType(Dst);
931 int64_t ExtTrueVal = Opc == TargetOpcode::G_SEXT ? -1 : 1;
932 LLT DstEltTy = DstTy.getElementType();
933 auto SplatZero = MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, 0));
934 auto SplatTrue =
935 MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, ExtTrueVal));
936 MIB.buildSelect(Dst, Src, SplatTrue, SplatZero);
937
938 MI.eraseFromParent();
939 return true;
940}
941
942bool RISCVLegalizerInfo::legalizeLoadStore(MachineInstr &MI,
943 LegalizerHelper &Helper,
944 MachineIRBuilder &MIB) const {
946 "Machine instructions must be Load/Store.");
947 MachineRegisterInfo &MRI = *MIB.getMRI();
948 MachineFunction *MF = MI.getMF();
949 const DataLayout &DL = MIB.getDataLayout();
950 LLVMContext &Ctx = MF->getFunction().getContext();
951
952 Register DstReg = MI.getOperand(0).getReg();
953 LLT DataTy = MRI.getType(DstReg);
954 if (!DataTy.isVector())
955 return false;
956
957 if (!MI.hasOneMemOperand())
958 return false;
959
960 MachineMemOperand *MMO = *MI.memoperands_begin();
961
962 const auto *TLI = STI.getTargetLowering();
963 EVT VT = EVT::getEVT(getTypeForLLT(DataTy, Ctx));
964
965 if (TLI->allowsMemoryAccessForAlignment(Ctx, DL, VT, *MMO))
966 return true;
967
968 unsigned EltSizeBits = DataTy.getScalarSizeInBits();
969 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
970 "Unexpected unaligned RVV load type");
971
972 // Calculate the new vector type with i8 elements
973 unsigned NumElements =
974 DataTy.getElementCount().getKnownMinValue() * (EltSizeBits / 8);
975 LLT NewDataTy = LLT::scalable_vector(NumElements, 8);
976
977 Helper.bitcast(MI, 0, NewDataTy);
978
979 return true;
980}
981
982/// Return the type of the mask type suitable for masking the provided
983/// vector type. This is simply an i1 element type vector of the same
984/// (possibly scalable) length.
985static LLT getMaskTypeFor(LLT VecTy) {
986 assert(VecTy.isVector());
987 ElementCount EC = VecTy.getElementCount();
988 return LLT::vector(EC, LLT::scalar(1));
989}
990
991/// Creates an all ones mask suitable for masking a vector of type VecTy with
992/// vector length VL.
994 MachineIRBuilder &MIB,
996 LLT MaskTy = getMaskTypeFor(VecTy);
997 return MIB.buildInstr(RISCV::G_VMSET_VL, {MaskTy}, {VL});
998}
999
1000/// Gets the two common "VL" operands: an all-ones mask and the vector length.
1001/// VecTy is a scalable vector type.
1002static std::pair<MachineInstrBuilder, MachineInstrBuilder>
1004 assert(VecTy.isScalableVector() && "Expecting scalable container type");
1005 const RISCVSubtarget &STI = MIB.getMF().getSubtarget<RISCVSubtarget>();
1006 LLT XLenTy(STI.getXLenVT());
1007 auto VL = MIB.buildConstant(XLenTy, -1);
1008 auto Mask = buildAllOnesMask(VecTy, VL, MIB, MRI);
1009 return {Mask, VL};
1010}
1011
1013buildSplatPartsS64WithVL(const DstOp &Dst, const SrcOp &Passthru, Register Lo,
1014 Register Hi, const SrcOp &VL, MachineIRBuilder &MIB,
1016 // TODO: If the Hi bits of the splat are undefined, then it's fine to just
1017 // splat Lo even if it might be sign extended. I don't think we have
1018 // introduced a case where we're build a s64 where the upper bits are undef
1019 // yet.
1020
1021 // Fall back to a stack store and stride x0 vector load.
1022 // TODO: need to lower G_SPLAT_VECTOR_SPLIT_I64. This is done in
1023 // preprocessDAG in SDAG.
1024 return MIB.buildInstr(RISCV::G_SPLAT_VECTOR_SPLIT_I64_VL, {Dst},
1025 {Passthru, Lo, Hi, VL});
1026}
1027
1029buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru,
1030 const SrcOp &Scalar, const SrcOp &VL,
1032 assert(Scalar.getLLTTy(MRI) == LLT::scalar(64) && "Unexpected VecTy!");
1033 auto Unmerge = MIB.buildUnmerge(LLT::scalar(32), Scalar);
1034 return buildSplatPartsS64WithVL(Dst, Passthru, Unmerge.getReg(0),
1035 Unmerge.getReg(1), VL, MIB, MRI);
1036}
1037
1038// Lower splats of s1 types to G_ICMP. For each mask vector type, we have a
1039// legal equivalently-sized i8 type, so we can use that as a go-between.
1040// Splats of s1 types that have constant value can be legalized as VMSET_VL or
1041// VMCLR_VL.
1042bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
1043 MachineIRBuilder &MIB) const {
1044 assert(MI.getOpcode() == TargetOpcode::G_SPLAT_VECTOR);
1045
1046 MachineRegisterInfo &MRI = *MIB.getMRI();
1047
1048 Register Dst = MI.getOperand(0).getReg();
1049 Register SplatVal = MI.getOperand(1).getReg();
1050
1051 LLT VecTy = MRI.getType(Dst);
1052 LLT XLenTy(STI.getXLenVT());
1053
1054 // Handle case of s64 element vectors on rv32
1055 if (XLenTy.getSizeInBits() == 32 &&
1056 VecTy.getElementType().getSizeInBits() == 64) {
1057 auto [_, VL] = buildDefaultVLOps(MRI.getType(Dst), MIB, MRI);
1058 buildSplatSplitS64WithVL(Dst, MIB.buildUndef(VecTy), SplatVal, VL, MIB,
1059 MRI);
1060 MI.eraseFromParent();
1061 return true;
1062 }
1063
1064 // All-zeros or all-ones splats are handled specially.
1065 MachineInstr &SplatValMI = *MRI.getVRegDef(SplatVal);
1066 if (isAllOnesOrAllOnesSplat(SplatValMI, MRI)) {
1067 auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1068 MIB.buildInstr(RISCV::G_VMSET_VL, {Dst}, {VL});
1069 MI.eraseFromParent();
1070 return true;
1071 }
1072 if (isNullOrNullSplat(SplatValMI, MRI)) {
1073 auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1074 MIB.buildInstr(RISCV::G_VMCLR_VL, {Dst}, {VL});
1075 MI.eraseFromParent();
1076 return true;
1077 }
1078
1079 // Handle non-constant mask splat (i.e. not sure if it's all zeros or all
1080 // ones) by promoting it to an s8 splat.
1081 LLT InterEltTy = LLT::scalar(8);
1082 LLT InterTy = VecTy.changeElementType(InterEltTy);
1083 auto ZExtSplatVal = MIB.buildZExt(InterEltTy, SplatVal);
1084 auto And =
1085 MIB.buildAnd(InterEltTy, ZExtSplatVal, MIB.buildConstant(InterEltTy, 1));
1086 auto LHS = MIB.buildSplatVector(InterTy, And);
1087 auto ZeroSplat =
1088 MIB.buildSplatVector(InterTy, MIB.buildConstant(InterEltTy, 0));
1089 MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, LHS, ZeroSplat);
1090 MI.eraseFromParent();
1091 return true;
1092}
1093
1094static LLT getLMUL1Ty(LLT VecTy) {
1095 assert(VecTy.getElementType().getSizeInBits() <= 64 &&
1096 "Unexpected vector LLT");
1098 VecTy.getElementType().getSizeInBits(),
1099 VecTy.getElementType());
1100}
1101
// Custom-legalize G_EXTRACT_SUBVECTOR.
//
// Returns true without changing MI when the index is 0 or when the index
// decomposes to a remaining index of 0. Subvectors with s1 elements are
// extracted through zero-extended s8 vectors and converted back with an ICMP.
// Everything else is slid down with G_VSLIDEDOWN_VL so that the wanted
// elements start at element 0, and MI is erased.
//
// NOTE(review): this text comes from a documentation listing in which some
// lines (listing lines 1141, 1155-1156, 1162 and 1179) are absent; they are
// flagged below and must be restored from the upstream file.
1102bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
1103 MachineIRBuilder &MIB) const {
1104 GExtractSubvector &ES = cast<GExtractSubvector>(MI);
1105
1106 MachineRegisterInfo &MRI = *MIB.getMRI();
1107
1108 Register Dst = ES.getReg(0);
1109 Register Src = ES.getSrcVec();
1110 uint64_t Idx = ES.getIndexImm();
1111
1112 // With an index of 0 this is a cast-like subvector, which can be performed
1113 // with subregister operations.
1114 if (Idx == 0)
1115 return true;
1116
1117 LLT LitTy = MRI.getType(Dst);
1118 LLT BigTy = MRI.getType(Src);
1119
1120 if (LitTy.getElementType() == LLT::scalar(1)) {
1121 // We can't slide this mask vector up indexed by its i1 elements.
1122 // This poses a problem when we wish to insert a scalable vector which
1123 // can't be re-expressed as a larger type. Just choose the slow path and
1124 // extend to a larger type, then truncate back down.
// The "truncate" is done as (extracted != 0) on the s8 vector.
1125 LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
1126 LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
1127 auto BigZExt = MIB.buildZExt(ExtBigTy, Src);
1128 auto ExtractZExt = MIB.buildExtractSubvector(ExtLitTy, BigZExt, Idx);
1129 auto SplatZero = MIB.buildSplatVector(
1130 ExtLitTy, MIB.buildConstant(ExtLitTy.getElementType(), 0));
1131 MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, ExtractZExt, SplatZero);
1132 MI.eraseFromParent();
1133 return true;
1134 }
1135
1136 // extract_subvector scales the index by vscale if the subvector is scalable,
1137 // and decomposeSubvectorInsertExtractToSubRegs takes this into account.
1138 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1139 MVT LitTyMVT = getMVTForLLT(LitTy);
1140 auto Decompose =
// NOTE(review): listing line 1141 (the callee; presumably the
// decomposeSubvectorInsertExtractToSubRegs mentioned above) is missing here.
1142 getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);
// Decompose.first is a subregister index, Decompose.second the remaining
// element index within that subregister.
1143 unsigned RemIdx = Decompose.second;
1144
1145 // If the Idx has been completely eliminated then this is a subvector extract
1146 // which naturally aligns to a vector register. These can easily be handled
1147 // using subregister manipulation.
1148 if (RemIdx == 0)
1149 return true;
1150
1151 // Else LitTy is M1 or smaller and may need to be slid down: if LitTy
1152 // was > M1 then the index would need to be a multiple of VLMAX, and so would
1153 // divide exactly.
1154 assert(
// NOTE(review): listing lines 1155-1156 (the asserted condition) are missing.
1157
1158 // If the vector type is an LMUL-group type, extract a subvector equal to the
1159 // nearest full vector register type.
1160 LLT InterLitTy = BigTy;
1161 Register Vec = Src;
// NOTE(review): listing line 1162 (the start of the `if (` condition that
// compares BigTy's size with the LMUL1 type's size) is missing.
1163 getLMUL1Ty(BigTy).getSizeInBits())) {
1164 // If BigTy has an LMUL > 1, then LitTy should have a smaller LMUL, and
1165 // we should have successfully decomposed the extract into a subregister.
1166 assert(Decompose.first != RISCV::NoSubRegister);
1167 InterLitTy = getLMUL1Ty(BigTy);
1168 // SDAG builds a TargetExtractSubreg. We cannot create a Copy with SubReg
1169 // specified on the source Register (the equivalent) since generic virtual
1170 // register does not allow subregister index.
1171 Vec = MIB.buildExtractSubvector(InterLitTy, Src, Idx - RemIdx).getReg(0);
1172 }
1173
1174 // Slide this vector register down by the desired number of elements in order
1175 // to place the desired subvector starting at element 0.
1176 const LLT XLenTy(STI.getXLenVT());
1177 auto SlidedownAmt = MIB.buildVScale(XLenTy, RemIdx);
1178 auto [Mask, VL] = buildDefaultVLOps(LitTy, MIB, MRI);
// NOTE(review): listing line 1179 (the definition of `Policy` used below) is
// missing.
1180 auto Slidedown = MIB.buildInstr(
1181 RISCV::G_VSLIDEDOWN_VL, {InterLitTy},
1182 {MIB.buildUndef(InterLitTy), Vec, SlidedownAmt, Mask, VL, Policy});
1183
1184 // Now the vector is in the right position, extract our final subvector. This
1185 // should resolve to a COPY.
1186 MIB.buildExtractSubvector(Dst, Slidedown, 0);
1187
1188 MI.eraseFromParent();
1189 return true;
1190}
1191
// Custom-legalize G_INSERT_SUBVECTOR.
//
// Returns true without changing MI when the index is 0, when the big vector
// is defined by G_IMPLICIT_DEF, or when the insert aligns exactly to vector
// registers. Subvectors with s1 elements are handed to the LegalizerHelper
// (bitcast when both element counts are at least 8, widenScalar otherwise).
// Everything else is lowered to G_VMV_V_V_VL or G_VSLIDEUP_VL on an LMUL=1
// (or BigTy) register, followed by an insert back into the big vector when
// needed, and MI is erased.
//
// NOTE(review): this text comes from a documentation listing in which some
// lines (listing lines 1221, 1234, 1238, 1269, 1300-1301 and 1304) are
// absent; they are flagged below and must be restored from the upstream file.
1192bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
1193 LegalizerHelper &Helper,
1194 MachineIRBuilder &MIB) const {
1195 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
1196
1197 MachineRegisterInfo &MRI = *MIB.getMRI();
1198
1199 Register Dst = IS.getReg(0);
1200 Register BigVec = IS.getBigVec();
1201 Register LitVec = IS.getSubVec();
1202 uint64_t Idx = IS.getIndexImm();
1203
1204 LLT BigTy = MRI.getType(BigVec);
1205 LLT LitTy = MRI.getType(LitVec);
1206
// Index 0, or an insert into an undefined vector, is left untouched.
1207 if (Idx == 0 ||
1208 MRI.getVRegDef(BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
1209 return true;
1210
1211 // We don't have the ability to slide mask vectors up indexed by their i1
1212 // elements; the smallest we can do is i8. Often we are able to bitcast to
1213 // equivalent i8 vectors. Otherwise, we must zero-extend to equivalent i8
1214 // vectors and truncate down after the insert.
1215 if (LitTy.getElementType() == LLT::scalar(1)) {
1216 auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
1217 auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
1218 if (BigTyMinElts >= 8 && LitTyMinElts >= 8)
1219 return Helper.bitcast(
1220 IS, 0,
// NOTE(review): listing line 1221 (the bitcast target type) is missing.
1222
1223 // We can't slide this mask vector up indexed by its i1 elements.
1224 // This poses a problem when we wish to insert a scalable vector which
1225 // can't be re-expressed as a larger type. Just choose the slow path and
1226 // extend to a larger type, then truncate back down.
1227 LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
1228 return Helper.widenScalar(IS, 0, ExtBigTy);
1229 }
1230
1231 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1232 unsigned SubRegIdx, RemIdx;
1233 std::tie(SubRegIdx, RemIdx) =
// NOTE(review): listing line 1234 (the callee producing the
// {subregister index, remaining index} pair) is missing.
1235 getMVTForLLT(BigTy), getMVTForLLT(LitTy), Idx, TRI);
1236
1237 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
// NOTE(review): listing line 1238 (the start of the statement that the next
// line completes) is missing.
1239 STI.expandVScale(LitTy.getSizeInBits()).getKnownMinValue()));
1240 bool ExactlyVecRegSized =
1241 STI.expandVScale(LitTy.getSizeInBits())
1242 .isKnownMultipleOf(STI.expandVScale(VecRegSize));
1243
1244 // If the Idx has been completely eliminated and this subvector's size is a
1245 // vector register or a multiple thereof, or the surrounding elements are
1246 // undef, then this is a subvector insert which naturally aligns to a vector
1247 // register. These can easily be handled using subregister manipulation.
1248 if (RemIdx == 0 && ExactlyVecRegSized)
1249 return true;
1250
1251 // If the subvector is smaller than a vector register, then the insertion
1252 // must preserve the undisturbed elements of the register. We do this by
1253 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
1254 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
1255 // subvector within the vector register, and an INSERT_SUBVECTOR of that
1256 // LMUL=1 type back into the larger vector (resolving to another subregister
1257 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
1258 // to avoid allocating a large register group to hold our subvector.
1259
1260 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
1261 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
1262 // (in our case undisturbed). This means we can set up a subvector insertion
1263 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
1264 // size of the subvector.
1265 const LLT XLenTy(STI.getXLenVT());
1266 LLT InterLitTy = BigTy;
1267 Register AlignedExtract = BigVec;
// Index of the first element of the register-aligned chunk containing Idx.
1268 unsigned AlignedIdx = Idx - RemIdx;
// NOTE(review): listing line 1269 (the start of the `if (` condition that
// compares BigTy's size with the LMUL1 type's size) is missing.
1270 getLMUL1Ty(BigTy).getSizeInBits())) {
1271 InterLitTy = getLMUL1Ty(BigTy);
1272 // Extract a subvector equal to the nearest full vector register type. This
1273 // should resolve to a G_EXTRACT on a subreg.
1274 AlignedExtract =
1275 MIB.buildExtractSubvector(InterLitTy, BigVec, AlignedIdx).getReg(0);
1276 }
1277
// Place the small vector at element 0 of an otherwise undefined InterLitTy.
1278 auto Insert = MIB.buildInsertSubvector(InterLitTy, MIB.buildUndef(InterLitTy),
1279 LitVec, 0);
1280
1281 auto [Mask, _] = buildDefaultVLOps(BigTy, MIB, MRI);
1282 auto VL = MIB.buildVScale(XLenTy, LitTy.getElementCount().getKnownMinValue());
1283
1284 // If we're inserting into the lowest elements, use a tail undisturbed
1285 // vmv.v.v.
1286 MachineInstrBuilder Inserted;
1287 bool NeedInsertSubvec =
1288 TypeSize::isKnownGT(BigTy.getSizeInBits(), InterLitTy.getSizeInBits());
// Write straight into Dst unless the result still has to be inserted back
// into the larger vector.
1289 Register InsertedDst =
1290 NeedInsertSubvec ? MRI.createGenericVirtualRegister(InterLitTy) : Dst;
1291 if (RemIdx == 0) {
1292 Inserted = MIB.buildInstr(RISCV::G_VMV_V_V_VL, {InsertedDst},
1293 {AlignedExtract, Insert, VL});
1294 } else {
1295 auto SlideupAmt = MIB.buildVScale(XLenTy, RemIdx);
1296 // Construct the vector length corresponding to RemIdx + length(LitTy).
1297 VL = MIB.buildAdd(XLenTy, SlideupAmt, VL);
1298 // Use tail agnostic policy if we're inserting over InterLitTy's tail.
1299 ElementCount EndIndex =
// NOTE(review): listing lines 1300-1301 (the EndIndex initializer and the
// declaration of `Policy`) are missing.
1302 if (STI.expandVScale(EndIndex) ==
1303 STI.expandVScale(InterLitTy.getElementCount()))
// NOTE(review): listing line 1304 (the statement guarded by the `if` above,
// presumably the tail-agnostic Policy assignment) is missing.
1305
1306 Inserted =
1307 MIB.buildInstr(RISCV::G_VSLIDEUP_VL, {InsertedDst},
1308 {AlignedExtract, Insert, SlideupAmt, Mask, VL, Policy});
1309 }
1310
1311 // If required, insert this subvector back into the correct vector register.
1312 // This should resolve to an INSERT_SUBREG instruction.
1313 if (NeedInsertSubvec)
1314 MIB.buildInsertSubvector(Dst, BigVec, Inserted, AlignedIdx);
1315
1316 MI.eraseFromParent();
1317 return true;
1318}
1319
1320static unsigned getRISCVWOpcode(unsigned Opcode) {
1321 switch (Opcode) {
1322 default:
1323 llvm_unreachable("Unexpected opcode");
1324 case TargetOpcode::G_ASHR:
1325 return RISCV::G_SRAW;
1326 case TargetOpcode::G_LSHR:
1327 return RISCV::G_SRLW;
1328 case TargetOpcode::G_SHL:
1329 return RISCV::G_SLLW;
1330 case TargetOpcode::G_SDIV:
1331 return RISCV::G_DIVW;
1332 case TargetOpcode::G_UDIV:
1333 return RISCV::G_DIVUW;
1334 case TargetOpcode::G_UREM:
1335 return RISCV::G_REMUW;
1336 case TargetOpcode::G_ROTL:
1337 return RISCV::G_ROLW;
1338 case TargetOpcode::G_ROTR:
1339 return RISCV::G_RORW;
1340 case TargetOpcode::G_CTLZ:
1341 return RISCV::G_CLZW;
1342 case TargetOpcode::G_CTTZ:
1343 return RISCV::G_CTZW;
1344 case TargetOpcode::G_FPTOSI:
1345 return RISCV::G_FCVT_W_RV64;
1346 case TargetOpcode::G_FPTOUI:
1347 return RISCV::G_FCVT_WU_RV64;
1348 }
1349}
1350
1353 LostDebugLocObserver &LocObserver) const {
// Custom legalization entry point: dispatches on MI's opcode and returns
// true when the instruction was handled (rewritten or accepted as-is) and
// false when it was not.
//
// NOTE(review): the head of this signature (listing lines 1351-1352, which
// declare the function name and the `Helper` and `MI` parameters used below)
// is absent from this listing; presumably this is
// RISCVLegalizerInfo::legalizeCustom. Listing lines 1412 and 1469 are also
// absent and are flagged below. Restore them from the upstream file.
1354 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1355 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1356 MachineFunction &MF = *MI.getParent()->getParent();
1357 switch (MI.getOpcode()) {
1358 default:
1359 // No idea what to do.
1360 return false;
1361 case TargetOpcode::G_ABS:
1362 return Helper.lowerAbsToMaxNeg(MI);
1363 case TargetOpcode::G_FCONSTANT: {
1364 const APFloat &FVal = MI.getOperand(1).getFPImm()->getValueAPF();
1365
1366 // Convert G_FCONSTANT to G_CONSTANT.
1367 Register DstReg = MI.getOperand(0).getReg();
1368 MIRBuilder.buildConstant(DstReg, FVal.bitcastToAPInt());
1369
1370 MI.eraseFromParent();
1371 return true;
1372 }
1373 case TargetOpcode::G_CONSTANT: {
1374 const Function &F = MF.getFunction();
1375 // TODO: if PSI and BFI are present, add " ||
1376 // llvm::shouldOptForSize(*CurMBB, PSI, BFI)".
1377 bool ShouldOptForSize = F.hasOptSize();
1378 const ConstantInt *ConstVal = MI.getOperand(1).getCImm();
// Constants that need not go to the constant pool are left as they are.
1379 if (!shouldBeInConstantPool(ConstVal->getValue(), ShouldOptForSize))
1380 return true;
1381 return Helper.lowerConstant(MI);
1382 }
1383 case TargetOpcode::G_SUB:
1384 case TargetOpcode::G_ADD: {
// Perform the operation at sXLen, then sign-extend the low 32 bits of the
// wide result in-register and truncate that back to the original
// destination.
1385 Helper.Observer.changingInstr(MI);
1386 Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1387 Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
1388
1389 Register DstALU = MRI.createGenericVirtualRegister(sXLen);
1390
1391 MachineOperand &MO = MI.getOperand(0);
// Advance the insertion point so the new instructions are built after MI.
1392 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1393 auto DstSext = MIRBuilder.buildSExtInReg(sXLen, DstALU, 32);
1394
1395 MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {MO}, {DstSext});
1396 MO.setReg(DstALU);
1397
1398 Helper.Observer.changedInstr(MI);
1399 return true;
1400 }
1401 case TargetOpcode::G_SEXT_INREG: {
1402 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1403 int64_t SizeInBits = MI.getOperand(2).getImm();
1404 // Source size of 32 is sext.w.
1405 if (DstTy.getSizeInBits() == 64 && SizeInBits == 32)
1406 return true;
1407
// With Zbb, 8- and 16-bit in-register sign extensions are kept as-is.
1408 if (STI.hasStdExtZbb() && (SizeInBits == 8 || SizeInBits == 16))
1409 return true;
1410
1411 return Helper.lower(MI, 0, /* Unused hint type */ LLT()) ==
// NOTE(review): listing line 1412 (the right-hand side of this comparison,
// presumably the "Legalized" result value) is missing.
1413 }
1414 case TargetOpcode::G_ASHR:
1415 case TargetOpcode::G_LSHR:
1416 case TargetOpcode::G_SHL: {
1417 if (getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
1418 // We don't need a custom node for shift by constant. Just widen the
1419 // source and the shift amount.
// The extension of the shifted value matches the bits the shift brings in:
// sign-extend for G_ASHR, zero-extend for G_LSHR, any-extend for G_SHL.
1420 unsigned ExtOpc = TargetOpcode::G_ANYEXT;
1421 if (MI.getOpcode() == TargetOpcode::G_ASHR)
1422 ExtOpc = TargetOpcode::G_SEXT;
1423 else if (MI.getOpcode() == TargetOpcode::G_LSHR)
1424 ExtOpc = TargetOpcode::G_ZEXT;
1425
1426 Helper.Observer.changingInstr(MI);
1427 Helper.widenScalarSrc(MI, sXLen, 1, ExtOpc);
1428 Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ZEXT);
1429 Helper.widenScalarDst(MI, sXLen);
1430 Helper.Observer.changedInstr(MI);
1431 return true;
1432 }
1433
// Variable shift amount: widen all operands to sXLen and switch to the
// opcode returned by getRISCVWOpcode().
1434 Helper.Observer.changingInstr(MI);
1435 Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1436 Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
1437 Helper.widenScalarDst(MI, sXLen);
1438 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1439 Helper.Observer.changedInstr(MI);
1440 return true;
1441 }
1442 case TargetOpcode::G_SDIV:
1443 case TargetOpcode::G_UDIV:
1444 case TargetOpcode::G_UREM:
1445 case TargetOpcode::G_ROTL:
1446 case TargetOpcode::G_ROTR: {
// Two-source operations: widen both sources and the destination to sXLen
// and switch to the opcode returned by getRISCVWOpcode().
1447 Helper.Observer.changingInstr(MI);
1448 Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1449 Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
1450 Helper.widenScalarDst(MI, sXLen);
1451 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1452 Helper.Observer.changedInstr(MI);
1453 return true;
1454 }
1455 case TargetOpcode::G_CTLZ:
1456 case TargetOpcode::G_CTTZ: {
// Single-source operations: same treatment with only operand 1 widened.
1457 Helper.Observer.changingInstr(MI);
1458 Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1459 Helper.widenScalarDst(MI, sXLen);
1460 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1461 Helper.Observer.changedInstr(MI);
1462 return true;
1463 }
1464 case TargetOpcode::G_FPTOSI:
1465 case TargetOpcode::G_FPTOUI: {
// Only the integer destination is widened; the floating-point source is
// left untouched.
1466 Helper.Observer.changingInstr(MI);
1467 Helper.widenScalarDst(MI, sXLen);
1468 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
// NOTE(review): listing line 1469 is missing here; presumably it appends an
// operand required by the new opcode (e.g. a rounding mode) — confirm
// against the upstream file.
1470 Helper.Observer.changedInstr(MI);
1471 return true;
1472 }
1473 case TargetOpcode::G_IS_FPCLASS: {
1474 Register GISFPCLASS = MI.getOperand(0).getReg();
1475 Register Src = MI.getOperand(1).getReg();
1476 const MachineOperand &ImmOp = MI.getOperand(2);
1477 MachineIRBuilder MIB(MI);
1478
1479 // Turn LLVM IR's floating point classes to that in RISC-V,
1480 // by simply rotating the 10-bit immediate right by two bits.
1481 APInt GFpClassImm(10, static_cast<uint64_t>(ImmOp.getImm()));
1482 auto FClassMask = MIB.buildConstant(sXLen, GFpClassImm.rotr(2).zext(XLen));
1483 auto ConstZero = MIB.buildConstant(sXLen, 0);
1484
// The result is true when any requested class bit is set in the G_FCLASS
// result.
1485 auto GFClass = MIB.buildInstr(RISCV::G_FCLASS, {sXLen}, {Src});
1486 auto And = MIB.buildAnd(sXLen, GFClass, FClassMask);
1487 MIB.buildICmp(CmpInst::ICMP_NE, GISFPCLASS, And, ConstZero);
1488
1489 MI.eraseFromParent();
1490 return true;
1491 }
// The remaining opcodes are forwarded to their dedicated helpers.
1492 case TargetOpcode::G_BRJT:
1493 return legalizeBRJT(MI, MIRBuilder);
1494 case TargetOpcode::G_VASTART:
1495 return legalizeVAStart(MI, MIRBuilder);
1496 case TargetOpcode::G_VSCALE:
1497 return legalizeVScale(MI, MIRBuilder);
1498 case TargetOpcode::G_ZEXT:
1499 case TargetOpcode::G_SEXT:
1500 case TargetOpcode::G_ANYEXT:
1501 return legalizeExt(MI, MIRBuilder);
1502 case TargetOpcode::G_SPLAT_VECTOR:
1503 return legalizeSplatVector(MI, MIRBuilder);
1504 case TargetOpcode::G_EXTRACT_SUBVECTOR:
1505 return legalizeExtractSubvector(MI, MIRBuilder);
1506 case TargetOpcode::G_INSERT_SUBVECTOR:
1507 return legalizeInsertSubvector(MI, Helper, MIRBuilder);
1508 case TargetOpcode::G_LOAD:
1509 case TargetOpcode::G_STORE:
1510 return legalizeLoadStore(MI, Helper, MIRBuilder);
1511 }
1512
1513 llvm_unreachable("expected switch to return");
1514}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:55
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define P(N)
ppc ctr loops verify
static LLT getLMUL1Ty(LLT VecTy)
static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL, MachineIRBuilder &MIB, MachineRegisterInfo &MRI)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static std::pair< MachineInstrBuilder, MachineInstrBuilder > buildDefaultVLOps(LLT VecTy, MachineIRBuilder &MIB, MachineRegisterInfo &MRI)
Gets the two common "VL" operands: an all-ones mask and the vector length.
static LegalityPredicate typeIsLegalBoolVec(unsigned TypeIdx, std::initializer_list< LLT > BoolVecTys, const RISCVSubtarget &ST)
static MachineInstrBuilder buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru, const SrcOp &Scalar, const SrcOp &VL, MachineIRBuilder &MIB, MachineRegisterInfo &MRI)
static LegalityPredicate typeIsLegalIntOrFPVec(unsigned TypeIdx, std::initializer_list< LLT > IntOrFPVecTys, const RISCVSubtarget &ST)
static MachineInstrBuilder buildSplatPartsS64WithVL(const DstOp &Dst, const SrcOp &Passthru, Register Lo, Register Hi, const SrcOp &VL, MachineIRBuilder &MIB, MachineRegisterInfo &MRI)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
static LegalityPredicate typeIsLegalPtrVec(unsigned TypeIdx, std::initializer_list< LLT > PtrVecTys, const RISCVSubtarget &ST)
static unsigned getRISCVWOpcode(unsigned Opcode)
This file declares the targeting of the Machinelegalizer class for RISC-V.
Value * LHS
APInt bitcastToAPInt() const
Definition APFloat.h:1353
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
LLVM_ABI APInt rotr(unsigned rotateAmt) const
Rotate right by rotateAmt.
Definition APInt.cpp:1154
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
@ ICMP_NE
not equal
Definition InstrTypes.h:700
This is the shared class of boolean and integer constants.
Definition Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
static constexpr LLT scalable_vector(unsigned MinNumElements, unsigned ScalarSizeInBits)
Get a low-level scalable vector of some number of elements and element width.
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LegalizeRuleSet & maxScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at most as wide as Ty.
LegalizeRuleSet & lower()
The instruction is lowered.
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
LegalizeRuleSet & customFor(std::initializer_list< LLT > Types)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:64
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildInsertSubvector(const DstOp &Res, const SrcOp &Src0, const SrcOp &Src1, unsigned Index)
Build and insert Res = G_INSERT_SUBVECTOR Src0, Src1, Idx.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineFunction & getMF()
Getter for the function we currently build.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildExtractSubvector(const DstOp &Res, const SrcOp &Src, unsigned Index)
Build and insert Res = G_EXTRACT_SUBVECTOR Src, Idx0.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildBrIndirect(Register Tgt)
Build and insert G_BRINDIRECT Tgt.
MachineInstrBuilder buildSplatVector(const DstOp &Res, const SrcOp &Val)
Build and insert Res = G_SPLAT_VECTOR Val.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
LLVM_ABI unsigned getEntrySize(const DataLayout &TD) const
getEntrySize - Return the size of each entry in the jump table.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJumpTableEntry hook.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
LLVM_ABI unsigned getEntryAlignment(const DataLayout &TD) const
getEntryAlignment - Return the alignment of each entry in the jump table.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const override
Called for instructions with the Custom LegalizationAction.
bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override
RISCVLegalizerInfo(const RISCVSubtarget &ST)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-specific information for each MachineFunction.
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
static RISCVVType::VLMUL getLMUL(MVT VT)
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Register getReg() const
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for normal integer types.
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
LLVM_ABI LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 is true.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LLVM_ABI LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
LLVM_ABI LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
static constexpr unsigned RVVBitsPerBlock
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2033
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with no undefs if AllowUndefs is false).
Definition Utils.cpp:1607
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:293
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with no undefs if AllowUndefs is false).
Definition Utils.cpp:1589
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
std::function< bool(const LegalityQuery &)> LegalityPredicate
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:548
@ And
Bitwise or logical AND of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT, returns its APInt value and def register.
Definition Utils.cpp:433
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
The LegalityQuery object bundles together all the information that's needed to decide whether a given operation is legal or not.
ArrayRef< LLT > Types
Matching combinators.
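This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.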
static LLVM_ABI MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.