1//===-- RISCVLegalizerInfo.cpp ----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for RISC-V.
10/// \todo This should be generated by TableGen.
11//===----------------------------------------------------------------------===//
12
13#include "RISCVLegalizerInfo.h"
16#include "RISCVSubtarget.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/IntrinsicsRISCV.h"
31#include "llvm/IR/Type.h"
32
33using namespace llvm;
34using namespace LegalityPredicates;
35using namespace LegalizeMutations;
36
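// Predicate: the vector type at TypeIdx is one of IntOrFPVecTys and is actually
// supported by the subtarget's V configuration (V instructions present, I64
// vector support for 64-bit elements, and ELEN=64 for single-element nxv1
// types).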
37static LegalityPredicate
38typeIsLegalIntOrFPVec(unsigned TypeIdx,
39 std::initializer_list<LLT> IntOrFPVecTys,
40 const RISCVSubtarget &ST) {
41 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
42 return ST.hasVInstructions() &&
43 (Query.Types[TypeIdx].getScalarSizeInBits() != 64 ||
44 ST.hasVInstructionsI64()) &&
45 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
46 ST.getELen() == 64);
47 };
48
49 return all(typeInSet(TypeIdx, IntOrFPVecTys), P);
50}
51
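// Predicate: the mask (i1) vector type at TypeIdx is one of BoolVecTys and the
// subtarget has V instructions (single-element nxv1s1 additionally needs
// ELEN=64).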
52static LegalityPredicate
53typeIsLegalBoolVec(unsigned TypeIdx, std::initializer_list<LLT> BoolVecTys,
54 const RISCVSubtarget &ST) {
55 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
56 return ST.hasVInstructions() &&
57 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
58 ST.getELen() == 64);
59 };
60 return all(typeInSet(TypeIdx, BoolVecTys), P);
61}
62
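// Predicate: the pointer-vector type at TypeIdx is one of PtrVecTys and fits
// the subtarget (nxv1p0 needs ELEN=64; nxv16p0 is only usable when pointers
// are 32 bits wide, i.e. on RV32).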
63static LegalityPredicate typeIsLegalPtrVec(unsigned TypeIdx,
64 std::initializer_list<LLT> PtrVecTys,
65 const RISCVSubtarget &ST) {
66 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
67 return ST.hasVInstructions() &&
68 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
69 ST.getELen() == 64) &&
70 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 16 ||
71 Query.Types[TypeIdx].getScalarSizeInBits() == 32);
72 };
73 return all(typeInSet(TypeIdx, PtrVecTys), P);
74}
75
76RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
77 : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(XLen)) {
78 const LLT sDoubleXLen = LLT::scalar(2 * XLen);
79 const LLT p0 = LLT::pointer(0, XLen);
80 const LLT s1 = LLT::scalar(1);
81 const LLT s8 = LLT::scalar(8);
82 const LLT s16 = LLT::scalar(16);
83 const LLT s32 = LLT::scalar(32);
84 const LLT s64 = LLT::scalar(64);
85 const LLT s128 = LLT::scalar(128);
86
87 const LLT nxv1s1 = LLT::scalable_vector(1, s1);
88 const LLT nxv2s1 = LLT::scalable_vector(2, s1);
89 const LLT nxv4s1 = LLT::scalable_vector(4, s1);
90 const LLT nxv8s1 = LLT::scalable_vector(8, s1);
91 const LLT nxv16s1 = LLT::scalable_vector(16, s1);
92 const LLT nxv32s1 = LLT::scalable_vector(32, s1);
93 const LLT nxv64s1 = LLT::scalable_vector(64, s1);
94
95 const LLT nxv1s8 = LLT::scalable_vector(1, s8);
96 const LLT nxv2s8 = LLT::scalable_vector(2, s8);
97 const LLT nxv4s8 = LLT::scalable_vector(4, s8);
98 const LLT nxv8s8 = LLT::scalable_vector(8, s8);
99 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
100 const LLT nxv32s8 = LLT::scalable_vector(32, s8);
101 const LLT nxv64s8 = LLT::scalable_vector(64, s8);
102
103 const LLT nxv1s16 = LLT::scalable_vector(1, s16);
104 const LLT nxv2s16 = LLT::scalable_vector(2, s16);
105 const LLT nxv4s16 = LLT::scalable_vector(4, s16);
106 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
107 const LLT nxv16s16 = LLT::scalable_vector(16, s16);
108 const LLT nxv32s16 = LLT::scalable_vector(32, s16);
109
110 const LLT nxv1s32 = LLT::scalable_vector(1, s32);
111 const LLT nxv2s32 = LLT::scalable_vector(2, s32);
112 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
113 const LLT nxv8s32 = LLT::scalable_vector(8, s32);
114 const LLT nxv16s32 = LLT::scalable_vector(16, s32);
115
116 const LLT nxv1s64 = LLT::scalable_vector(1, s64);
117 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
118 const LLT nxv4s64 = LLT::scalable_vector(4, s64);
119 const LLT nxv8s64 = LLT::scalable_vector(8, s64);
120
121 const LLT nxv1p0 = LLT::scalable_vector(1, p0);
122 const LLT nxv2p0 = LLT::scalable_vector(2, p0);
123 const LLT nxv4p0 = LLT::scalable_vector(4, p0);
124 const LLT nxv8p0 = LLT::scalable_vector(8, p0);
125 const LLT nxv16p0 = LLT::scalable_vector(16, p0);
126
127 using namespace TargetOpcode;
128
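// Candidate scalable vector types; nxv<N>s<B> denotes <vscale x N x iB>.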
129 auto BoolVecTys = {nxv1s1, nxv2s1, nxv4s1, nxv8s1, nxv16s1, nxv32s1, nxv64s1};
130
131 auto IntOrFPVecTys = {nxv1s8, nxv2s8, nxv4s8, nxv8s8, nxv16s8, nxv32s8,
132 nxv64s8, nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16,
133 nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32,
134 nxv1s64, nxv2s64, nxv4s64, nxv8s64};
135
136 auto PtrVecTys = {nxv1p0, nxv2p0, nxv4p0, nxv8p0, nxv16p0};
137
138 getActionDefinitionsBuilder({G_ADD, G_SUB})
139 .legalFor({sXLen})
140 .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
141 .customFor(ST.is64Bit(), {s32})
142 .widenScalarToNextPow2(0)
143 .clampScalar(0, sXLen, sXLen);
144
145 getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
146 .legalFor({sXLen})
147 .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
148 .widenScalarToNextPow2(0)
149 .clampScalar(0, sXLen, sXLen);
150
151 getActionDefinitionsBuilder(
152 {G_UADDE, G_UADDO, G_USUBE, G_USUBO}).lower();
153
154 getActionDefinitionsBuilder({G_SADDO, G_SSUBO}).minScalar(0, sXLen).lower();
155
156 // TODO: Use Vector Single-Width Saturating Instructions for vector types.
157 getActionDefinitionsBuilder(
158 {G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT, G_SSHLSAT, G_USHLSAT})
159 .lower();
160
161 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
162 .legalFor({{sXLen, sXLen}})
163 .customFor(ST.is64Bit(), {{s32, s32}})
164 .widenScalarToNextPow2(0)
165 .clampScalar(1, sXLen, sXLen)
166 .clampScalar(0, sXLen, sXLen);
167
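// Scalar extensions narrower than XLen are widened; extensions from mask (i1)
// vectors are custom-lowered to a vselect in legalizeExt() below.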
168 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
169 .legalFor({{s32, s16}})
170 .legalFor(ST.is64Bit(), {{s64, s16}, {s64, s32}})
171 .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
172 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))
173 .customIf(typeIsLegalBoolVec(1, BoolVecTys, ST))
174 .maxScalar(0, sXLen);
175
176 getActionDefinitionsBuilder(G_SEXT_INREG)
177 .customFor({sXLen})
178 .clampScalar(0, sXLen, sXLen)
179 .lower();
180
181 // Merge/Unmerge
182 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
183 auto &MergeUnmergeActions = getActionDefinitionsBuilder(Op);
184 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
185 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
186 if (XLen == 32 && ST.hasStdExtD()) {
187 MergeUnmergeActions.legalIf(
188 all(typeIs(BigTyIdx, s64), typeIs(LitTyIdx, s32)));
189 }
190 MergeUnmergeActions.widenScalarToNextPow2(LitTyIdx, XLen)
191 .widenScalarToNextPow2(BigTyIdx, XLen)
192 .clampScalar(LitTyIdx, sXLen, sXLen)
193 .clampScalar(BigTyIdx, sXLen, sXLen);
194 }
195
196 getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();
197
198 getActionDefinitionsBuilder({G_ROTR, G_ROTL})
199 .legalFor(ST.hasStdExtZbb() || ST.hasStdExtZbkb(), {{sXLen, sXLen}})
200 .customFor(ST.is64Bit() && (ST.hasStdExtZbb() || ST.hasStdExtZbkb()),
201 {{s32, s32}})
202 .lower();
203
204 getActionDefinitionsBuilder(G_BITREVERSE).maxScalar(0, sXLen).lower();
205
206 getActionDefinitionsBuilder(G_BITCAST).legalIf(
207 all(LegalityPredicates::any(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
208 typeIsLegalBoolVec(0, BoolVecTys, ST)),
209 LegalityPredicates::any(typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST),
210 typeIsLegalBoolVec(1, BoolVecTys, ST))));
211
212 auto &BSWAPActions = getActionDefinitionsBuilder(G_BSWAP);
213 if (ST.hasStdExtZbb() || ST.hasStdExtZbkb())
214 BSWAPActions.legalFor({sXLen}).clampScalar(0, sXLen, sXLen);
215 else
216 BSWAPActions.maxScalar(0, sXLen).lower();
217
218 auto &CountZerosActions = getActionDefinitionsBuilder({G_CTLZ, G_CTTZ});
219 auto &CountZerosUndefActions =
220 getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF});
221 if (ST.hasStdExtZbb()) {
222 CountZerosActions.legalFor({{sXLen, sXLen}})
223 .customFor({{s32, s32}})
224 .clampScalar(0, s32, sXLen)
225 .widenScalarToNextPow2(0)
226 .scalarSameSizeAs(1, 0);
227 } else {
228 CountZerosActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
229 CountZerosUndefActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0);
230 }
231 CountZerosUndefActions.lower();
232
233 auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
234 if (ST.hasStdExtZbb()) {
235 CTPOPActions.legalFor({{sXLen, sXLen}})
236 .clampScalar(0, sXLen, sXLen)
237 .scalarSameSizeAs(1, 0);
238 } else {
239 CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
240 }
241
242 getActionDefinitionsBuilder(G_CONSTANT)
243 .legalFor({p0})
244 .legalFor(!ST.is64Bit(), {s32})
245 .customFor(ST.is64Bit(), {s64})
246 .widenScalarToNextPow2(0)
247 .clampScalar(0, sXLen, sXLen);
248
249 // TODO: Transform illegal vector types into legal vector types.
250 getActionDefinitionsBuilder(G_FREEZE)
251 .legalFor({s16, s32, p0})
252 .legalFor(ST.is64Bit(), {s64})
253 .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
254 .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
255 .widenScalarToNextPow2(0)
256 .clampScalar(0, s16, sXLen);
257
258 // TODO: Transform illegal vector types into legal vector types.
259 // TODO: Merge with G_FREEZE?
260 getActionDefinitionsBuilder(
261 {G_IMPLICIT_DEF, G_CONSTANT_FOLD_BARRIER})
262 .legalFor({s32, sXLen, p0})
263 .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
264 .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
265 .widenScalarToNextPow2(0)
266 .clampScalar(0, s32, sXLen);
267
268 getActionDefinitionsBuilder(G_ICMP)
269 .legalFor({{sXLen, sXLen}, {sXLen, p0}})
270 .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
271 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))
272 .widenScalarOrEltToNextPow2OrMinSize(1, 8)
273 .clampScalar(1, sXLen, sXLen)
274 .clampScalar(0, sXLen, sXLen);
275
276 getActionDefinitionsBuilder(G_SELECT)
277 .legalFor({{s32, sXLen}, {p0, sXLen}})
278 .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
279 typeIsLegalBoolVec(1, BoolVecTys, ST)))
280 .legalFor(XLen == 64 || ST.hasStdExtD(), {{s64, sXLen}})
281 .widenScalarToNextPow2(0)
282 .clampScalar(0, s32, (XLen == 64 || ST.hasStdExtD()) ? s64 : s32)
283 .clampScalar(1, sXLen, sXLen);
284
285 auto &LoadActions = getActionDefinitionsBuilder(G_LOAD);
286 auto &StoreActions = getActionDefinitionsBuilder(G_STORE);
287 auto &ExtLoadActions = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD});
288
289 // Return the alignment needed for scalar memory ops. If unaligned scalar mem
290 // is supported, we only require byte alignment. Otherwise, we need the memory
291 // op to be natively aligned.
292 auto getScalarMemAlign = [&ST](unsigned Size) {
293 return ST.enableUnalignedScalarMem() ? 8 : Size;
294 };
295
296 LoadActions.legalForTypesWithMemDesc(
297 {{s16, p0, s8, getScalarMemAlign(8)},
298 {s32, p0, s8, getScalarMemAlign(8)},
299 {s16, p0, s16, getScalarMemAlign(16)},
300 {s32, p0, s16, getScalarMemAlign(16)},
301 {s32, p0, s32, getScalarMemAlign(32)},
302 {p0, p0, sXLen, getScalarMemAlign(XLen)}});
303 StoreActions.legalForTypesWithMemDesc(
304 {{s16, p0, s8, getScalarMemAlign(8)},
305 {s32, p0, s8, getScalarMemAlign(8)},
306 {s16, p0, s16, getScalarMemAlign(16)},
307 {s32, p0, s16, getScalarMemAlign(16)},
308 {s32, p0, s32, getScalarMemAlign(32)},
309 {p0, p0, sXLen, getScalarMemAlign(XLen)}});
310 ExtLoadActions.legalForTypesWithMemDesc(
311 {{sXLen, p0, s8, getScalarMemAlign(8)},
312 {sXLen, p0, s16, getScalarMemAlign(16)}});
313 if (XLen == 64) {
314 LoadActions.legalForTypesWithMemDesc(
315 {{s64, p0, s8, getScalarMemAlign(8)},
316 {s64, p0, s16, getScalarMemAlign(16)},
317 {s64, p0, s32, getScalarMemAlign(32)},
318 {s64, p0, s64, getScalarMemAlign(64)}});
319 StoreActions.legalForTypesWithMemDesc(
320 {{s64, p0, s8, getScalarMemAlign(8)},
321 {s64, p0, s16, getScalarMemAlign(16)},
322 {s64, p0, s32, getScalarMemAlign(32)},
323 {s64, p0, s64, getScalarMemAlign(64)}});
324 ExtLoadActions.legalForTypesWithMemDesc(
325 {{s64, p0, s32, getScalarMemAlign(32)}});
326 } else if (ST.hasStdExtD()) {
327 LoadActions.legalForTypesWithMemDesc(
328 {{s64, p0, s64, getScalarMemAlign(64)}});
329 StoreActions.legalForTypesWithMemDesc(
330 {{s64, p0, s64, getScalarMemAlign(64)}});
331 }
332
333 // Vector loads/stores.
334 if (ST.hasVInstructions()) {
335 LoadActions.legalForTypesWithMemDesc({{nxv2s8, p0, nxv2s8, 8},
336 {nxv4s8, p0, nxv4s8, 8},
337 {nxv8s8, p0, nxv8s8, 8},
338 {nxv16s8, p0, nxv16s8, 8},
339 {nxv32s8, p0, nxv32s8, 8},
340 {nxv64s8, p0, nxv64s8, 8},
341 {nxv2s16, p0, nxv2s16, 16},
342 {nxv4s16, p0, nxv4s16, 16},
343 {nxv8s16, p0, nxv8s16, 16},
344 {nxv16s16, p0, nxv16s16, 16},
345 {nxv32s16, p0, nxv32s16, 16},
346 {nxv2s32, p0, nxv2s32, 32},
347 {nxv4s32, p0, nxv4s32, 32},
348 {nxv8s32, p0, nxv8s32, 32},
349 {nxv16s32, p0, nxv16s32, 32}});
350 StoreActions.legalForTypesWithMemDesc({{nxv2s8, p0, nxv2s8, 8},
351 {nxv4s8, p0, nxv4s8, 8},
352 {nxv8s8, p0, nxv8s8, 8},
353 {nxv16s8, p0, nxv16s8, 8},
354 {nxv32s8, p0, nxv32s8, 8},
355 {nxv64s8, p0, nxv64s8, 8},
356 {nxv2s16, p0, nxv2s16, 16},
357 {nxv4s16, p0, nxv4s16, 16},
358 {nxv8s16, p0, nxv8s16, 16},
359 {nxv16s16, p0, nxv16s16, 16},
360 {nxv32s16, p0, nxv32s16, 16},
361 {nxv2s32, p0, nxv2s32, 32},
362 {nxv4s32, p0, nxv4s32, 32},
363 {nxv8s32, p0, nxv8s32, 32},
364 {nxv16s32, p0, nxv16s32, 32}});
365
366 if (ST.getELen() == 64) {
367 LoadActions.legalForTypesWithMemDesc({{nxv1s8, p0, nxv1s8, 8},
368 {nxv1s16, p0, nxv1s16, 16},
369 {nxv1s32, p0, nxv1s32, 32}});
370 StoreActions.legalForTypesWithMemDesc({{nxv1s8, p0, nxv1s8, 8},
371 {nxv1s16, p0, nxv1s16, 16},
372 {nxv1s32, p0, nxv1s32, 32}});
373 }
374
375 if (ST.hasVInstructionsI64()) {
376 LoadActions.legalForTypesWithMemDesc({{nxv1s64, p0, nxv1s64, 64},
377 {nxv2s64, p0, nxv2s64, 64},
378 {nxv4s64, p0, nxv4s64, 64},
379 {nxv8s64, p0, nxv8s64, 64}});
380 StoreActions.legalForTypesWithMemDesc({{nxv1s64, p0, nxv1s64, 64},
381 {nxv2s64, p0, nxv2s64, 64},
382 {nxv4s64, p0, nxv4s64, 64},
383 {nxv8s64, p0, nxv8s64, 64}});
384 }
385
386 // Take the custom lowering path for scalable vector types with
387 // non-standard alignments.
388 LoadActions.customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));
389 StoreActions.customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));
390
391 // Pointers require that XLen sized elements are legal.
392 if (XLen <= ST.getELen()) {
393 LoadActions.customIf(typeIsLegalPtrVec(0, PtrVecTys, ST));
394 StoreActions.customIf(typeIsLegalPtrVec(0, PtrVecTys, ST));
395 }
396 }
397
398 LoadActions.widenScalarToNextPow2(0, /* MinSize = */ 8)
399 .lowerIfMemSizeNotByteSizePow2()
400 .clampScalar(0, s16, sXLen)
401 .lower();
402 StoreActions
403 .clampScalar(0, s16, sXLen)
404 .lowerIfMemSizeNotByteSizePow2()
405 .lower();
406
407 ExtLoadActions.widenScalarToNextPow2(0).clampScalar(0, sXLen, sXLen).lower();
408
409 getActionDefinitionsBuilder({G_PTR_ADD, G_PTRMASK}).legalFor({{p0, sXLen}});
410
411 getActionDefinitionsBuilder(G_PTRTOINT)
412 .legalFor({{sXLen, p0}})
413 .clampScalar(0, sXLen, sXLen);
414
415 getActionDefinitionsBuilder(G_INTTOPTR)
416 .legalFor({{p0, sXLen}})
417 .clampScalar(1, sXLen, sXLen);
418
419 getActionDefinitionsBuilder(G_BRCOND).legalFor({sXLen}).minScalar(0, sXLen);
420
421 getActionDefinitionsBuilder(G_BRJT).customFor({{p0, sXLen}});
422
423 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
424
425 getActionDefinitionsBuilder(G_PHI)
426 .legalFor({p0, s32, sXLen})
427 .widenScalarToNextPow2(0)
428 .clampScalar(0, s32, sXLen);
429
430 getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE, G_CONSTANT_POOL})
431 .legalFor({p0});
432
433 if (ST.hasStdExtZmmul()) {
434 getActionDefinitionsBuilder(G_MUL)
435 .legalFor({sXLen})
436 .widenScalarToNextPow2(0)
437 .clampScalar(0, sXLen, sXLen);
438
439 // clang-format off
440 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
441 .legalFor({sXLen})
442 .lower();
443 // clang-format on
444
445 getActionDefinitionsBuilder({G_SMULO, G_UMULO}).minScalar(0, sXLen).lower();
446 } else {
447 getActionDefinitionsBuilder(G_MUL)
448 .libcallFor({sXLen, sDoubleXLen})
449 .widenScalarToNextPow2(0)
450 .clampScalar(0, sXLen, sDoubleXLen);
451
452 getActionDefinitionsBuilder({G_SMULH, G_UMULH}).lowerFor({sXLen});
453
454 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
455 .minScalar(0, sXLen)
456 // Widen sXLen to sDoubleXLen so we can use a single libcall to get
457 // the low bits for the mul result and high bits to do the overflow
458 // check.
459 .widenScalarIf(typeIs(0, sXLen),
460 LegalizeMutations::changeTo(0, sDoubleXLen))
461 .lower();
462 }
463
464 if (ST.hasStdExtM()) {
465 getActionDefinitionsBuilder({G_SDIV, G_UDIV, G_UREM})
466 .legalFor({sXLen})
467 .customFor({s32})
468 .libcallFor({sDoubleXLen})
469 .clampScalar(0, s32, sDoubleXLen)
470 .widenScalarToNextPow2(0);
471 getActionDefinitionsBuilder(G_SREM)
472 .legalFor({sXLen})
473 .libcallFor({sDoubleXLen})
474 .clampScalar(0, sXLen, sDoubleXLen)
475 .widenScalarToNextPow2(0);
476 } else {
477 getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM})
478 .libcallFor({sXLen, sDoubleXLen})
479 .clampScalar(0, sXLen, sDoubleXLen)
480 .widenScalarToNextPow2(0);
481 }
482
483 // TODO: Use libcall for sDoubleXLen.
484 getActionDefinitionsBuilder({G_SDIVREM, G_UDIVREM}).lower();
485
486 getActionDefinitionsBuilder(G_ABS)
487 .customFor(ST.hasStdExtZbb(), {sXLen})
488 .minScalar(ST.hasStdExtZbb(), 0, sXLen)
489 .lower();
490
491 getActionDefinitionsBuilder({G_ABDS, G_ABDU})
492 .minScalar(ST.hasStdExtZbb(), 0, sXLen)
493 .lower();
494
495 getActionDefinitionsBuilder({G_UMAX, G_UMIN, G_SMAX, G_SMIN})
496 .legalFor(ST.hasStdExtZbb(), {sXLen})
497 .minScalar(ST.hasStdExtZbb(), 0, sXLen)
498 .lower();
499
500 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
501
502 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
503
504 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
505
506 getActionDefinitionsBuilder({G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
507 .lower();
508
509 // FP Operations
510
511 // FIXME: Support s128 for rv32 when libcall handling is able to use sret.
512 getActionDefinitionsBuilder(
513 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM})
514 .legalFor(ST.hasStdExtF(), {s32})
515 .legalFor(ST.hasStdExtD(), {s64})
516 .legalFor(ST.hasStdExtZfh(), {s16})
517 .libcallFor({s32, s64})
518 .libcallFor(ST.is64Bit(), {s128});
519
520 getActionDefinitionsBuilder({G_FNEG, G_FABS})
521 .legalFor(ST.hasStdExtF(), {s32})
522 .legalFor(ST.hasStdExtD(), {s64})
523 .legalFor(ST.hasStdExtZfh(), {s16})
524 .lowerFor({s32, s64, s128});
525
526 getActionDefinitionsBuilder(G_FREM)
527 .libcallFor({s32, s64})
528 .libcallFor(ST.is64Bit(), {s128})
529 .minScalar(0, s32)
530 .scalarize(0);
531
532 getActionDefinitionsBuilder(G_FCOPYSIGN)
533 .legalFor(ST.hasStdExtF(), {{s32, s32}})
534 .legalFor(ST.hasStdExtD(), {{s64, s64}, {s32, s64}, {s64, s32}})
535 .legalFor(ST.hasStdExtZfh(), {{s16, s16}, {s16, s32}, {s32, s16}})
536 .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}, {s64, s16}})
537 .lower();
538
539 // FIXME: Use Zfhmin.
540 getActionDefinitionsBuilder(G_FPTRUNC)
541 .legalFor(ST.hasStdExtD(), {{s32, s64}})
542 .legalFor(ST.hasStdExtZfh(), {{s16, s32}})
543 .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}})
544 .libcallFor({{s32, s64}})
545 .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}});
546 getActionDefinitionsBuilder(G_FPEXT)
547 .legalFor(ST.hasStdExtD(), {{s64, s32}})
548 .legalFor(ST.hasStdExtZfh(), {{s32, s16}})
549 .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s64, s16}})
550 .libcallFor({{s64, s32}})
551 .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}});
552
553 getActionDefinitionsBuilder(G_FCMP)
554 .legalFor(ST.hasStdExtF(), {{sXLen, s32}})
555 .legalFor(ST.hasStdExtD(), {{sXLen, s64}})
556 .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
557 .clampScalar(0, sXLen, sXLen)
558 .libcallFor({{sXLen, s32}, {sXLen, s64}})
559 .libcallFor(ST.is64Bit(), {{sXLen, s128}});
560
561 // TODO: Support vector version of G_IS_FPCLASS.
562 getActionDefinitionsBuilder(G_IS_FPCLASS)
563 .customFor(ST.hasStdExtF(), {{s1, s32}})
564 .customFor(ST.hasStdExtD(), {{s1, s64}})
565 .customFor(ST.hasStdExtZfh(), {{s1, s16}})
566 .lowerFor({{s1, s32}, {s1, s64}});
567
568 getActionDefinitionsBuilder(G_FCONSTANT)
569 .legalFor(ST.hasStdExtF(), {s32})
570 .legalFor(ST.hasStdExtD(), {s64})
571 .legalFor(ST.hasStdExtZfh(), {s16})
572 .lowerFor({s32, s64, s128});
573
574 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
575 .legalFor(ST.hasStdExtF(), {{sXLen, s32}})
576 .legalFor(ST.hasStdExtD(), {{sXLen, s64}})
577 .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
578 .customFor(ST.is64Bit() && ST.hasStdExtF(), {{s32, s32}})
579 .customFor(ST.is64Bit() && ST.hasStdExtD(), {{s32, s64}})
580 .customFor(ST.is64Bit() && ST.hasStdExtZfh(), {{s32, s16}})
581 .widenScalarToNextPow2(0)
582 .minScalar(0, s32)
583 .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
584 .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}}) // FIXME RV32.
585 .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}, {s128, s128}});
586
587 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
588 .legalFor(ST.hasStdExtF(), {{s32, sXLen}})
589 .legalFor(ST.hasStdExtD(), {{s64, sXLen}})
590 .legalFor(ST.hasStdExtZfh(), {{s16, sXLen}})
591 .widenScalarToNextPow2(1)
592 // Promote to XLen if the operation is legal.
593 .widenScalarIf(
594 [=, &ST](const LegalityQuery &Query) {
595 return Query.Types[0].isScalar() && Query.Types[1].isScalar() &&
596 (Query.Types[1].getSizeInBits() < ST.getXLen()) &&
597 ((ST.hasStdExtF() && Query.Types[0].getSizeInBits() == 32) ||
598 (ST.hasStdExtD() && Query.Types[0].getSizeInBits() == 64) ||
599 (ST.hasStdExtZfh() &&
600 Query.Types[0].getSizeInBits() == 16));
601 },
602 LegalizeMutations::changeTo(1, sXLen))
603 // Otherwise only promote to s32 since we have si libcalls.
604 .minScalar(1, s32)
605 .libcallFor({{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
606 .libcallFor(ST.is64Bit(), {{s128, s32}, {s128, s64}}) // FIXME RV32.
607 .libcallFor(ST.is64Bit(), {{s32, s128}, {s64, s128}, {s128, s128}});
608
609 // FIXME: We can do custom inline expansion like SelectionDAG.
610 getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
611 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
612 G_INTRINSIC_ROUNDEVEN})
613 .legalFor(ST.hasStdExtZfa(), {s32})
614 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtD(), {s64})
615 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtZfh(), {s16})
616 .libcallFor({s32, s64})
617 .libcallFor(ST.is64Bit(), {s128});
618
619 getActionDefinitionsBuilder({G_FMAXIMUM, G_FMINIMUM})
620 .legalFor(ST.hasStdExtZfa(), {s32})
621 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtD(), {s64})
622 .legalFor(ST.hasStdExtZfa() && ST.hasStdExtZfh(), {s16});
623
624 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FTAN, G_FPOW, G_FLOG, G_FLOG2,
625 G_FLOG10, G_FEXP, G_FEXP2, G_FEXP10, G_FACOS,
626 G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH,
627 G_FTANH})
628 .libcallFor({s32, s64})
629 .libcallFor(ST.is64Bit(), {s128});
630 getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
631 .libcallFor({{s32, s32}, {s64, s32}})
632 .libcallFor(ST.is64Bit(), {s128, s32});
633
634 getActionDefinitionsBuilder(G_VASTART).customFor({p0});
635
636 // va_list must be a pointer, but most sized types are pretty easy to handle
637 // as the destination.
638 getActionDefinitionsBuilder(G_VAARG)
639 // TODO: Implement narrowScalar and widenScalar for G_VAARG for types
640 // other than sXLen.
641 .clampScalar(0, sXLen, sXLen)
642 .lowerForCartesianProduct({sXLen, p0}, {p0});
643
644 getActionDefinitionsBuilder(G_VSCALE)
645 .clampScalar(0, sXLen, sXLen)
646 .customFor({sXLen});
647
648 auto &SplatActions =
649 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
650 .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
651 typeIs(1, sXLen)))
652 .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), typeIs(1, s1)));
653 // Handle case of s64 element vectors on RV32. If the subtarget does not have
654 // f64, then try to lower it to G_SPLAT_VECTOR_SPLIT_64_VL. If the subtarget
655 // does have f64, then we don't know whether the type is an f64 or an i64,
656 // so mark the G_SPLAT_VECTOR as legal and decide later what to do with it,
657 // depending on how the instructions it consumes are legalized. They are not
658 // legalized yet since legalization is in reverse postorder, so we cannot
659 // make the decision at this moment.
660 if (XLen == 32) {
661 if (ST.hasVInstructionsF64() && ST.hasStdExtD())
662 SplatActions.legalIf(all(
663 typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
664 else if (ST.hasVInstructionsI64())
665 SplatActions.customIf(all(
666 typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
667 }
668
669 SplatActions.clampScalar(1, sXLen, sXLen);
670
671 LegalityPredicate ExtractSubvecBitcastPred = [=](const LegalityQuery &Query) {
672 LLT DstTy = Query.Types[0];
673 LLT SrcTy = Query.Types[1];
674 return DstTy.getElementType() == LLT::scalar(1) &&
675 DstTy.getElementCount().getKnownMinValue() >= 8 &&
676 SrcTy.getElementCount().getKnownMinValue() >= 8;
677 };
678 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
679 // We don't have the ability to slide mask vectors down indexed by their
680 // i1 elements; the smallest we can do is i8. Often we are able to bitcast
681 // to equivalent i8 vectors.
682 .bitcastIf(
683 all(typeIsLegalBoolVec(0, BoolVecTys, ST),
684 typeIsLegalBoolVec(1, BoolVecTys, ST), ExtractSubvecBitcastPred),
685 [=](const LegalityQuery &Query) {
686 LLT CastTy = LLT::vector(
687 Query.Types[0].getElementCount().divideCoefficientBy(8), 8);
688 return std::pair(0, CastTy);
689 })
690 .customIf(LegalityPredicates::any(
691 all(typeIsLegalBoolVec(0, BoolVecTys, ST),
692 typeIsLegalBoolVec(1, BoolVecTys, ST)),
693 all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
694 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))));
695
696 getActionDefinitionsBuilder(G_INSERT_SUBVECTOR)
697 .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
698 typeIsLegalBoolVec(1, BoolVecTys, ST)))
699 .customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
700 typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
701
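// Atomic RMW: with the A extension an XLen-wide G_ATOMICRMW_ADD is legal (it
// can later be selected to an AMO instruction); without A, fall back to the
// __atomic_* libcalls. G_ATOMICRMW_SUB is lowered (to an add of the negated
// value) when a libcall is not used.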
702 getActionDefinitionsBuilder(G_ATOMICRMW_ADD)
703 .legalFor(ST.hasStdExtA(), {{sXLen, p0}})
704 .libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
705 .clampScalar(0, sXLen, sXLen);
706
707 getActionDefinitionsBuilder(G_ATOMICRMW_SUB)
708 .libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
709 .clampScalar(0, sXLen, sXLen)
710 .lower();
711
712 getLegacyLegalizerInfo().computeTables();
713 verify(*ST.getInstrInfo());
714}
715
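// Custom legalization for target intrinsics reaching the legalizer.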
716bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
717 MachineInstr &MI) const {
718 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
719 switch (IntrinsicID) {
720 default:
721 return false;
722 case Intrinsic::vacopy: {
723 // vacopy arguments must be legal because of the intrinsic signature.
724 // No need to check here.
725
726 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
727 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
728 MachineFunction &MF = *MI.getMF();
729 const DataLayout &DL = MIRBuilder.getDataLayout();
730 LLVMContext &Ctx = MF.getFunction().getContext();
731
732 Register DstLst = MI.getOperand(1).getReg();
733 LLT PtrTy = MRI.getType(DstLst);
734
735 // Load the source va_list
736 Align Alignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
737 MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
738 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, Alignment);
739 auto Tmp = MIRBuilder.buildLoad(PtrTy, MI.getOperand(2), *LoadMMO);
740
741 // Store the result in the destination va_list
742 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
743 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, Alignment);
744 MIRBuilder.buildStore(Tmp, DstLst, *StoreMMO);
745
746 MI.eraseFromParent();
747 return true;
748 }
749 case Intrinsic::riscv_masked_atomicrmw_add:
750 case Intrinsic::riscv_masked_atomicrmw_sub:
751 return true;
752 }
753}
754
755bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI,
756 MachineIRBuilder &MIRBuilder) const {
757 // Stores the address of the VarArgsFrameIndex slot into the memory location
758 assert(MI.getOpcode() == TargetOpcode::G_VASTART);
759 MachineFunction *MF = MI.getParent()->getParent();
760 RISCVMachineFunctionInfo *FuncInfo = MF->getInfo<RISCVMachineFunctionInfo>();
761 int FI = FuncInfo->getVarArgsFrameIndex();
762 LLT AddrTy = MIRBuilder.getMRI()->getType(MI.getOperand(0).getReg());
763 auto FINAddr = MIRBuilder.buildFrameIndex(AddrTy, FI);
764 assert(MI.hasOneMemOperand());
765 MIRBuilder.buildStore(FINAddr, MI.getOperand(0).getReg(),
766 *MI.memoperands()[0]);
767 MI.eraseFromParent();
768 return true;
769}
770
771bool RISCVLegalizerInfo::legalizeBRJT(MachineInstr &MI,
772 MachineIRBuilder &MIRBuilder) const {
773 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
774 auto &MF = *MI.getParent()->getParent();
775 const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
776 unsigned EntrySize = MJTI->getEntrySize(MF.getDataLayout());
777
778 Register PtrReg = MI.getOperand(0).getReg();
779 LLT PtrTy = MRI.getType(PtrReg);
780 Register IndexReg = MI.getOperand(2).getReg();
781 LLT IndexTy = MRI.getType(IndexReg);
782
783 if (!isPowerOf2_32(EntrySize))
784 return false;
785
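// Scale the jump-table index by the entry size to form a byte offset.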
786 auto ShiftAmt = MIRBuilder.buildConstant(IndexTy, Log2_32(EntrySize));
787 IndexReg = MIRBuilder.buildShl(IndexTy, IndexReg, ShiftAmt).getReg(0);
788
789 auto Addr = MIRBuilder.buildPtrAdd(PtrTy, PtrReg, IndexReg);
790
791 MachineMemOperand *MMO = MF.getMachineMemOperand(
792 MachinePointerInfo::getJumpTable(MF), MachineMemOperand::MOLoad,
793 EntrySize, Align(MJTI->getEntryAlignment(MF.getDataLayout())));
794
795 Register TargetReg;
796 switch (MJTI->getEntryKind()) {
797 default:
798 return false;
799 case MachineJumpTableInfo::EK_LabelDifference32: {
800 // For PIC, the sequence is:
801 // BRIND(load(Jumptable + index) + RelocBase)
802 // RelocBase can be JumpTable, GOT or some sort of global base.
803 unsigned LoadOpc =
804 STI.is64Bit() ? TargetOpcode::G_SEXTLOAD : TargetOpcode::G_LOAD;
805 auto Load = MIRBuilder.buildLoadInstr(LoadOpc, IndexTy, Addr, *MMO);
806 TargetReg = MIRBuilder.buildPtrAdd(PtrTy, PtrReg, Load).getReg(0);
807 break;
808 }
809 case MachineJumpTableInfo::EK_Custom32: {
810 auto Load = MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, IndexTy,
811 Addr, *MMO);
812 TargetReg = MIRBuilder.buildIntToPtr(PtrTy, Load).getReg(0);
813 break;
814 }
815 case MachineJumpTableInfo::EK_BlockAddress:
816 TargetReg = MIRBuilder.buildLoad(PtrTy, Addr, *MMO).getReg(0);
817 break;
818 }
819
820 MIRBuilder.buildBrIndirect(TargetReg);
821
822 MI.eraseFromParent();
823 return true;
824}
825
826bool RISCVLegalizerInfo::shouldBeInConstantPool(const APInt &APImm,
827 bool ShouldOptForSize) const {
828 assert(APImm.getBitWidth() == 32 || APImm.getBitWidth() == 64);
829 int64_t Imm = APImm.getSExtValue();
830 // All simm32 constants should be handled by isel.
831 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
832 // this check redundant, but small immediates are common so this check
833 // should have better compile time.
834 if (isInt<32>(Imm))
835 return false;
836
837 // We only need to cost the immediate, if constant pool lowering is enabled.
838 if (!STI.useConstantPoolForLargeInts())
839 return false;
840
841 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, STI);
842 if (Seq.size() <= STI.getMaxBuildIntsCost())
843 return false;
844
845 // Optimizations below are disabled for opt size. If we're optimizing for
846 // size, use a constant pool.
847 if (ShouldOptForSize)
848 return true;
849 //
850 // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
851 // do that if it will avoid a constant pool.
852 // It will require an extra temporary register though.
853 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
854 // low and high 32 bits are the same and bit 31 and 63 are set.
855 unsigned ShiftAmt, AddOpc;
856 RISCVMatInt::InstSeq SeqLo =
857 RISCVMatInt::generateTwoRegInstSeq(Imm, STI, ShiftAmt, AddOpc);
858 return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost());
859}
860
861bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI,
862 MachineIRBuilder &MIB) const {
863 const LLT XLenTy(STI.getXLenVT());
864 Register Dst = MI.getOperand(0).getReg();
865
866 // We define our scalable vector types for lmul=1 to use a 64 bit known
867 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
868 // vscale as VLENB / 8.
869 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
870 if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock)
871 // Support for VLEN==32 is incomplete.
872 return false;
873
874 // We assume VLENB is a multiple of 8. We manually choose the best shift
875 // here because SimplifyDemandedBits isn't always able to simplify it.
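// For example: Val == 4 gives VLENB >> 1, Val == 16 gives VLENB << 1,
// Val == 24 gives VLENB * 3, and Val == 5 gives (VLENB >> 3) * 5.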
876 uint64_t Val = MI.getOperand(1).getCImm()->getZExtValue();
877 if (isPowerOf2_64(Val)) {
878 uint64_t Log2 = Log2_64(Val);
879 if (Log2 < 3) {
880 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
881 MIB.buildLShr(Dst, VLENB, MIB.buildConstant(XLenTy, 3 - Log2));
882 } else if (Log2 > 3) {
883 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
884 MIB.buildShl(Dst, VLENB, MIB.buildConstant(XLenTy, Log2 - 3));
885 } else {
886 MIB.buildInstr(RISCV::G_READ_VLENB, {Dst}, {});
887 }
888 } else if ((Val % 8) == 0) {
889 // If the multiplier is a multiple of 8, scale it down to avoid needing
890 // to shift the VLENB value.
891 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
892 MIB.buildMul(Dst, VLENB, MIB.buildConstant(XLenTy, Val / 8));
893 } else {
894 auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
895 auto VScale = MIB.buildLShr(XLenTy, VLENB, MIB.buildConstant(XLenTy, 3));
896 MIB.buildMul(Dst, VScale, MIB.buildConstant(XLenTy, Val));
897 }
898 MI.eraseFromParent();
899 return true;
900}
901
902// Custom-lower extensions from mask vectors by using a vselect either with 1
903// for zero/any-extension or -1 for sign-extension:
904// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
905// Note that any-extension is lowered identically to zero-extension.
906bool RISCVLegalizerInfo::legalizeExt(MachineInstr &MI,
907 MachineIRBuilder &MIB) const {
908
909 unsigned Opc = MI.getOpcode();
910 assert(Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_SEXT ||
911 Opc == TargetOpcode::G_ANYEXT);
912
913 MachineRegisterInfo &MRI = *MIB.getMRI();
914 Register Dst = MI.getOperand(0).getReg();
915 Register Src = MI.getOperand(1).getReg();
916
917 LLT DstTy = MRI.getType(Dst);
918 int64_t ExtTrueVal = Opc == TargetOpcode::G_SEXT ? -1 : 1;
919 LLT DstEltTy = DstTy.getElementType();
920 auto SplatZero = MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, 0));
921 auto SplatTrue =
922 MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, ExtTrueVal));
923 MIB.buildSelect(Dst, Src, SplatTrue, SplatZero);
924
925 MI.eraseFromParent();
926 return true;
927}
928
929bool RISCVLegalizerInfo::legalizeLoadStore(MachineInstr &MI,
930 LegalizerHelper &Helper,
931 MachineIRBuilder &MIB) const {
933 "Machine instructions must be Load/Store.");
934 MachineRegisterInfo &MRI = *MIB.getMRI();
935 MachineFunction *MF = MI.getMF();
936 const DataLayout &DL = MIB.getDataLayout();
937 LLVMContext &Ctx = MF->getFunction().getContext();
938
939 Register DstReg = MI.getOperand(0).getReg();
940 LLT DataTy = MRI.getType(DstReg);
941 if (!DataTy.isVector())
942 return false;
943
944 if (!MI.hasOneMemOperand())
945 return false;
946
947 MachineMemOperand *MMO = *MI.memoperands_begin();
948
949 const auto *TLI = STI.getTargetLowering();
950 EVT VT = EVT::getEVT(getTypeForLLT(DataTy, Ctx));
951
952 if (TLI->allowsMemoryAccessForAlignment(Ctx, DL, VT, *MMO))
953 return true;
954
955 unsigned EltSizeBits = DataTy.getScalarSizeInBits();
956 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
957 "Unexpected unaligned RVV load type");
958
959 // Calculate the new vector type with i8 elements
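// e.g. an underaligned nxv4s32 access is rewritten as an nxv16s8 access.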
960 unsigned NumElements =
961 DataTy.getElementCount().getKnownMinValue() * (EltSizeBits / 8);
962 LLT NewDataTy = LLT::scalable_vector(NumElements, 8);
963
964 Helper.bitcast(MI, 0, NewDataTy);
965
966 return true;
967}
968
969/// Return the mask type suitable for masking the provided
970/// vector type. This is simply an i1 element type vector of the same
971/// (possibly scalable) length.
972static LLT getMaskTypeFor(LLT VecTy) {
973 assert(VecTy.isVector());
974 ElementCount EC = VecTy.getElementCount();
975 return LLT::vector(EC, LLT::scalar(1));
976}
977
978/// Creates an all ones mask suitable for masking a vector of type VecTy with
979/// vector length VL.
980static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
981 MachineIRBuilder &MIB,
982 MachineRegisterInfo &MRI) {
983 LLT MaskTy = getMaskTypeFor(VecTy);
984 return MIB.buildInstr(RISCV::G_VMSET_VL, {MaskTy}, {VL});
985}
986
987/// Gets the two common "VL" operands: an all-ones mask and the vector length.
988/// VecTy is a scalable vector type.
989static std::pair<MachineInstrBuilder, MachineInstrBuilder>
990buildDefaultVLOps(LLT VecTy, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
991 assert(VecTy.isScalableVector() && "Expecting scalable container type");
992 const RISCVSubtarget &STI = MIB.getMF().getSubtarget<RISCVSubtarget>();
993 LLT XLenTy(STI.getXLenVT());
994 auto VL = MIB.buildConstant(XLenTy, -1);
995 auto Mask = buildAllOnesMask(VecTy, VL, MIB, MRI);
996 return {Mask, VL};
997}
998
999static MachineInstrBuilder
1000buildSplatPartsS64WithVL(const DstOp &Dst, const SrcOp &Passthru, Register Lo,
1001 Register Hi, const SrcOp &VL, MachineIRBuilder &MIB,
1002 MachineRegisterInfo &MRI) {
1003 // TODO: If the Hi bits of the splat are undefined, then it's fine to just
1004 // splat Lo even if it might be sign extended. I don't think we have
1005 // introduced a case where we're building an s64 whose upper bits are undef
1006 // yet.
1007
1008 // Fall back to a stack store and stride x0 vector load.
1009 // TODO: need to lower G_SPLAT_VECTOR_SPLIT_I64. This is done in
1010 // preprocessDAG in SDAG.
1011 return MIB.buildInstr(RISCV::G_SPLAT_VECTOR_SPLIT_I64_VL, {Dst},
1012 {Passthru, Lo, Hi, VL});
1013}
1014
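// Splat an s64 scalar by unmerging it into two s32 halves and splatting the
// parts (used on RV32, where a 64-bit scalar does not fit in one register).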
1015static MachineInstrBuilder
1016buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru,
1017 const SrcOp &Scalar, const SrcOp &VL,
1018 MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
1019 assert(Scalar.getLLTTy(MRI) == LLT::scalar(64) && "Unexpected VecTy!");
1020 auto Unmerge = MIB.buildUnmerge(LLT::scalar(32), Scalar);
1021 return buildSplatPartsS64WithVL(Dst, Passthru, Unmerge.getReg(0),
1022 Unmerge.getReg(1), VL, MIB, MRI);
1023}
1024
1025// Lower splats of s1 types to G_ICMP. For each mask vector type, we have a
1026// legal equivalently-sized i8 type, so we can use that as a go-between.
1027// Splats of s1 types that have constant value can be legalized as VMSET_VL or
1028// VMCLR_VL.
1029bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
1030 MachineIRBuilder &MIB) const {
1031 assert(MI.getOpcode() == TargetOpcode::G_SPLAT_VECTOR);
1032
1033 MachineRegisterInfo &MRI = *MIB.getMRI();
1034
1035 Register Dst = MI.getOperand(0).getReg();
1036 Register SplatVal = MI.getOperand(1).getReg();
1037
1038 LLT VecTy = MRI.getType(Dst);
1039 LLT XLenTy(STI.getXLenVT());
1040
1041 // Handle case of s64 element vectors on rv32
1042 if (XLenTy.getSizeInBits() == 32 &&
1043 VecTy.getElementType().getSizeInBits() == 64) {
1044 auto [_, VL] = buildDefaultVLOps(MRI.getType(Dst), MIB, MRI);
1045 buildSplatSplitS64WithVL(Dst, MIB.buildUndef(VecTy), SplatVal, VL, MIB,
1046 MRI);
1047 MI.eraseFromParent();
1048 return true;
1049 }
1050
1051 // All-zeros or all-ones splats are handled specially.
1052 MachineInstr &SplatValMI = *MRI.getVRegDef(SplatVal);
1053 if (isAllOnesOrAllOnesSplat(SplatValMI, MRI)) {
1054 auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1055 MIB.buildInstr(RISCV::G_VMSET_VL, {Dst}, {VL});
1056 MI.eraseFromParent();
1057 return true;
1058 }
1059 if (isNullOrNullSplat(SplatValMI, MRI)) {
1060 auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1061 MIB.buildInstr(RISCV::G_VMCLR_VL, {Dst}, {VL});
1062 MI.eraseFromParent();
1063 return true;
1064 }
1065
1066 // Handle non-constant mask splat (i.e. not sure if it's all zeros or all
1067 // ones) by promoting it to an s8 splat.
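// i.e. build icmp ne (splat s8 (zext(x) & 1)), (splat s8 0).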
1068 LLT InterEltTy = LLT::scalar(8);
1069 LLT InterTy = VecTy.changeElementType(InterEltTy);
1070 auto ZExtSplatVal = MIB.buildZExt(InterEltTy, SplatVal);
1071 auto And =
1072 MIB.buildAnd(InterEltTy, ZExtSplatVal, MIB.buildConstant(InterEltTy, 1));
1073 auto LHS = MIB.buildSplatVector(InterTy, And);
1074 auto ZeroSplat =
1075 MIB.buildSplatVector(InterTy, MIB.buildConstant(InterEltTy, 0));
1076 MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, LHS, ZeroSplat);
1077 MI.eraseFromParent();
1078 return true;
1079}
1080
1081static LLT getLMUL1Ty(LLT VecTy) {
1082 assert(VecTy.getElementType().getSizeInBits() <= 64 &&
1083 "Unexpected vector LLT");
1084 return LLT::scalable_vector(RISCV::RVVBitsPerBlock /
1085 VecTy.getElementType().getSizeInBits(),
1086 VecTy.getElementType());
1087}
1088
1089bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
1090 MachineIRBuilder &MIB) const {
1091 GExtractSubvector &ES = cast<GExtractSubvector>(MI);
1092
1093 MachineRegisterInfo &MRI = *MIB.getMRI();
1094
1095 Register Dst = ES.getReg(0);
1096 Register Src = ES.getSrcVec();
1097 uint64_t Idx = ES.getIndexImm();
1098
1099 // With an index of 0 this is a cast-like subvector, which can be performed
1100 // with subregister operations.
1101 if (Idx == 0)
1102 return true;
1103
1104 LLT LitTy = MRI.getType(Dst);
1105 LLT BigTy = MRI.getType(Src);
1106
1107 if (LitTy.getElementType() == LLT::scalar(1)) {
1108 // We can't slide this mask vector up indexed by its i1 elements.
1109 // This poses a problem when we wish to insert a scalable vector which
1110 // can't be re-expressed as a larger type. Just choose the slow path and
1111 // extend to a larger type, then truncate back down.
1112 LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
1113 LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
1114 auto BigZExt = MIB.buildZExt(ExtBigTy, Src);
1115 auto ExtractZExt = MIB.buildExtractSubvector(ExtLitTy, BigZExt, Idx);
1116 auto SplatZero = MIB.buildSplatVector(
1117 ExtLitTy, MIB.buildConstant(ExtLitTy.getElementType(), 0));
1118 MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, ExtractZExt, SplatZero);
1119 MI.eraseFromParent();
1120 return true;
1121 }
1122
1123 // extract_subvector scales the index by vscale if the subvector is scalable,
1124 // and decomposeSubvectorInsertExtractToSubRegs takes this into account.
1125 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1126 MVT LitTyMVT = getMVTForLLT(LitTy);
1127 auto Decompose =
1128 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1129 getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);
1130 unsigned RemIdx = Decompose.second;
1131
1132 // If the Idx has been completely eliminated then this is a subvector extract
1133 // which naturally aligns to a vector register. These can easily be handled
1134 // using subregister manipulation.
1135 if (RemIdx == 0)
1136 return true;
1137
1138 // Else LitTy is M1 or smaller and may need to be slid down: if LitTy
1139 // was > M1 then the index would need to be a multiple of VLMAX, and so would
1140 // divide exactly.
1141 assert(
1144
1145 // If the vector type is an LMUL-group type, extract a subvector equal to the
1146 // nearest full vector register type.
1147 LLT InterLitTy = BigTy;
1148 Register Vec = Src;
1149 if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
1150 getLMUL1Ty(BigTy).getSizeInBits())) {
1151 // If BigTy has an LMUL > 1, then LitTy should have a smaller LMUL, and
1152 // we should have successfully decomposed the extract into a subregister.
1153 assert(Decompose.first != RISCV::NoSubRegister);
1154 InterLitTy = getLMUL1Ty(BigTy);
1155 // SDAG builds a TargetExtractSubreg. We cannot create a Copy with SubReg
1156 // specified on the source Register (the equivalent) since generic virtual
1157 // registers do not allow subregister indices.
1158 Vec = MIB.buildExtractSubvector(InterLitTy, Src, Idx - RemIdx).getReg(0);
1159 }
1160
1161 // Slide this vector register down by the desired number of elements in order
1162 // to place the desired subvector starting at element 0.
1163 const LLT XLenTy(STI.getXLenVT());
1164 auto SlidedownAmt = MIB.buildVScale(XLenTy, RemIdx);
1165 auto [Mask, VL] = buildDefaultVLOps(LitTy, MIB, MRI);
1166 uint64_t Policy = RISCVVType::TAIL_AGNOSTIC;
1167 auto Slidedown = MIB.buildInstr(
1168 RISCV::G_VSLIDEDOWN_VL, {InterLitTy},
1169 {MIB.buildUndef(InterLitTy), Vec, SlidedownAmt, Mask, VL, Policy});
1170
1171 // Now the vector is in the right position, extract our final subvector. This
1172 // should resolve to a COPY.
1173 MIB.buildExtractSubvector(Dst, Slidedown, 0);
1174
1175 MI.eraseFromParent();
1176 return true;
1177}
1178
1179bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
1180 LegalizerHelper &Helper,
1181 MachineIRBuilder &MIB) const {
1182 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
1183
1184 MachineRegisterInfo &MRI = *MIB.getMRI();
1185
1186 Register Dst = IS.getReg(0);
1187 Register BigVec = IS.getBigVec();
1188 Register LitVec = IS.getSubVec();
1189 uint64_t Idx = IS.getIndexImm();
1190
1191 LLT BigTy = MRI.getType(BigVec);
1192 LLT LitTy = MRI.getType(LitVec);
1193
1194 if (Idx == 0 ||
1195 MRI.getVRegDef(BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
1196 return true;
1197
1198 // We don't have the ability to slide mask vectors up indexed by their i1
1199 // elements; the smallest we can do is i8. Often we are able to bitcast to
1200 // equivalent i8 vectors. Otherwise, we must zero-extend to equivalent i8
1201 // vectors and truncate down after the insert.
1202 if (LitTy.getElementType() == LLT::scalar(1)) {
1203 auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
1204 auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
1205 if (BigTyMinElts >= 8 && LitTyMinElts >= 8)
1206 return Helper.bitcast(
1207 IS, 0,
1208 LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8));
1209
1210 // We can't slide this mask vector up indexed by its i1 elements.
1211 // This poses a problem when we wish to insert a scalable vector which
1212 // can't be re-expressed as a larger type. Just choose the slow path and
1213 // extend to a larger type, then truncate back down.
1214 LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
1215 return Helper.widenScalar(IS, 0, ExtBigTy);
1216 }
1217
1218 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1219 unsigned SubRegIdx, RemIdx;
1220 std::tie(SubRegIdx, RemIdx) =
1222 getMVTForLLT(BigTy), getMVTForLLT(LitTy), Idx, TRI);
1223
1224 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
1225 assert(isPowerOf2_64(
1226 STI.expandVScale(LitTy.getSizeInBits()).getKnownMinValue()));
1227 bool ExactlyVecRegSized =
1228 STI.expandVScale(LitTy.getSizeInBits())
1229 .isKnownMultipleOf(STI.expandVScale(VecRegSize));
1230
1231 // If the Idx has been completely eliminated and this subvector's size is a
1232 // vector register or a multiple thereof, or the surrounding elements are
1233 // undef, then this is a subvector insert which naturally aligns to a vector
1234 // register. These can easily be handled using subregister manipulation.
1235 if (RemIdx == 0 && ExactlyVecRegSized)
1236 return true;
1237
1238 // If the subvector is smaller than a vector register, then the insertion
1239 // must preserve the undisturbed elements of the register. We do this by
1240 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
1241 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
1242 // subvector within the vector register, and an INSERT_SUBVECTOR of that
1243 // LMUL=1 type back into the larger vector (resolving to another subregister
1244 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
1245 // to avoid allocating a large register group to hold our subvector.
1246
1247 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
1248 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
1249 // (in our case undisturbed). This means we can set up a subvector insertion
1250 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
1251 // size of the subvector.
1252 const LLT XLenTy(STI.getXLenVT());
1253 LLT InterLitTy = BigTy;
1254 Register AlignedExtract = BigVec;
1255 unsigned AlignedIdx = Idx - RemIdx;
1256 if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
1257 getLMUL1Ty(BigTy).getSizeInBits())) {
1258 InterLitTy = getLMUL1Ty(BigTy);
1259 // Extract a subvector equal to the nearest full vector register type. This
1260 // should resolve to a G_EXTRACT on a subreg.
1261 AlignedExtract =
1262 MIB.buildExtractSubvector(InterLitTy, BigVec, AlignedIdx).getReg(0);
1263 }
1264
1265 auto Insert = MIB.buildInsertSubvector(InterLitTy, MIB.buildUndef(InterLitTy),
1266 LitVec, 0);
1267
1268 auto [Mask, _] = buildDefaultVLOps(BigTy, MIB, MRI);
1269 auto VL = MIB.buildVScale(XLenTy, LitTy.getElementCount().getKnownMinValue());
1270
1271 // If we're inserting into the lowest elements, use a tail undisturbed
1272 // vmv.v.v.
1273 MachineInstrBuilder Inserted;
1274 bool NeedInsertSubvec =
1275 TypeSize::isKnownGT(BigTy.getSizeInBits(), InterLitTy.getSizeInBits());
1276 Register InsertedDst =
1277 NeedInsertSubvec ? MRI.createGenericVirtualRegister(InterLitTy) : Dst;
1278 if (RemIdx == 0) {
1279 Inserted = MIB.buildInstr(RISCV::G_VMV_V_V_VL, {InsertedDst},
1280 {AlignedExtract, Insert, VL});
1281 } else {
1282 auto SlideupAmt = MIB.buildVScale(XLenTy, RemIdx);
1283 // Construct the vector length corresponding to RemIdx + length(LitTy).
1284 VL = MIB.buildAdd(XLenTy, SlideupAmt, VL);
1285 // Use tail agnostic policy if we're inserting over InterLitTy's tail.
1286 ElementCount EndIndex =
1287 ElementCount::getScalable(RemIdx) + LitTy.getElementCount();
1288 uint64_t Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
1289 if (STI.expandVScale(EndIndex) ==
1290 STI.expandVScale(InterLitTy.getElementCount()))
1291 Policy = RISCVVType::TAIL_AGNOSTIC;
1292
1293 Inserted =
1294 MIB.buildInstr(RISCV::G_VSLIDEUP_VL, {InsertedDst},
1295 {AlignedExtract, Insert, SlideupAmt, Mask, VL, Policy});
1296 }
1297
1298 // If required, insert this subvector back into the correct vector register.
1299 // This should resolve to an INSERT_SUBREG instruction.
1300 if (NeedInsertSubvec)
1301 MIB.buildInsertSubvector(Dst, BigVec, Inserted, AlignedIdx);
1302
1303 MI.eraseFromParent();
1304 return true;
1305}
1306
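// Map a generic opcode to the corresponding RISC-V 'W' (32-bit-on-RV64)
// generic opcode used during custom widening.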
1307static unsigned getRISCVWOpcode(unsigned Opcode) {
1308 switch (Opcode) {
1309 default:
1310 llvm_unreachable("Unexpected opcode");
1311 case TargetOpcode::G_ASHR:
1312 return RISCV::G_SRAW;
1313 case TargetOpcode::G_LSHR:
1314 return RISCV::G_SRLW;
1315 case TargetOpcode::G_SHL:
1316 return RISCV::G_SLLW;
1317 case TargetOpcode::G_SDIV:
1318 return RISCV::G_DIVW;
1319 case TargetOpcode::G_UDIV:
1320 return RISCV::G_DIVUW;
1321 case TargetOpcode::G_UREM:
1322 return RISCV::G_REMUW;
1323 case TargetOpcode::G_ROTL:
1324 return RISCV::G_ROLW;
1325 case TargetOpcode::G_ROTR:
1326 return RISCV::G_RORW;
1327 case TargetOpcode::G_CTLZ:
1328 return RISCV::G_CLZW;
1329 case TargetOpcode::G_CTTZ:
1330 return RISCV::G_CTZW;
1331 case TargetOpcode::G_FPTOSI:
1332 return RISCV::G_FCVT_W_RV64;
1333 case TargetOpcode::G_FPTOUI:
1334 return RISCV::G_FCVT_WU_RV64;
1335 }
1336}
1337
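// Entry point for everything marked customFor/customIf in the rules above.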
1338bool RISCVLegalizerInfo::legalizeCustom(
1339 LegalizerHelper &Helper, MachineInstr &MI,
1340 LostDebugLocObserver &LocObserver) const {
1341 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1342 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1343 MachineFunction &MF = *MI.getParent()->getParent();
1344 switch (MI.getOpcode()) {
1345 default:
1346 // No idea what to do.
1347 return false;
1348 case TargetOpcode::G_ABS:
1349 return Helper.lowerAbsToMaxNeg(MI);
1350 // TODO: G_FCONSTANT
1351 case TargetOpcode::G_CONSTANT: {
1352 const Function &F = MF.getFunction();
1353 // TODO: if PSI and BFI are present, add " ||
1354 // llvm::shouldOptForSize(*CurMBB, PSI, BFI)".
1355 bool ShouldOptForSize = F.hasOptSize();
1356 const ConstantInt *ConstVal = MI.getOperand(1).getCImm();
1357 if (!shouldBeInConstantPool(ConstVal->getValue(), ShouldOptForSize))
1358 return true;
1359 return Helper.lowerConstant(MI);
1360 }
1361 case TargetOpcode::G_SUB:
1362 case TargetOpcode::G_ADD: {
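// s32 add/sub on RV64: do the arithmetic in XLen, then sign-extend from bit 31
// and truncate so the result matches the W-instruction semantics.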
1363 Helper.Observer.changingInstr(MI);
1364 Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1365 Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
1366
1367 Register DstALU = MRI.createGenericVirtualRegister(sXLen);
1368
1369 MachineOperand &MO = MI.getOperand(0);
1370 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1371 auto DstSext = MIRBuilder.buildSExtInReg(sXLen, DstALU, 32);
1372
1373 MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {MO}, {DstSext});
1374 MO.setReg(DstALU);
1375
1376 Helper.Observer.changedInstr(MI);
1377 return true;
1378 }
1379 case TargetOpcode::G_SEXT_INREG: {
1380 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1381 int64_t SizeInBits = MI.getOperand(2).getImm();
1382 // Source size of 32 is sext.w.
1383 if (DstTy.getSizeInBits() == 64 && SizeInBits == 32)
1384 return true;
1385
1386 if (STI.hasStdExtZbb() && (SizeInBits == 8 || SizeInBits == 16))
1387 return true;
1388
1389 return Helper.lower(MI, 0, /* Unused hint type */ LLT()) ==
1390 LegalizerHelper::Legalized;
1391 }
1392 case TargetOpcode::G_ASHR:
1393 case TargetOpcode::G_LSHR:
1394 case TargetOpcode::G_SHL: {
1395 if (getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
1396 // We don't need a custom node for shift by constant. Just widen the
1397 // source and the shift amount.
1398 unsigned ExtOpc = TargetOpcode::G_ANYEXT;
1399 if (MI.getOpcode() == TargetOpcode::G_ASHR)
1400 ExtOpc = TargetOpcode::G_SEXT;
1401 else if (MI.getOpcode() == TargetOpcode::G_LSHR)
1402 ExtOpc = TargetOpcode::G_ZEXT;
1403
1404 Helper.Observer.changingInstr(MI);
1405 Helper.widenScalarSrc(MI, sXLen, 1, ExtOpc);
1406 Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ZEXT);
1407 Helper.widenScalarDst(MI, sXLen);
1408 Helper.Observer.changedInstr(MI);
1409 return true;
1410 }
1411
1412 Helper.Observer.changingInstr(MI);
1413 Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1414 Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
1415 Helper.widenScalarDst(MI, sXLen);
1416 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1417 Helper.Observer.changedInstr(MI);
1418 return true;
1419 }
1420 case TargetOpcode::G_SDIV:
1421 case TargetOpcode::G_UDIV:
1422 case TargetOpcode::G_UREM:
1423 case TargetOpcode::G_ROTL:
1424 case TargetOpcode::G_ROTR: {
1425 Helper.Observer.changingInstr(MI);
1426 Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1427 Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
1428 Helper.widenScalarDst(MI, sXLen);
1429 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1430 Helper.Observer.changedInstr(MI);
1431 return true;
1432 }
1433 case TargetOpcode::G_CTLZ:
1434 case TargetOpcode::G_CTTZ: {
1435 Helper.Observer.changingInstr(MI);
1436 Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1437 Helper.widenScalarDst(MI, sXLen);
1438 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1439 Helper.Observer.changedInstr(MI);
1440 return true;
1441 }
1442 case TargetOpcode::G_FPTOSI:
1443 case TargetOpcode::G_FPTOUI: {
1444 Helper.Observer.changingInstr(MI);
1445 Helper.widenScalarDst(MI, sXLen);
1446 MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1447 MI.addOperand(MachineOperand::CreateImm(RISCVFPRndMode::RTZ));
1448 Helper.Observer.changedInstr(MI);
1449 return true;
1450 }
1451 case TargetOpcode::G_IS_FPCLASS: {
1452 Register GISFPCLASS = MI.getOperand(0).getReg();
1453 Register Src = MI.getOperand(1).getReg();
1454 const MachineOperand &ImmOp = MI.getOperand(2);
1455 MachineIRBuilder MIB(MI);
1456
1457 // Map LLVM IR's floating-point class mask to the RISC-V fclass encoding
1458 // by simply rotating the 10-bit immediate right by two bits.
1459 APInt GFpClassImm(10, static_cast<uint64_t>(ImmOp.getImm()));
1460 auto FClassMask = MIB.buildConstant(sXLen, GFpClassImm.rotr(2).zext(XLen));
1461 auto ConstZero = MIB.buildConstant(sXLen, 0);
1462
1463 auto GFClass = MIB.buildInstr(RISCV::G_FCLASS, {sXLen}, {Src});
1464 auto And = MIB.buildAnd(sXLen, GFClass, FClassMask);
1465 MIB.buildICmp(CmpInst::ICMP_NE, GISFPCLASS, And, ConstZero);
1466
1467 MI.eraseFromParent();
1468 return true;
1469 }
1470 case TargetOpcode::G_BRJT:
1471 return legalizeBRJT(MI, MIRBuilder);
1472 case TargetOpcode::G_VASTART:
1473 return legalizeVAStart(MI, MIRBuilder);
1474 case TargetOpcode::G_VSCALE:
1475 return legalizeVScale(MI, MIRBuilder);
1476 case TargetOpcode::G_ZEXT:
1477 case TargetOpcode::G_SEXT:
1478 case TargetOpcode::G_ANYEXT:
1479 return legalizeExt(MI, MIRBuilder);
1480 case TargetOpcode::G_SPLAT_VECTOR:
1481 return legalizeSplatVector(MI, MIRBuilder);
1482 case TargetOpcode::G_EXTRACT_SUBVECTOR:
1483 return legalizeExtractSubvector(MI, MIRBuilder);
1484 case TargetOpcode::G_INSERT_SUBVECTOR:
1485 return legalizeInsertSubvector(MI, Helper, MIRBuilder);
1486 case TargetOpcode::G_LOAD:
1487 case TargetOpcode::G_STORE:
1488 return legalizeLoadStore(MI, Helper, MIRBuilder);
1489 }
1490
1491 llvm_unreachable("expected switch to return");
1492}
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LegalizeRuleSet & maxScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at most as wide as Ty.
LegalizeRuleSet & lower()
The instruction is lowered.
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
LegalizeRuleSet & customFor(std::initializer_list< LLT > Types)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:64
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildInsertSubvector(const DstOp &Res, const SrcOp &Src0, const SrcOp &Src1, unsigned Index)
Build and insert Res = G_INSERT_SUBVECTOR Src0, Src1, Idx.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineFunction & getMF()
Getter for the function we currently build.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildExtractSubvector(const DstOp &Res, const SrcOp &Src, unsigned Index)
Build and insert Res = G_EXTRACT_SUBVECTOR Src, Idx0.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildBrIndirect(Register Tgt)
Build and insert G_BRINDIRECT Tgt.
MachineInstrBuilder buildSplatVector(const DstOp &Res, const SrcOp &Val)
Build and insert Res = G_SPLAT_VECTOR Val.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
LLVM_ABI unsigned getEntrySize(const DataLayout &TD) const
getEntrySize - Return the size of each entry in the jump table.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
LLVM_ABI unsigned getEntryAlignment(const DataLayout &TD) const
getEntryAlignment - Return the alignment of each entry in the jump table.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const override
Called for instructions with the Custom LegalizationAction.
bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override
RISCVLegalizerInfo(const RISCVSubtarget &ST)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
static RISCVVType::VLMUL getLMUL(MVT VT)
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Register getReg() const
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
LLVM_ABI LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LLVM_ABI LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
LLVM_ABI LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
static constexpr unsigned RVVBitsPerBlock
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2029
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1605
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1587
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:336
std::function< bool(const LegalityQuery &)> LegalityPredicate
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
@ And
Bitwise or logical AND of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
ArrayRef< LLT > Types
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.