1//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Custom DAG lowering for R600
11//
12//===----------------------------------------------------------------------===//
13
14#include "R600ISelLowering.h"
15#include "AMDGPU.h"
17#include "R600Defines.h"
19#include "R600Subtarget.h"
20#include "R600TargetMachine.h"
22#include "llvm/IR/IntrinsicsAMDGPU.h"
23#include "llvm/IR/IntrinsicsR600.h"
25
26using namespace llvm;
27
28#include "R600GenCallingConv.inc"
29
30 R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
31 const R600Subtarget &STI)
32 : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
33 addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
34 addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
35 addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
36 addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
37 addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
38 addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
39
40 setBooleanContents(ZeroOrNegativeOneBooleanContent);
41 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
42
43 computeRegisterProperties(Subtarget->getRegisterInfo());
44
45 // Legalize loads and stores to the private address space.
46 setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom);
47
48 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
49 // spaces, so it is custom lowered to handle those where it isn't.
50 for (unsigned Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD})
51 for (MVT VT : MVT::integer_valuetypes()) {
52 setLoadExtAction(Op, VT, MVT::i1, Promote);
53 setLoadExtAction(Op, VT, MVT::i8, Custom);
54 setLoadExtAction(Op, VT, MVT::i16, Custom);
55 }
56
57 // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
58 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i32,
59 MVT::v2i1, Expand);
60
61 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v4i32,
62 MVT::v4i1, Expand);
63
64 setOperationAction(ISD::STORE, {MVT::i8, MVT::i32, MVT::v2i32, MVT::v4i32},
65 Custom);
66
67 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
68 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
69 // We need to include these since trunc STORES to PRIVATE need
70 // special handling to accommodate RMW
71 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
72 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
73 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
74 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
75 setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
76 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
77 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
78 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
79 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
80 setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);
81
82 // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
83 setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
84 setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);
85
86 // Set condition code actions
87 setCondCodeAction({ISD::SETO, ISD::SETUO, ISD::SETLT, ISD::SETLE, ISD::SETOLT,
88 ISD::SETOLE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGE,
89 ISD::SETUGT, ISD::SETULT, ISD::SETULE},
90 MVT::f32, Expand);
91
92 setCondCodeAction({ISD::SETLE, ISD::SETLT, ISD::SETULE, ISD::SETULT},
93 MVT::i32, Expand);
94
96
97 setOperationAction(ISD::SETCC, {MVT::v4i32, MVT::v2i32}, Expand);
98
99 setOperationAction(ISD::BR_CC, {MVT::i32, MVT::f32}, Expand);
101
103
105 {MVT::f32, MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
106 MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32},
107 Expand);
108
110 MVT::f64, Custom);
111
112 setOperationAction(ISD::SELECT_CC, {MVT::f32, MVT::i32}, Custom);
113
114 setOperationAction(ISD::SETCC, {MVT::i32, MVT::f32}, Expand);
115 setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT}, {MVT::i1, MVT::i64},
116 Custom);
117
118 setOperationAction(ISD::SELECT, {MVT::i32, MVT::f32, MVT::v2i32, MVT::v4i32},
119 Expand);
120
121 // ADD, SUB overflow.
122 // TODO: turn these into Legal?
123 if (Subtarget->hasCARRY())
124 setOperationAction(ISD::UADDO, MVT::i32, Custom);
125
126 if (Subtarget->hasBORROW())
127 setOperationAction(ISD::USUBO, MVT::i32, Custom);
128
129 // Expand sign extension of vectors
130 if (!Subtarget->hasBFE())
131 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
132
133 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i1, MVT::v4i1}, Expand);
134
135 if (!Subtarget->hasBFE())
136 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
137 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i8, MVT::v4i8}, Expand);
138
139 if (!Subtarget->hasBFE())
140 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
141 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i16, MVT::v4i16}, Expand);
142
144 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i32, MVT::v4i32}, Expand);
145
147
149
150 setOperationAction(ISD::EXTRACT_VECTOR_ELT,
151 {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);
152
153 setOperationAction(ISD::INSERT_VECTOR_ELT,
154 {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);
155
156 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
157 // to be Legal/Custom in order to avoid library calls.
158 setOperationAction({ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS}, MVT::i32,
159 Custom);
160
161 if (!Subtarget->hasFMA())
162 setOperationAction(ISD::FMA, {MVT::f32, MVT::f64}, Expand);
163
164 // FIXME: May need no denormals check
165 setOperationAction(ISD::FMAD, MVT::f32, Legal);
166
167 if (!Subtarget->hasBFI())
168 // fcopysign can be done in a single instruction with BFI.
169 setOperationAction(ISD::FCOPYSIGN, {MVT::f32, MVT::f64}, Expand);
170
171 if (!Subtarget->hasBCNT(32))
172 setOperationAction(ISD::CTPOP, MVT::i32, Expand);
173
174 if (!Subtarget->hasBCNT(64))
175 setOperationAction(ISD::CTPOP, MVT::i64, Expand);
176
177 if (Subtarget->hasFFBH())
178 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
179
180 if (Subtarget->hasFFBL())
181 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
182
183 // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
184 // need it for R600.
185 if (Subtarget->hasBFE())
186 setHasExtractBitsInsn(true);
187
190
191 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
192 for (MVT VT : ScalarIntVTs)
193 setOperationAction({ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT,
194 Expand);
195
196 // LLVM will expand these to atomic_cmp_swap(0)
197 // and atomic_swap, respectively.
198 setOperationAction({ISD::ATOMIC_LOAD, ISD::ATOMIC_STORE}, MVT::i32, Expand);
199
200 // We need to custom lower some of the intrinsics
201 setOperationAction({ISD::INTRINSIC_VOID, ISD::INTRINSIC_WO_CHAIN}, MVT::Other,
202 Custom);
203
205
208}
209
210 static bool isEOP(MachineBasicBlock::iterator I) {
211 if (std::next(I) == I->getParent()->end())
212 return false;
213 return std::next(I)->getOpcode() == R600::RETURN;
214}
215
216 MachineBasicBlock *
217 R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
218 MachineBasicBlock *BB) const {
219 MachineFunction *MF = BB->getParent();
220 MachineRegisterInfo &MRI = MF->getRegInfo();
221 MachineBasicBlock::iterator I = MI;
222 const R600InstrInfo *TII = Subtarget->getInstrInfo();
223
224 switch (MI.getOpcode()) {
225 default:
226 // Replace LDS_*_RET instructions that don't have any uses with the
227 // equivalent LDS_*_NORET instruction.
228 if (TII->isLDSRetInstr(MI.getOpcode())) {
229 int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
230 assert(DstIdx != -1);
231 MachineInstrBuilder NewMI;
232 // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
233 // LDS_1A2D support and remove this special case.
234 if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
235 MI.getOpcode() == R600::LDS_CMPST_RET)
236 return BB;
237
238 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
239 TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
240 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
241 NewMI.add(MO);
242 } else {
243 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
244 }
245 break;
246
247 case R600::FABS_R600: {
248 MachineInstr *NewMI = TII->buildDefaultInstruction(
249 *BB, I, R600::MOV, MI.getOperand(0).getReg(),
250 MI.getOperand(1).getReg());
251 TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
252 break;
253 }
254
255 case R600::FNEG_R600: {
256 MachineInstr *NewMI = TII->buildDefaultInstruction(
257 *BB, I, R600::MOV, MI.getOperand(0).getReg(),
258 MI.getOperand(1).getReg());
259 TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
260 break;
261 }
262
263 case R600::MASK_WRITE: {
264 Register maskedRegister = MI.getOperand(0).getReg();
265 assert(maskedRegister.isVirtual());
266 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
267 TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
268 break;
269 }
270
271 case R600::MOV_IMM_F32:
272 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
273 .getFPImm()
274 ->getValueAPF()
275 .bitcastToAPInt()
276 .getZExtValue());
277 break;
278
279 case R600::MOV_IMM_I32:
280 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
281 MI.getOperand(1).getImm());
282 break;
283
284 case R600::MOV_IMM_GLOBAL_ADDR: {
285 //TODO: Perhaps combine this instruction with the next if possible
286 auto MIB = TII->buildDefaultInstruction(
287 *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
288 int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
289 //TODO: Ugh this is rather ugly
290 const MachineOperand &MO = MI.getOperand(1);
291 MIB->getOperand(Idx).ChangeToGA(MO.getGlobal(), MO.getOffset(),
292 MO.getTargetFlags());
293 break;
294 }
295
296 case R600::CONST_COPY: {
297 MachineInstr *NewMI = TII->buildDefaultInstruction(
298 *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
299 TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
300 MI.getOperand(1).getImm());
301 break;
302 }
303
304 case R600::RAT_WRITE_CACHELESS_32_eg:
305 case R600::RAT_WRITE_CACHELESS_64_eg:
306 case R600::RAT_WRITE_CACHELESS_128_eg:
307 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
308 .add(MI.getOperand(0))
309 .add(MI.getOperand(1))
310 .addImm(isEOP(I)); // Set End of program bit
311 break;
312
313 case R600::RAT_STORE_TYPED_eg:
314 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
315 .add(MI.getOperand(0))
316 .add(MI.getOperand(1))
317 .add(MI.getOperand(2))
318 .addImm(isEOP(I)); // Set End of program bit
319 break;
320
321 case R600::BRANCH:
322 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
323 .add(MI.getOperand(0));
324 break;
325
326 case R600::BRANCH_COND_f32: {
327 MachineInstr *NewMI =
328 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
329 R600::PREDICATE_BIT)
330 .add(MI.getOperand(1))
331 .addImm(R600::PRED_SETNE)
332 .addImm(0); // Flags
333 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
334 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
335 .add(MI.getOperand(0))
336 .addReg(R600::PREDICATE_BIT, RegState::Kill);
337 break;
338 }
339
340 case R600::BRANCH_COND_i32: {
341 MachineInstr *NewMI =
342 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
343 R600::PREDICATE_BIT)
344 .add(MI.getOperand(1))
345 .addImm(R600::PRED_SETNE_INT)
346 .addImm(0); // Flags
347 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
348 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
349 .add(MI.getOperand(0))
350 .addReg(R600::PREDICATE_BIT, RegState::Kill);
351 break;
352 }
353
354 case R600::EG_ExportSwz:
355 case R600::R600_ExportSwz: {
356 // Instruction is left unmodified if it's not the last one of its type
357 bool isLastInstructionOfItsType = true;
358 unsigned InstExportType = MI.getOperand(1).getImm();
359 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
360 EndBlock = BB->end(); NextExportInst != EndBlock;
361 NextExportInst = std::next(NextExportInst)) {
362 if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
363 NextExportInst->getOpcode() == R600::R600_ExportSwz) {
364 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
365 .getImm();
366 if (CurrentInstExportType == InstExportType) {
367 isLastInstructionOfItsType = false;
368 break;
369 }
370 }
371 }
372 bool EOP = isEOP(I);
373 if (!EOP && !isLastInstructionOfItsType)
374 return BB;
375 unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
376 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
377 .add(MI.getOperand(0))
378 .add(MI.getOperand(1))
379 .add(MI.getOperand(2))
380 .add(MI.getOperand(3))
381 .add(MI.getOperand(4))
382 .add(MI.getOperand(5))
383 .add(MI.getOperand(6))
384 .addImm(CfInst)
385 .addImm(EOP);
386 break;
387 }
388 case R600::RETURN: {
389 return BB;
390 }
391 }
392
393 MI.eraseFromParent();
394 return BB;
395}
396
397//===----------------------------------------------------------------------===//
398// Custom DAG Lowering Operations
399//===----------------------------------------------------------------------===//
400
401 SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
402 MachineFunction &MF = DAG.getMachineFunction();
403 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
404 switch (Op.getOpcode()) {
405 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
406 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
407 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
408 case ISD::SHL_PARTS:
409 case ISD::SRA_PARTS:
410 case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
411 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
412 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
413 case ISD::FCOS:
414 case ISD::FSIN: return LowerTrig(Op, DAG);
415 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
416 case ISD::STORE: return LowerSTORE(Op, DAG);
417 case ISD::LOAD: {
418 SDValue Result = LowerLOAD(Op, DAG);
419 assert((!Result.getNode() ||
420 Result.getNode()->getNumValues() == 2) &&
421 "Load should return a value and a chain");
422 return Result;
423 }
424
425 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
426 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
427 case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
428 case ISD::ADDRSPACECAST:
429 return lowerADDRSPACECAST(Op, DAG);
430 case ISD::INTRINSIC_VOID: {
431 SDValue Chain = Op.getOperand(0);
432 unsigned IntrinsicID = Op.getConstantOperandVal(1);
433 switch (IntrinsicID) {
434 case Intrinsic::r600_store_swizzle: {
435 SDLoc DL(Op);
436 const SDValue Args[8] = {
437 Chain,
438 Op.getOperand(2), // Export Value
439 Op.getOperand(3), // ArrayBase
440 Op.getOperand(4), // Type
441 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
442 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
443 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
444 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
445 };
446 return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
447 }
448
449 // default for switch(IntrinsicID)
450 default: break;
451 }
452 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
453 break;
454 }
455 case ISD::INTRINSIC_WO_CHAIN: {
456 unsigned IntrinsicID = Op.getConstantOperandVal(0);
457 EVT VT = Op.getValueType();
458 SDLoc DL(Op);
459 switch (IntrinsicID) {
460 case Intrinsic::r600_tex:
461 case Intrinsic::r600_texc: {
462 unsigned TextureOp;
463 switch (IntrinsicID) {
464 case Intrinsic::r600_tex:
465 TextureOp = 0;
466 break;
467 case Intrinsic::r600_texc:
468 TextureOp = 1;
469 break;
470 default:
471 llvm_unreachable("unhandled texture operation");
472 }
473
474 SDValue TexArgs[19] = {
475 DAG.getConstant(TextureOp, DL, MVT::i32),
476 Op.getOperand(1),
477 DAG.getConstant(0, DL, MVT::i32),
478 DAG.getConstant(1, DL, MVT::i32),
479 DAG.getConstant(2, DL, MVT::i32),
480 DAG.getConstant(3, DL, MVT::i32),
481 Op.getOperand(2),
482 Op.getOperand(3),
483 Op.getOperand(4),
484 DAG.getConstant(0, DL, MVT::i32),
485 DAG.getConstant(1, DL, MVT::i32),
486 DAG.getConstant(2, DL, MVT::i32),
487 DAG.getConstant(3, DL, MVT::i32),
488 Op.getOperand(5),
489 Op.getOperand(6),
490 Op.getOperand(7),
491 Op.getOperand(8),
492 Op.getOperand(9),
493 Op.getOperand(10)
494 };
495 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
496 }
497 case Intrinsic::r600_dot4: {
498 SDValue Args[8] = {
499 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
500 DAG.getConstant(0, DL, MVT::i32)),
501 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
502 DAG.getConstant(0, DL, MVT::i32)),
503 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
504 DAG.getConstant(1, DL, MVT::i32)),
505 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
506 DAG.getConstant(1, DL, MVT::i32)),
507 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
508 DAG.getConstant(2, DL, MVT::i32)),
509 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
510 DAG.getConstant(2, DL, MVT::i32)),
511 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
512 DAG.getConstant(3, DL, MVT::i32)),
513 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
514 DAG.getConstant(3, DL, MVT::i32))
515 };
516 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
517 }
518
519 case Intrinsic::r600_implicitarg_ptr: {
520 MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
521 uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
522 return DAG.getConstant(ByteOffset, DL, PtrVT);
523 }
524 case Intrinsic::r600_read_ngroups_x:
525 return LowerImplicitParameter(DAG, VT, DL, 0);
526 case Intrinsic::r600_read_ngroups_y:
527 return LowerImplicitParameter(DAG, VT, DL, 1);
528 case Intrinsic::r600_read_ngroups_z:
529 return LowerImplicitParameter(DAG, VT, DL, 2);
530 case Intrinsic::r600_read_global_size_x:
531 return LowerImplicitParameter(DAG, VT, DL, 3);
532 case Intrinsic::r600_read_global_size_y:
533 return LowerImplicitParameter(DAG, VT, DL, 4);
534 case Intrinsic::r600_read_global_size_z:
535 return LowerImplicitParameter(DAG, VT, DL, 5);
536 case Intrinsic::r600_read_local_size_x:
537 return LowerImplicitParameter(DAG, VT, DL, 6);
538 case Intrinsic::r600_read_local_size_y:
539 return LowerImplicitParameter(DAG, VT, DL, 7);
540 case Intrinsic::r600_read_local_size_z:
541 return LowerImplicitParameter(DAG, VT, DL, 8);
542
543 case Intrinsic::r600_read_tgid_x:
544 case Intrinsic::amdgcn_workgroup_id_x:
545 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
546 R600::T1_X, VT);
547 case Intrinsic::r600_read_tgid_y:
548 case Intrinsic::amdgcn_workgroup_id_y:
549 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
550 R600::T1_Y, VT);
551 case Intrinsic::r600_read_tgid_z:
552 case Intrinsic::amdgcn_workgroup_id_z:
553 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
554 R600::T1_Z, VT);
555 case Intrinsic::r600_read_tidig_x:
556 case Intrinsic::amdgcn_workitem_id_x:
557 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
558 R600::T0_X, VT);
559 case Intrinsic::r600_read_tidig_y:
560 case Intrinsic::amdgcn_workitem_id_y:
561 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
562 R600::T0_Y, VT);
563 case Intrinsic::r600_read_tidig_z:
564 case Intrinsic::amdgcn_workitem_id_z:
565 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
566 R600::T0_Z, VT);
567
568 case Intrinsic::r600_recipsqrt_ieee:
569 return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
570
571 case Intrinsic::r600_recipsqrt_clamped:
572 return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
573 default:
574 return Op;
575 }
576
577 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
578 break;
579 }
580 } // end switch(Op.getOpcode())
581 return SDValue();
582}
583
584 void R600TargetLowering::ReplaceNodeResults(SDNode *N,
585 SmallVectorImpl<SDValue> &Results,
586 SelectionDAG &DAG) const {
587 switch (N->getOpcode()) {
588 default:
589 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
590 return;
591 case ISD::FP_TO_UINT:
592 if (N->getValueType(0) == MVT::i1) {
593 Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
594 return;
595 }
596 // Since we don't care about out of bounds values we can use FP_TO_SINT for
597 // uints too. The DAGLegalizer code for uint considers some extra cases
598 // which are not necessary here.
599 [[fallthrough]];
600 case ISD::FP_TO_SINT: {
601 if (N->getValueType(0) == MVT::i1) {
602 Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
603 return;
604 }
605
606 SDValue Result;
607 if (expandFP_TO_SINT(N, Result, DAG))
608 Results.push_back(Result);
609 return;
610 }
611 case ISD::SDIVREM: {
612 SDValue Op = SDValue(N, 1);
613 SDValue RES = LowerSDIVREM(Op, DAG);
614 Results.push_back(RES);
615 Results.push_back(RES.getValue(1));
616 break;
617 }
618 case ISD::UDIVREM: {
619 SDValue Op = SDValue(N, 0);
621 break;
622 }
623 }
624}
625
626SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
627 SDValue Vector) const {
628 SDLoc DL(Vector);
629 EVT VecVT = Vector.getValueType();
630 EVT EltVT = VecVT.getVectorElementType();
631 SmallVector<SDValue, 8> Args;
632
633 for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
634 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
635 DAG.getVectorIdxConstant(i, DL)));
636 }
637
638 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
639}
640
641SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
642 SelectionDAG &DAG) const {
643 SDLoc DL(Op);
644 SDValue Vector = Op.getOperand(0);
645 SDValue Index = Op.getOperand(1);
646
647 if (isa<ConstantSDNode>(Index) ||
648 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
649 return Op;
650
651 Vector = vectorToVerticalVector(DAG, Vector);
652 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
653 Vector, Index);
654}
655
656SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
657 SelectionDAG &DAG) const {
658 SDLoc DL(Op);
659 SDValue Vector = Op.getOperand(0);
660 SDValue Value = Op.getOperand(1);
661 SDValue Index = Op.getOperand(2);
662
663 if (isa<ConstantSDNode>(Index) ||
664 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
665 return Op;
666
667 Vector = vectorToVerticalVector(DAG, Vector);
668 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
669 Vector, Value, Index);
670 return vectorToVerticalVector(DAG, Insert);
671}
672
673SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
674 SDValue Op,
675 SelectionDAG &DAG) const {
676 GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
677 if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
678 return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
679
680 const DataLayout &DL = DAG.getDataLayout();
681 const GlobalValue *GV = GSD->getGlobal();
682 MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
683
684 SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
685 return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
686}
687
688SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
689 // On hw >= R700, COS/SIN input must be between -1. and 1.
690 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
691 EVT VT = Op.getValueType();
692 SDValue Arg = Op.getOperand(0);
693 SDLoc DL(Op);
694
695 // TODO: Should this propagate fast-math-flags?
696 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
697 DAG.getNode(ISD::FADD, DL, VT,
698 DAG.getNode(ISD::FMUL, DL, VT, Arg,
699 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
700 DAG.getConstantFP(0.5, DL, MVT::f32)));
701 unsigned TrigNode;
702 switch (Op.getOpcode()) {
703 case ISD::FCOS:
704 TrigNode = AMDGPUISD::COS_HW;
705 break;
706 case ISD::FSIN:
707 TrigNode = AMDGPUISD::SIN_HW;
708 break;
709 default:
710 llvm_unreachable("Wrong trig opcode");
711 }
712 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
713 DAG.getNode(ISD::FADD, DL, VT, FractPart,
714 DAG.getConstantFP(-0.5, DL, MVT::f32)));
715 if (Gen >= AMDGPUSubtarget::R700)
716 return TrigVal;
717 // On R600 hw, COS/SIN input must be between -Pi and Pi.
718 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
719 DAG.getConstantFP(numbers::pif, DL, MVT::f32));
720}
721
722SDValue R600TargetLowering::LowerShiftParts(SDValue Op,
723 SelectionDAG &DAG) const {
724 SDValue Lo, Hi;
725 expandShiftParts(Op.getNode(), Lo, Hi, DAG);
726 return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
727}
728
729SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
730 unsigned mainop, unsigned ovf) const {
731 SDLoc DL(Op);
732 EVT VT = Op.getValueType();
733
734 SDValue Lo = Op.getOperand(0);
735 SDValue Hi = Op.getOperand(1);
736
737 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
738 // Extend sign.
739 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
740 DAG.getValueType(MVT::i1));
741
742 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
743
744 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
745}
746
747SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
748 SDLoc DL(Op);
749 return DAG.getNode(
750 ISD::SETCC,
751 DL,
752 MVT::i1,
753 Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
754 DAG.getCondCode(ISD::SETGE));
755}
756
757SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
758 SDLoc DL(Op);
759 return DAG.getNode(
760 ISD::SETCC,
761 DL,
762 MVT::i1,
763 Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
764 DAG.getCondCode(ISD::SETLE));
765}
766
767SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
768 const SDLoc &DL,
769 unsigned DwordOffset) const {
770 unsigned ByteOffset = DwordOffset * 4;
771 PointerType *PtrType =
773
774 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
775 assert(isInt<16>(ByteOffset));
776
777 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
778 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
780}
781
782bool R600TargetLowering::isZero(SDValue Op) const {
783 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op))
784 return Cst->isZero();
785 if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op))
786 return CstFP->isZero();
787 return false;
788}
789
790bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
791 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
792 return CFP->isExactlyValue(1.0);
793 }
794 return isAllOnesConstant(Op);
795}
796
797bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
798 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
799 return CFP->getValueAPF().isZero();
800 }
801 return isNullConstant(Op);
802}
803
804SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
805 SDLoc DL(Op);
806 EVT VT = Op.getValueType();
807
808 SDValue LHS = Op.getOperand(0);
809 SDValue RHS = Op.getOperand(1);
810 SDValue True = Op.getOperand(2);
811 SDValue False = Op.getOperand(3);
812 SDValue CC = Op.getOperand(4);
813 SDValue Temp;
814
815 if (VT == MVT::f32) {
816 DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
817 SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
818 if (MinMax)
819 return MinMax;
820 }
821
822 // LHS and RHS are guaranteed to be the same value type
823 EVT CompareVT = LHS.getValueType();
824
825 // Check if we can lower this to a native operation.
826
827 // Try to lower to a SET* instruction:
828 //
829 // SET* can match the following patterns:
830 //
831 // select_cc f32, f32, -1, 0, cc_supported
832 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
833 // select_cc i32, i32, -1, 0, cc_supported
834 //
835
836 // Move hardware True/False values to the correct operand.
837 if (isHWTrueValue(False) && isHWFalseValue(True)) {
838 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
839 ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT);
840 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
841 std::swap(False, True);
842 CC = DAG.getCondCode(InverseCC);
843 } else {
844 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
845 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
846 std::swap(False, True);
847 std::swap(LHS, RHS);
848 CC = DAG.getCondCode(SwapInvCC);
849 }
850 }
851 }
852
853 if (isHWTrueValue(True) && isHWFalseValue(False) &&
854 (CompareVT == VT || VT == MVT::i32)) {
855 // This can be matched by a SET* instruction.
856 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
857 }
858
859 // Try to lower to a CND* instruction:
860 //
861 // CND* can match the following patterns:
862 //
863 // select_cc f32, 0.0, f32, f32, cc_supported
864 // select_cc f32, 0.0, i32, i32, cc_supported
865 // select_cc i32, 0, f32, f32, cc_supported
866 // select_cc i32, 0, i32, i32, cc_supported
867 //
868
869 // Try to move the zero value to the RHS
870 if (isZero(LHS)) {
871 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
872 // Try swapping the operands
873 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
874 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
875 std::swap(LHS, RHS);
876 CC = DAG.getCondCode(CCSwapped);
877 } else {
878 // Try inverting the condition and then swapping the operands
879 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
880 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
881 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
882 std::swap(True, False);
883 std::swap(LHS, RHS);
884 CC = DAG.getCondCode(CCSwapped);
885 }
886 }
887 }
888 if (isZero(RHS)) {
889 SDValue Cond = LHS;
890 SDValue Zero = RHS;
891 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
892 if (CompareVT != VT) {
893 // Bitcast True / False to the correct types. This will end up being
894 // a nop, but it allows us to define only a single pattern in the
895 // .TD files for each CND* instruction rather than having to have
896 // one pattern for integer True/False and one for fp True/False
897 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
898 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
899 }
900
901 switch (CCOpcode) {
902 case ISD::SETONE:
903 case ISD::SETUNE:
904 case ISD::SETNE:
905 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT);
906 Temp = True;
907 True = False;
908 False = Temp;
909 break;
910 default:
911 break;
912 }
913 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
914 Cond, Zero,
915 True, False,
916 DAG.getCondCode(CCOpcode));
917 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
918 }
919
920 // If we make it this far, it means we have no native instructions to handle
921 // this SELECT_CC, so we must lower it.
922 SDValue HWTrue, HWFalse;
923
924 if (CompareVT == MVT::f32) {
925 HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
926 HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
927 } else if (CompareVT == MVT::i32) {
928 HWTrue = DAG.getAllOnesConstant(DL, CompareVT);
929 HWFalse = DAG.getConstant(0, DL, CompareVT);
930 }
931 else {
932 llvm_unreachable("Unhandled value type in LowerSELECT_CC");
933 }
934
935 // Lower this unsupported SELECT_CC into a combination of two supported
936 // SELECT_CC operations.
937 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
938
939 return DAG.getNode(ISD::SELECT_CC, DL, VT,
940 Cond, HWFalse,
941 True, False,
942 DAG.getCondCode(ISD::SETNE));
943}
944
945SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op,
946 SelectionDAG &DAG) const {
947 SDLoc SL(Op);
948 EVT VT = Op.getValueType();
949
950 const R600TargetMachine &TM =
951 static_cast<const R600TargetMachine &>(getTargetMachine());
952
953 const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op);
954 unsigned SrcAS = ASC->getSrcAddressSpace();
955 unsigned DestAS = ASC->getDestAddressSpace();
956
957 if (isNullConstant(Op.getOperand(0)) && SrcAS == AMDGPUAS::FLAT_ADDRESS)
958 return DAG.getSignedConstant(TM.getNullPointerValue(DestAS), SL, VT);
959
960 return Op;
961}
962
963/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
964/// convert these pointers to a register index. Each register holds
965/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
966/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
967/// for indirect addressing.
968SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
969 unsigned StackWidth,
970 SelectionDAG &DAG) const {
971 unsigned SRLPad;
972 switch(StackWidth) {
973 case 1:
974 SRLPad = 2;
975 break;
976 case 2:
977 SRLPad = 3;
978 break;
979 case 4:
980 SRLPad = 4;
981 break;
982 default: llvm_unreachable("Invalid stack width");
983 }
984
985 SDLoc DL(Ptr);
986 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
987 DAG.getConstant(SRLPad, DL, MVT::i32));
988}
989
990void R600TargetLowering::getStackAddress(unsigned StackWidth,
991 unsigned ElemIdx,
992 unsigned &Channel,
993 unsigned &PtrIncr) const {
994 switch (StackWidth) {
995 default:
996 case 1:
997 Channel = 0;
998 if (ElemIdx > 0) {
999 PtrIncr = 1;
1000 } else {
1001 PtrIncr = 0;
1002 }
1003 break;
1004 case 2:
1005 Channel = ElemIdx % 2;
1006 if (ElemIdx == 2) {
1007 PtrIncr = 1;
1008 } else {
1009 PtrIncr = 0;
1010 }
1011 break;
1012 case 4:
1013 Channel = ElemIdx;
1014 PtrIncr = 0;
1015 break;
1016 }
1017}
1018
1019SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1020 SelectionDAG &DAG) const {
1021 SDLoc DL(Store);
1022 //TODO: Who creates the i8 stores?
1023 assert(Store->isTruncatingStore()
1024 || Store->getValue().getValueType() == MVT::i8);
1025 assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
1026
1027 SDValue Mask;
1028 if (Store->getMemoryVT() == MVT::i8) {
1029 assert(Store->getAlign() >= 1);
1030 Mask = DAG.getConstant(0xff, DL, MVT::i32);
1031 } else if (Store->getMemoryVT() == MVT::i16) {
1032 assert(Store->getAlign() >= 2);
1033 Mask = DAG.getConstant(0xffff, DL, MVT::i32);
1034 } else {
1035 llvm_unreachable("Unsupported private trunc store");
1036 }
1037
1038 SDValue OldChain = Store->getChain();
1039 bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
1040 // Skip dummy
1041 SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
1042 SDValue BasePtr = Store->getBasePtr();
1043 SDValue Offset = Store->getOffset();
1044 EVT MemVT = Store->getMemoryVT();
1045
1046 SDValue LoadPtr = BasePtr;
1047 if (!Offset.isUndef()) {
1048 LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1049 }
1050
1051 // Get dword location
1052 // TODO: this should be eliminated by the future SHR ptr, 2
1053 SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1054 DAG.getConstant(0xfffffffc, DL, MVT::i32));
1055
1056 // Load dword
1057 // TODO: can we be smarter about machine pointer info?
1058 MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
1059 SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1060
1061 Chain = Dst.getValue(1);
1062
1063 // Get offset in dword
1064 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1065 DAG.getConstant(0x3, DL, MVT::i32));
1066
1067 // Convert byte offset to bit shift
1068 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1069 DAG.getConstant(3, DL, MVT::i32));
1070
1071 // TODO: Contrary to the name of the function,
1072 // it also handles sub i32 non-truncating stores (like i1)
1073 SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1074 Store->getValue());
1075
1076 // Mask the value to the right type
1077 SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1078
1079 // Shift the value in place
1080 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1081 MaskedValue, ShiftAmt);
1082
1083 // Shift the mask in place
1084 SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
1085
1086 // Invert the mask. NOTE: if we had native ROL instructions we could
1087 // use inverted mask
1088 DstMask = DAG.getNOT(DL, DstMask, MVT::i32);
1089
1090 // Cleanup the target bits
1091 Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1092
1093 // Add the new bits
1094 SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1095
1096 // Store dword
1097 // TODO: Can we be smarter about MachinePointerInfo?
1098 SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);
1099
1100 // If we are part of expanded vector, make our neighbors depend on this store
1101 if (VectorTrunc) {
1102 // Make all other vector elements depend on this store
1103 Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
1104 DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
1105 }
1106 return NewStore;
1107}
1108
1109SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1110 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1111 unsigned AS = StoreNode->getAddressSpace();
1112
1113 SDValue Chain = StoreNode->getChain();
1114 SDValue Ptr = StoreNode->getBasePtr();
1115 SDValue Value = StoreNode->getValue();
1116
1117 EVT VT = Value.getValueType();
1118 EVT MemVT = StoreNode->getMemoryVT();
1119 EVT PtrVT = Ptr.getValueType();
1120
1121 SDLoc DL(Op);
1122
1123 const bool TruncatingStore = StoreNode->isTruncatingStore();
1124
1125 // Neither LOCAL nor PRIVATE can do vectors at the moment
1126 if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||
1127 TruncatingStore) &&
1128 VT.isVector()) {
1129 if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
1130 // Add an extra level of chain to isolate this vector
1131 SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
1132 // TODO: can the chain be replaced without creating a new store?
1133 SDValue NewStore = DAG.getTruncStore(
1134 NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), MemVT,
1135 StoreNode->getAlign(), StoreNode->getMemOperand()->getFlags(),
1136 StoreNode->getAAInfo());
1137 StoreNode = cast<StoreSDNode>(NewStore);
1138 }
1139
1140 return scalarizeVectorStore(StoreNode, DAG);
1141 }
1142
1143 Align Alignment = StoreNode->getAlign();
1144 if (Alignment < MemVT.getStoreSize() &&
1145 !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
1146 StoreNode->getMemOperand()->getFlags(),
1147 nullptr)) {
1148 return expandUnalignedStore(StoreNode, DAG);
1149 }
1150
1151 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
1152 DAG.getConstant(2, DL, PtrVT));
1153
1154 if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
1155 // It is beneficial to create MSKOR here instead of combiner to avoid
1156 // artificial dependencies introduced by RMW
1157 if (TruncatingStore) {
1158 assert(VT.bitsLE(MVT::i32));
1159 SDValue MaskConstant;
1160 if (MemVT == MVT::i8) {
1161 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
1162 } else {
1163 assert(MemVT == MVT::i16);
1164 assert(StoreNode->getAlign() >= 2);
1165 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
1166 }
1167
1168 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
1169 DAG.getConstant(0x00000003, DL, PtrVT));
1170 SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1171 DAG.getConstant(3, DL, VT));
1172
1173 // Put the mask in correct place
1174 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
1175
1176 // Put the value bits in correct place
1177 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1178 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
1179
1180 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1181 // vector instead.
1182 SDValue Src[4] = {
1183 ShiftedValue,
1184 DAG.getConstant(0, DL, MVT::i32),
1185 DAG.getConstant(0, DL, MVT::i32),
1186 Mask
1187 };
1188 SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
1189 SDValue Args[3] = { Chain, Input, DWordAddr };
1190 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1191 Op->getVTList(), Args, MemVT,
1192 StoreNode->getMemOperand());
1193 }
1194 if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
1195 // Convert pointer from byte address to dword address.
1196 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1197
1198 if (StoreNode->isIndexed()) {
1199 llvm_unreachable("Indexed stores not supported yet");
1200 } else {
1201 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1202 }
1203 return Chain;
1204 }
1205 }
1206
1207 // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
1208 if (AS != AMDGPUAS::PRIVATE_ADDRESS)
1209 return SDValue();
1210
1211 if (MemVT.bitsLT(MVT::i32))
1212 return lowerPrivateTruncStore(StoreNode, DAG);
1213
1214 // Standard i32+ store, tag it with DWORDADDR to note that the address
1215 // has been shifted
1216 if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1217 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1218 return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1219 }
1220
1221 // Tagged i32+ stores will be matched by patterns
1222 return SDValue();
1223}
1224
1225 // return (512 + (kc_bank << 12))
1226 static int
1227 ConstantAddressBlock(unsigned AddressSpace) {
1228 switch (AddressSpace) {
1229 case AMDGPUAS::CONSTANT_BUFFER_0:
1230 return 512;
1231 case AMDGPUAS::CONSTANT_BUFFER_1:
1232 return 512 + 4096;
1233 case AMDGPUAS::CONSTANT_BUFFER_2:
1234 return 512 + 4096 * 2;
1235 case AMDGPUAS::CONSTANT_BUFFER_3:
1236 return 512 + 4096 * 3;
1237 case AMDGPUAS::CONSTANT_BUFFER_4:
1238 return 512 + 4096 * 4;
1239 case AMDGPUAS::CONSTANT_BUFFER_5:
1240 return 512 + 4096 * 5;
1241 case AMDGPUAS::CONSTANT_BUFFER_6:
1242 return 512 + 4096 * 6;
1243 case AMDGPUAS::CONSTANT_BUFFER_7:
1244 return 512 + 4096 * 7;
1245 case AMDGPUAS::CONSTANT_BUFFER_8:
1246 return 512 + 4096 * 8;
1247 case AMDGPUAS::CONSTANT_BUFFER_9:
1248 return 512 + 4096 * 9;
1249 case AMDGPUAS::CONSTANT_BUFFER_10:
1250 return 512 + 4096 * 10;
1251 case AMDGPUAS::CONSTANT_BUFFER_11:
1252 return 512 + 4096 * 11;
1253 case AMDGPUAS::CONSTANT_BUFFER_12:
1254 return 512 + 4096 * 12;
1255 case AMDGPUAS::CONSTANT_BUFFER_13:
1256 return 512 + 4096 * 13;
1257 case AMDGPUAS::CONSTANT_BUFFER_14:
1258 return 512 + 4096 * 14;
1259 case AMDGPUAS::CONSTANT_BUFFER_15:
1260 return 512 + 4096 * 15;
1261 default:
1262 return -1;
1263 }
1264}
1265
1266SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1267 SelectionDAG &DAG) const {
1268 SDLoc DL(Op);
1269 LoadSDNode *Load = cast<LoadSDNode>(Op);
1270 ISD::LoadExtType ExtType = Load->getExtensionType();
1271 EVT MemVT = Load->getMemoryVT();
1272 assert(Load->getAlign() >= MemVT.getStoreSize());
1273
1274 SDValue BasePtr = Load->getBasePtr();
1275 SDValue Chain = Load->getChain();
1276 SDValue Offset = Load->getOffset();
1277
1278 SDValue LoadPtr = BasePtr;
1279 if (!Offset.isUndef()) {
1280 LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1281 }
1282
1283 // Get dword location
1284 // NOTE: this should be eliminated by the future SHR ptr, 2
1285 SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1286 DAG.getConstant(0xfffffffc, DL, MVT::i32));
1287
1288 // Load dword
1289 // TODO: can we be smarter about machine pointer info?
1290 MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
1291 SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1292
1293 // Get offset within the register.
1294 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1295 LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));
1296
1297 // Bit offset of target byte (byteIdx * 8).
1298 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1299 DAG.getConstant(3, DL, MVT::i32));
1300
1301 // Shift to the right.
1302 SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);
1303
1304 // Eliminate the upper bits by setting them to ...
1305 EVT MemEltVT = MemVT.getScalarType();
1306
1307 if (ExtType == ISD::SEXTLOAD) { // ... ones.
1308 SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1309 Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
1310 } else { // ... or zeros.
1311 Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
1312 }
1313
1314 SDValue Ops[] = {
1315 Ret,
1316 Read.getValue(1) // This should be our output chain
1317 };
1318
1319 return DAG.getMergeValues(Ops, DL);
1320}
1321
1322SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1323 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1324 unsigned AS = LoadNode->getAddressSpace();
1325 EVT MemVT = LoadNode->getMemoryVT();
1326 ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1327
1328 if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1329 ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1330 return lowerPrivateExtLoad(Op, DAG);
1331 }
1332
1333 SDLoc DL(Op);
1334 EVT VT = Op.getValueType();
1335 SDValue Chain = LoadNode->getChain();
1336 SDValue Ptr = LoadNode->getBasePtr();
1337
1338 if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1339 LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
1340 VT.isVector()) {
1341 SDValue Ops[2];
1342 std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG);
1343 return DAG.getMergeValues(Ops, DL);
1344 }
1345
1346 // This is still used for explicit load from addrspace(8)
1347 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1348 if (ConstantBlock > -1 &&
1349 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1350 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
1351 SDValue Result;
1352 if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
1353 isa<ConstantSDNode>(Ptr)) {
1354 return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
1355 }
1356 // TODO: Does this even work?
1357 // non-constant ptr can't be folded, keeps it as a v4f32 load
1358 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
1359 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1360 DAG.getConstant(4, DL, MVT::i32)),
1361 DAG.getConstant(LoadNode->getAddressSpace() -
1362 AMDGPUAS::CONSTANT_BUFFER_0,
1363 DL, MVT::i32));
1364
1365 if (!VT.isVector()) {
1366 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1367 DAG.getConstant(0, DL, MVT::i32));
1368 }
1369
1370 SDValue MergedValues[2] = {
1371 Result,
1372 Chain
1373 };
1374 return DAG.getMergeValues(MergedValues, DL);
1375 }
1376
1377 // For most operations returning SDValue() will result in the node being
1378 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1379 // need to manually expand loads that may be legal in some address spaces and
1380 // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1381 // compute shaders, since the data is sign extended when it is uploaded to the
1382 // buffer. However SEXT loads from other address spaces are not supported, so
1383 // we need to expand them here.
1384 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1385 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1386 SDValue NewLoad = DAG.getExtLoad(
1387 ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
1388 LoadNode->getAlign(), LoadNode->getMemOperand()->getFlags());
1389 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1390 DAG.getValueType(MemVT));
1391
1392 SDValue MergedValues[2] = { Res, Chain };
1393 return DAG.getMergeValues(MergedValues, DL);
1394 }
1395
1396 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1397 return SDValue();
1398 }
1399
1400 // DWORDADDR ISD marks already shifted address
1401 if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1402 assert(VT == MVT::i32);
1403 Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
1404 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
1405 return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
1406 }
1407 return SDValue();
1408}
1409
1410SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1411 SDValue Chain = Op.getOperand(0);
1412 SDValue Cond = Op.getOperand(1);
1413 SDValue Jump = Op.getOperand(2);
1414
1415 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1416 Chain, Jump, Cond);
1417}
1418
1419SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1420 SelectionDAG &DAG) const {
1421 MachineFunction &MF = DAG.getMachineFunction();
1422 const R600FrameLowering *TFL = Subtarget->getFrameLowering();
1423
1424 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1425
1426 unsigned FrameIndex = FIN->getIndex();
1427 Register IgnoredFrameReg;
1428 StackOffset Offset =
1429 TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1430 return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF),
1431 SDLoc(Op), Op.getValueType());
1432}
1433
1434 CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1435 bool IsVarArg) const {
1436 switch (CC) {
1437 case CallingConv::AMDGPU_KERNEL:
1438 case CallingConv::SPIR_KERNEL:
1439 case CallingConv::C:
1440 case CallingConv::Fast:
1441 case CallingConv::Cold:
1442 llvm_unreachable("kernels should not be handled here");
1443 case CallingConv::AMDGPU_VS:
1444 case CallingConv::AMDGPU_GS:
1445 case CallingConv::AMDGPU_PS:
1446 case CallingConv::AMDGPU_CS:
1447 case CallingConv::AMDGPU_HS:
1448 case CallingConv::AMDGPU_ES:
1449 case CallingConv::AMDGPU_LS:
1450 return CC_R600;
1451 default:
1452 reportFatalUsageError("unsupported calling convention");
1453 }
1454}
1455
1456/// XXX Only kernel functions are supported, so we can assume for now that
1457/// every function is a kernel function, but in the future we should use
1458/// separate calling conventions for kernel and non-kernel functions.
1459 SDValue R600TargetLowering::LowerFormalArguments(
1460 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1461 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1462 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1463 SmallVector<CCValAssign, 16> ArgLocs;
1464 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1465 *DAG.getContext());
1466 MachineFunction &MF = DAG.getMachineFunction();
1467
1468 if (AMDGPU::isShader(CallConv)) {
1469 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
1470 } else {
1471 analyzeFormalArgumentsCompute(CCInfo, Ins);
1472 }
1473
1474 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
1475 CCValAssign &VA = ArgLocs[i];
1476 const ISD::InputArg &In = Ins[i];
1477 EVT VT = In.VT;
1478 EVT MemVT = VA.getLocVT();
1479 if (!VT.isVector() && MemVT.isVector()) {
1480 // Get load source type if scalarized.
1481 MemVT = MemVT.getVectorElementType();
1482 }
1483
1484 if (AMDGPU::isShader(CallConv)) {
1485 Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
1486 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1487 InVals.push_back(Register);
1488 continue;
1489 }
1490
1491 // i64 isn't a legal type, so the register type used ends up as i32, which
1492 // isn't expected here. It attempts to create this sextload, but it ends up
1493 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1494 // for <1 x i64>.
1495
1496 // The first 36 bytes of the input buffer contains information about
1497 // thread group and global sizes.
1498 ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1499 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1500 // FIXME: This should really check the extload type, but the handling of
1501 // extload vector parameters seems to be broken.
1502
1503 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1504 Ext = ISD::SEXTLOAD;
1505 }
1506
1507 // Compute the offset from the value.
1508 // XXX - I think PartOffset should give you this, but it seems to give the
1509 // size of the register which isn't useful.
1510
1511 unsigned PartOffset = VA.getLocMemOffset();
1512 Align Alignment = commonAlignment(Align(VT.getStoreSize()), PartOffset);
1513
1515 SDValue Arg = DAG.getLoad(
1516 ISD::UNINDEXED, Ext, VT, DL, Chain,
1517 DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
1518 PtrInfo,
1519 MemVT, Alignment, MachineMemOperand::MONonTemporal |
1520 MachineMemOperand::MODereferenceable |
1521 MachineMemOperand::MOInvariant);
1522
1523 InVals.push_back(Arg);
1524 }
1525 return Chain;
1526}
1527
1528 EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
1529 EVT VT) const {
1530 if (!VT.isVector())
1531 return MVT::i32;
1532 return VT.changeVectorElementTypeToInteger();
1533}
1534
1535 bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
1536 const MachineFunction &MF) const {
1537 // Local and Private addresses do not handle vectors. Limit to i32
1538 if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) {
1539 return (MemVT.getSizeInBits() <= 32);
1540 }
1541 return true;
1542}
1543
1544 bool R600TargetLowering::allowsMisalignedMemoryAccesses(
1545 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1546 unsigned *IsFast) const {
1547 if (IsFast)
1548 *IsFast = 0;
1549
1550 if (!VT.isSimple() || VT == MVT::Other)
1551 return false;
1552
1553 if (VT.bitsLT(MVT::i32))
1554 return false;
1555
1556 // TODO: This is a rough estimate.
1557 if (IsFast)
1558 *IsFast = 1;
1559
1560 return VT.bitsGT(MVT::i32) && Alignment >= Align(4);
1561}
1562
1563 static SDValue CompactSwizzlableVector(
1564 SelectionDAG &DAG, SDValue VectorEntry,
1565 DenseMap<unsigned, unsigned> &RemapSwizzle) {
1566 assert(RemapSwizzle.empty());
1567
1568 SDLoc DL(VectorEntry);
1569 EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1570
1571 SDValue NewBldVec[4];
1572 for (unsigned i = 0; i < 4; i++)
1573 NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1574 DAG.getIntPtrConstant(i, DL));
1575
1576 for (unsigned i = 0; i < 4; i++) {
1577 if (NewBldVec[i].isUndef())
1578 // We mask write here to teach later passes that the ith element of this
1579 // vector is undef. Thus we can use it to reduce 128-bit register usage,
1580 // break false dependencies and additionally make assembly easier to read.
1581 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
1582 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1583 if (C->isZero()) {
1584 RemapSwizzle[i] = 4; // SEL_0
1585 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1586 } else if (C->isExactlyValue(1.0)) {
1587 RemapSwizzle[i] = 5; // SEL_1
1588 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1589 }
1590 }
1591
1592 if (NewBldVec[i].isUndef())
1593 continue;
1594
1595 for (unsigned j = 0; j < i; j++) {
1596 if (NewBldVec[i] == NewBldVec[j]) {
1597 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1598 RemapSwizzle[i] = j;
1599 break;
1600 }
1601 }
1602 }
1603
1604 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1605 NewBldVec);
1606}
1607
1608 static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1609 DenseMap<unsigned, unsigned> &RemapSwizzle) {
1610 assert(RemapSwizzle.empty());
1611
1612 SDLoc DL(VectorEntry);
1613 EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1614
1615 SDValue NewBldVec[4];
1616 bool isUnmovable[4] = {false, false, false, false};
1617 for (unsigned i = 0; i < 4; i++)
1618 NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1619 DAG.getIntPtrConstant(i, DL));
1620
1621 for (unsigned i = 0; i < 4; i++) {
1622 RemapSwizzle[i] = i;
1623 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1624 unsigned Idx = NewBldVec[i].getConstantOperandVal(1);
1625 if (i == Idx)
1626 isUnmovable[Idx] = true;
1627 }
1628 }
1629
1630 for (unsigned i = 0; i < 4; i++) {
1631 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1632 unsigned Idx = NewBldVec[i].getConstantOperandVal(1);
1633 if (isUnmovable[Idx])
1634 continue;
1635 // Swap i and Idx
1636 std::swap(NewBldVec[Idx], NewBldVec[i]);
1637 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1638 break;
1639 }
1640 }
1641
1642 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1643 NewBldVec);
1644}
1645
1646SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[],
1647 SelectionDAG &DAG,
1648 const SDLoc &DL) const {
1649 // Old -> New swizzle values
1650 DenseMap<unsigned, unsigned> SwizzleRemap;
1651
1652 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1653 for (unsigned i = 0; i < 4; i++) {
1654 unsigned Idx = Swz[i]->getAsZExtVal();
1655 auto It = SwizzleRemap.find(Idx);
1656 if (It != SwizzleRemap.end())
1657 Swz[i] = DAG.getConstant(It->second, DL, MVT::i32);
1658 }
1659
1660 SwizzleRemap.clear();
1661 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1662 for (unsigned i = 0; i < 4; i++) {
1663 unsigned Idx = Swz[i]->getAsZExtVal();
1664 auto It = SwizzleRemap.find(Idx);
1665 if (It != SwizzleRemap.end())
1666 Swz[i] = DAG.getConstant(It->second, DL, MVT::i32);
1667 }
1668
1669 return BuildVector;
1670}
1671
1672SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
1673 SelectionDAG &DAG) const {
1674 SDLoc DL(LoadNode);
1675 EVT VT = LoadNode->getValueType(0);
1676 SDValue Chain = LoadNode->getChain();
1677 SDValue Ptr = LoadNode->getBasePtr();
1678 assert (isa<ConstantSDNode>(Ptr));
1679
1680 //TODO: Support smaller loads
1681 if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
1682 return SDValue();
1683
1684 if (LoadNode->getAlign() < Align(4))
1685 return SDValue();
1686
1687 int ConstantBlock = ConstantAddressBlock(Block);
1688
1689 SDValue Slots[4];
1690 for (unsigned i = 0; i < 4; i++) {
1691 // We want Const position encoded with the following formula :
1692 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1693 // const_index is Ptr computed by llvm using an alignment of 16.
1694 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1695 // then div by 4 at the ISel step
1696 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1697 DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
1698 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1699 }
1700 EVT NewVT = MVT::v4i32;
1701 unsigned NumElements = 4;
1702 if (VT.isVector()) {
1703 NewVT = VT;
1704 NumElements = VT.getVectorNumElements();
1705 }
1706 SDValue Result = DAG.getBuildVector(NewVT, DL, ArrayRef(Slots, NumElements));
1707 if (!VT.isVector()) {
1708 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1709 DAG.getConstant(0, DL, MVT::i32));
1710 }
1711 SDValue MergedValues[2] = {
1712 Result,
1713 Chain
1714 };
1715 return DAG.getMergeValues(MergedValues, DL);
1716}
1717
1718//===----------------------------------------------------------------------===//
1719// Custom DAG Optimizations
1720//===----------------------------------------------------------------------===//
1721
1722 SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1723 DAGCombinerInfo &DCI) const {
1724 SelectionDAG &DAG = DCI.DAG;
1725 SDLoc DL(N);
1726
1727 switch (N->getOpcode()) {
1728 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1729 case ISD::FP_ROUND: {
1730 SDValue Arg = N->getOperand(0);
1731 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1732 return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1733 Arg.getOperand(0));
1734 }
1735 break;
1736 }
1737
1738 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1739 // (i32 select_cc f32, f32, -1, 0 cc)
1740 //
1741 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1742 // this to one of the SET*_DX10 instructions.
1743 case ISD::FP_TO_SINT: {
1744 SDValue FNeg = N->getOperand(0);
1745 if (FNeg.getOpcode() != ISD::FNEG) {
1746 return SDValue();
1747 }
1748 SDValue SelectCC = FNeg.getOperand(0);
1749 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1750 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1751 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1752 !isHWTrueValue(SelectCC.getOperand(2)) ||
1753 !isHWFalseValue(SelectCC.getOperand(3))) {
1754 return SDValue();
1755 }
1756
1757 return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1758 SelectCC.getOperand(0), // LHS
1759 SelectCC.getOperand(1), // RHS
1760 DAG.getAllOnesConstant(DL, MVT::i32), // True
1761 DAG.getConstant(0, DL, MVT::i32), // False
1762 SelectCC.getOperand(4)); // CC
1763 }
1764
1765 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1766 // => build_vector elt0, ... , NewEltIdx, ... , eltN
1767 case ISD::INSERT_VECTOR_ELT: {
1768 SDValue InVec = N->getOperand(0);
1769 SDValue InVal = N->getOperand(1);
1770 SDValue EltNo = N->getOperand(2);
1771
1772 // If the inserted element is an UNDEF, just use the input vector.
1773 if (InVal.isUndef())
1774 return InVec;
1775
1776 EVT VT = InVec.getValueType();
1777
1778 // If we can't generate a legal BUILD_VECTOR, exit
1779 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1780 return SDValue();
1781
1782 // Check that we know which element is being inserted
1783 if (!isa<ConstantSDNode>(EltNo))
1784 return SDValue();
1785 unsigned Elt = EltNo->getAsZExtVal();
1786
1787 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1788 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1789 // vector elements.
1790 SmallVector<SDValue, 8> Ops;
1791 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1792 Ops.append(InVec.getNode()->op_begin(),
1793 InVec.getNode()->op_end());
1794 } else if (InVec.isUndef()) {
1795 unsigned NElts = VT.getVectorNumElements();
1796 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1797 } else {
1798 return SDValue();
1799 }
1800
1801 // Insert the element
1802 if (Elt < Ops.size()) {
1803 // All the operands of BUILD_VECTOR must have the same type;
1804 // we enforce that here.
1805 EVT OpVT = Ops[0].getValueType();
1806 if (InVal.getValueType() != OpVT)
1807 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1808 DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1809 DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1810 Ops[Elt] = InVal;
1811 }
1812
1813 // Return the new vector
1814 return DAG.getBuildVector(VT, DL, Ops);
1815 }
1816
1817 // Extract_vec (Build_vector) generated by custom lowering
1818 // also needs a custom combine here.
1819 case ISD::EXTRACT_VECTOR_ELT: {
1820 SDValue Arg = N->getOperand(0);
1821 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1822 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1823 unsigned Element = Const->getZExtValue();
1824 return Arg->getOperand(Element);
1825 }
1826 }
1827 if (Arg.getOpcode() == ISD::BITCAST &&
1828 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1829 (Arg.getOperand(0).getValueType().getVectorNumElements() ==
1830 Arg.getValueType().getVectorNumElements())) {
1831 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1832 unsigned Element = Const->getZExtValue();
1833 return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1834 Arg->getOperand(0).getOperand(Element));
1835 }
1836 }
1837 break;
1838 }
1839
1840 case ISD::SELECT_CC: {
1841 // Try common optimizations
1842 if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
1843 return Ret;
1844
1845 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1846 // selectcc x, y, a, b, inv(cc)
1847 //
1848 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1849 // selectcc x, y, a, b, cc
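// The outer compare tests the inner select's result against b, i.e. whether
// the inner condition was false; so an outer seteq selects on the inverted
// inner condition, while an outer setne simply reproduces the inner select.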
1850 SDValue LHS = N->getOperand(0);
1851 if (LHS.getOpcode() != ISD::SELECT_CC) {
1852 return SDValue();
1853 }
1854
1855 SDValue RHS = N->getOperand(1);
1856 SDValue True = N->getOperand(2);
1857 SDValue False = N->getOperand(3);
1858 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1859
1860 if (LHS.getOperand(2).getNode() != True.getNode() ||
1861 LHS.getOperand(3).getNode() != False.getNode() ||
1862 RHS.getNode() != False.getNode()) {
1863 return SDValue();
1864 }
1865
1866 switch (NCC) {
1867 default: return SDValue();
1868 case ISD::SETNE: return LHS;
1869 case ISD::SETEQ: {
1870 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1871 LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType());
1872 if (DCI.isBeforeLegalizeOps() ||
1873 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1874 return DAG.getSelectCC(DL,
1875 LHS.getOperand(0),
1876 LHS.getOperand(1),
1877 LHS.getOperand(2),
1878 LHS.getOperand(3),
1879 LHSCC);
1880 break;
1881 }
1882 }
1883 return SDValue();
1884 }
1885
1886 case AMDGPUISD::R600_EXPORT: {
1887 SDValue Arg = N->getOperand(1);
1888 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1889 break;
1890
1891 SDValue NewArgs[8] = {
1892 N->getOperand(0), // Chain
1893 SDValue(),
1894 N->getOperand(2), // ArrayBase
1895 N->getOperand(3), // Type
1896 N->getOperand(4), // SWZ_X
1897 N->getOperand(5), // SWZ_Y
1898 N->getOperand(6), // SWZ_Z
1899 N->getOperand(7) // SWZ_W
1900 };
1901 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
1902 return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
1903 }
1904 case AMDGPUISD::TEXTURE_FETCH: {
1905 SDValue Arg = N->getOperand(1);
1906 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1907 break;
1908
1909 SDValue NewArgs[19] = {
1910 N->getOperand(0),
1911 N->getOperand(1),
1912 N->getOperand(2),
1913 N->getOperand(3),
1914 N->getOperand(4),
1915 N->getOperand(5),
1916 N->getOperand(6),
1917 N->getOperand(7),
1918 N->getOperand(8),
1919 N->getOperand(9),
1920 N->getOperand(10),
1921 N->getOperand(11),
1922 N->getOperand(12),
1923 N->getOperand(13),
1924 N->getOperand(14),
1925 N->getOperand(15),
1926 N->getOperand(16),
1927 N->getOperand(17),
1928 N->getOperand(18),
1929 };
1930 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1931 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
1932 }
1933
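// A load from the indirect parameter address space with a constant pointer
// can be satisfied directly from constant buffer 0 (see constBufferLoad).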
1934 case ISD::LOAD: {
1935 LoadSDNode *LoadNode = cast<LoadSDNode>(N);
1936 SDValue Ptr = LoadNode->getBasePtr();
1937 if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
1938 isa<ConstantSDNode>(Ptr))
1939 return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
1940 break;
1941 }
1942
1943 default: break;
1944 }
1945
1946 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1947}
1948
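// Try to fold the node feeding source operand SrcIdx of ParentNode into the
// parent's operand fields: FNEG/FABS become the neg/abs modifier bits,
// CONST_COPY becomes an ALU_CONST read with its select index (subject to the
// const-read limitations check), and MOV_IMM_* becomes either an inline
// constant register or the ALU_LITERAL_X literal slot. Returns true and
// updates the passed SDValue references when a fold is made.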
1949bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
1950 SDValue &Src, SDValue &Neg, SDValue &Abs,
1951 SDValue &Sel, SDValue &Imm,
1952 SelectionDAG &DAG) const {
1953 const R600InstrInfo *TII = Subtarget->getInstrInfo();
1954 if (!Src.isMachineOpcode())
1955 return false;
1956
1957 switch (Src.getMachineOpcode()) {
1958 case R600::FNEG_R600:
1959 if (!Neg.getNode())
1960 return false;
1961 Src = Src.getOperand(0);
1962 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1963 return true;
1964 case R600::FABS_R600:
1965 if (!Abs.getNode())
1966 return false;
1967 Src = Src.getOperand(0);
1968 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1969 return true;
1970 case R600::CONST_COPY: {
1971 unsigned Opcode = ParentNode->getMachineOpcode();
1972 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
1973
1974 if (!Sel.getNode())
1975 return false;
1976
1977 SDValue CstOffset = Src.getOperand(0);
1978 if (ParentNode->getValueType(0).isVector())
1979 return false;
1980
1981 // Gather the constant values already used by the parent instruction
1982 int SrcIndices[] = {
1983 TII->getOperandIdx(Opcode, R600::OpName::src0),
1984 TII->getOperandIdx(Opcode, R600::OpName::src1),
1985 TII->getOperandIdx(Opcode, R600::OpName::src2),
1986 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
1987 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
1988 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
1989 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
1990 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
1991 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
1992 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
1993 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
1994 };
1995 std::vector<unsigned> Consts;
1996 for (int OtherSrcIdx : SrcIndices) {
1997 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1998 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1999 continue;
2000 if (HasDst) {
2001 OtherSrcIdx--;
2002 OtherSelIdx--;
2003 }
2004 if (RegisterSDNode *Reg =
2005 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2006 if (Reg->getReg() == R600::ALU_CONST) {
2007 Consts.push_back(ParentNode->getConstantOperandVal(OtherSelIdx));
2008 }
2009 }
2010 }
2011
2012 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2013 Consts.push_back(Cst->getZExtValue());
2014 if (!TII->fitsConstReadLimitations(Consts)) {
2015 return false;
2016 }
2017
2018 Sel = CstOffset;
2019 Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2020 return true;
2021 }
2022 case R600::MOV_IMM_GLOBAL_ADDR:
2023 // Check if the Imm slot is already in use (same check as the MOV_IMM cases below).
2024 if (Imm->getAsZExtVal())
2025 return false;
2026 Imm = Src.getOperand(0);
2027 Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2028 return true;
2029 case R600::MOV_IMM_I32:
2030 case R600::MOV_IMM_F32: {
2031 unsigned ImmReg = R600::ALU_LITERAL_X;
2032 uint64_t ImmValue = 0;
2033
2034 if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2035 ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2036 float FloatValue = FPC->getValueAPF().convertToFloat();
2037 if (FloatValue == 0.0) {
2038 ImmReg = R600::ZERO;
2039 } else if (FloatValue == 0.5) {
2040 ImmReg = R600::HALF;
2041 } else if (FloatValue == 1.0) {
2042 ImmReg = R600::ONE;
2043 } else {
2044 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2045 }
2046 } else {
2047 uint64_t Value = Src.getConstantOperandVal(0);
2048 if (Value == 0) {
2049 ImmReg = R600::ZERO;
2050 } else if (Value == 1) {
2051 ImmReg = R600::ONE_INT;
2052 } else {
2053 ImmValue = Value;
2054 }
2055 }
2056
2057 // Check that we aren't already using an immediate.
2058 // XXX: It's possible for an instruction to have more than one
2059 // immediate operand, but this is not supported yet.
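// 0.0, 0.5 and 1.0 were mapped to dedicated inline constant registers above;
// only a remaining ALU_LITERAL_X value needs the instruction's literal slot.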
2060 if (ImmReg == R600::ALU_LITERAL_X) {
2061 if (!Imm.getNode())
2062 return false;
2063 ConstantSDNode *C = cast<ConstantSDNode>(Imm);
2064 if (C->getZExtValue())
2065 return false;
2066 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2067 }
2068 Src = DAG.getRegister(ImmReg, MVT::i32);
2069 return true;
2070 }
2071 default:
2072 return false;
2073 }
2074}
2075
2076/// Fold the instructions after selecting them
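/// For each selected machine node this walks the source operands and uses
/// FoldOperand to fold neg/abs modifiers, constant-buffer reads and literal
/// immediates directly into the instruction's operand list.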
2077SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2078 SelectionDAG &DAG) const {
2079 const R600InstrInfo *TII = Subtarget->getInstrInfo();
2080 if (!Node->isMachineOpcode())
2081 return Node;
2082
2083 unsigned Opcode = Node->getMachineOpcode();
2084 SDValue FakeOp;
2085
2086 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2087
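// DOT_4 is special-cased: it has eight per-channel sources (src0_X..src1_W),
// each with its own neg/abs/sel operands, rather than the usual src0..src2.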
2088 if (Opcode == R600::DOT_4) {
2089 int OperandIdx[] = {
2090 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2091 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2092 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2093 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2094 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2095 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2096 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2097 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2098 };
2099 int NegIdx[] = {
2100 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2101 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2102 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2103 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2104 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2105 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2106 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2107 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2108 };
2109 int AbsIdx[] = {
2110 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2111 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2112 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2113 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2114 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2115 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2116 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2117 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2118 };
2119 for (unsigned i = 0; i < 8; i++) {
2120 if (OperandIdx[i] < 0)
2121 return Node;
2122 SDValue &Src = Ops[OperandIdx[i] - 1];
2123 SDValue &Neg = Ops[NegIdx[i] - 1];
2124 SDValue &Abs = Ops[AbsIdx[i] - 1];
2125 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2126 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2127 if (HasDst)
2128 SelIdx--;
2129 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2130 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2131 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2132 }
2133 } else if (Opcode == R600::REG_SEQUENCE) {
2134 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2135 SDValue &Src = Ops[i];
2136 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2137 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2138 }
2139 } else {
2140 if (!TII->hasInstrModifiers(Opcode))
2141 return Node;
2142 int OperandIdx[] = {
2143 TII->getOperandIdx(Opcode, R600::OpName::src0),
2144 TII->getOperandIdx(Opcode, R600::OpName::src1),
2145 TII->getOperandIdx(Opcode, R600::OpName::src2)
2146 };
2147 int NegIdx[] = {
2148 TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2149 TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2150 TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2151 };
2152 int AbsIdx[] = {
2153 TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2154 TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2155 -1
2156 };
2157 for (unsigned i = 0; i < 3; i++) {
2158 if (OperandIdx[i] < 0)
2159 return Node;
2160 SDValue &Src = Ops[OperandIdx[i] - 1];
2161 SDValue &Neg = Ops[NegIdx[i] - 1];
2162 SDValue FakeAbs;
2163 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2164 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2165 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2166 int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2167 if (HasDst) {
2168 SelIdx--;
2169 ImmIdx--;
2170 }
2171 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2172 SDValue &Imm = Ops[ImmIdx];
2173 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2174 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2175 }
2176 }
2177
2178 return Node;
2179}
2180
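// Atomicrmw operations without native support are expanded to cmpxchg loops
// (AtomicExpansionKind::CmpXChg); 32- and 64-bit xchg and plain integer
// operations are handled without expansion (AtomicExpansionKind::None).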
2181TargetLowering::AtomicExpansionKind
2182R600TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
2183 switch (RMW->getOperation()) {
2184 case AtomicRMWInst::Nand:
2185 case AtomicRMWInst::FAdd:
2186 case AtomicRMWInst::FSub:
2187 case AtomicRMWInst::FMax:
2188 case AtomicRMWInst::FMin:
2189 return AtomicExpansionKind::CmpXChg;
2190 case AtomicRMWInst::UIncWrap:
2191 case AtomicRMWInst::UDecWrap:
2192 // FIXME: Cayman at least appears to have instructions for this, but the
2193 // instruction definitions appear to be missing.
2194 return AtomicExpansionKind::CmpXChg;
2195 case AtomicRMWInst::Xchg: {
2196 const DataLayout &DL = RMW->getFunction()->getDataLayout();
2197 unsigned ValSize = DL.getTypeSizeInBits(RMW->getType());
2198 if (ValSize == 32 || ValSize == 64)
2199 return AtomicExpansionKind::None;
2200 return AtomicExpansionKind::CmpXChg;
2201 }
2202 default:
2203 if (auto *IntTy = dyn_cast<IntegerType>(RMW->getType())) {
2204 unsigned Size = IntTy->getBitWidth();
2205 if (Size == 32 || Size == 64)
2206 return AtomicExpansionKind::None;
2207 }
2208
2209 return AtomicExpansionKind::CmpXChg;
2210 }
2211
2212 llvm_unreachable("covered atomicrmw op switch");
2213}