LLVM: lib/Target/AArch64/AArch64CallingConvention.cpp Source File

//=== AArch64CallingConvention.cpp - AArch64 CC impl ------------*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This file contains the table-generated and custom routines for the AArch64

// Calling Convention.

//

//===----------------------------------------------------------------------===//


#include "AArch64CallingConvention.h"

#include "AArch64.h"

#include "AArch64InstrInfo.h"

#include "AArch64Subtarget.h"

#include "llvm/CodeGen/CallingConvLower.h"

#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;


// Register tables consulted by the custom block handlers below. Each table
// lists the registers of one class in ascending order, so a contiguous run of
// entries corresponds to a run of consecutively numbered registers.

// 64-bit general purpose registers X0-X7.
static const MCPhysReg XRegList[] = {
    AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3,
    AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7};

// Half-precision views H0-H7.
static const MCPhysReg HRegList[] = {
    AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3,
    AArch64::H4, AArch64::H5, AArch64::H6, AArch64::H7};

// Single-precision views S0-S7.
static const MCPhysReg SRegList[] = {
    AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3,
    AArch64::S4, AArch64::S5, AArch64::S6, AArch64::S7};

// Double-precision views D0-D7.
static const MCPhysReg DRegList[] = {
    AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3,
    AArch64::D4, AArch64::D5, AArch64::D6, AArch64::D7};

// 128-bit views Q0-Q7.
static const MCPhysReg QRegList[] = {
    AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
    AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};

// Scalable vector registers Z0-Z7.
static const MCPhysReg ZRegList[] = {
    AArch64::Z0, AArch64::Z1, AArch64::Z2, AArch64::Z3,
    AArch64::Z4, AArch64::Z5, AArch64::Z6, AArch64::Z7};

// Predicate registers P0-P3.
static const MCPhysReg PRegList[] = {AArch64::P0, AArch64::P1, AArch64::P2,
                                     AArch64::P3};


/// Assign final locations to the members queued in \p PendingMembers and
/// empty the list.
///
/// For a fixed-size \p LocVT every pending member receives its own stack slot
/// of LocVT's size in bytes. The first slot is allocated with \p SlotAlign and
/// all following ones with an alignment of 1.
///
/// For a scalable-vector \p LocVT the calling convention's auto-generated
/// CCAssignFn is re-invoked for the first pending member while Z0-Z7 and P0-P3
/// are temporarily marked as allocated.
///
/// \returns true in all cases.
static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
                             MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
                             CCState &State, Align SlotAlign) {
  // Fixed-size members: lay the block out on the stack.
  if (!LocVT.isScalableVector()) {
    const unsigned BytesPerMember = LocVT.getSizeInBits() / 8;
    Align NextAlign = SlotAlign;
    for (CCValAssign &Member : PendingMembers) {
      Member.convertToMem(State.AllocateStack(BytesPerMember, NextAlign));
      State.addLoc(Member);
      // Only the first member carries the block alignment.
      NextAlign = Align(1);
    }

    // Every pending member has been given a location.
    PendingMembers.clear();
    return true;
  }

  // Scalable members: defer to the table-generated handler.
  const AArch64Subtarget &Subtarget =
      State.getMachineFunction().getSubtarget<AArch64Subtarget>();
  const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();

  // The generated CCAssignFn would dispatch straight back into the custom
  // handler (and loop forever) if the consecutive-register flags stayed set,
  // so drop them for the duration of the call.
  ArgFlags.setInConsecutiveRegs(false);
  ArgFlags.setInConsecutiveRegsLast(false);

  // Per the PCS for SVE tuples, registers that could not hold the whole tuple
  // must remain unallocated afterwards. During the re-invocation, however, the
  // CCAssignFn has to see every Z and P argument register as taken so that it
  // passes the tuple indirectly. Remember which registers were still free on
  // entry so that exactly those can be released again below.
  SmallVector<MCPhysReg, 12> ReservedHere;
  auto ReserveAll = [&](ArrayRef<MCPhysReg> Regs) {
    for (MCPhysReg Reg : Regs) {
      if (!State.isAllocated(Reg))
        ReservedHere.push_back(Reg);
      State.AllocateReg(Reg);
    }
  };
  ReserveAll(ZRegList);
  ReserveAll(PRegList);

  auto &First = PendingMembers[0];
  CCAssignFn *AssignFn =
      TLI->CCAssignFnForCall(State.getCallingConv(), /*IsVarArg=*/false);
  // FIXME: Get the correct original type.
  Type *OrigTy = EVT(First.getValVT()).getTypeForEVT(State.getContext());
  const bool Unhandled =
      AssignFn(First.getValNo(), First.getValVT(), First.getValVT(),
               CCValAssign::Full, ArgFlags, OrigTy, State);
  if (Unhandled)
    llvm_unreachable("Call operand has unhandled type");

  // Put the consecutive-register flags back.
  ArgFlags.setInConsecutiveRegs(true);
  ArgFlags.setInConsecutiveRegsLast(true);

  // Release the registers that were only reserved for the call above, leaving
  // them available for other, smaller types.
  for (MCPhysReg Reg : ReservedHere)
    State.DeallocateReg(Reg);

  // Every pending member has been given a location.
  PendingMembers.clear();
  return true;
}


/// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An

/// [N x Ty] type must still be contiguous in memory though.

static bool CC_AArch64_Custom_Stack_Block(

      unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,

      ISD::ArgFlagsTy &ArgFlags, CCState &State) {

  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();


  // Add the argument to the list to be allocated once we know the size of the

  // block.

  PendingMembers.push_back(

      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));


  if (!ArgFlags.isInConsecutiveRegsLast())

    return true;


  return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, Align(8));

}


/// Given an [N x Ty] block, it should be passed in a consecutive sequence of

/// registers. If no such sequence is available, mark the rest of the registers

/// of that type as used and place the argument on the stack.

static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,

                                    CCValAssign::LocInfo &LocInfo,

                                    ISD::ArgFlagsTy &ArgFlags, CCState &State) {

  const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(

      State.getMachineFunction().getSubtarget());

  bool IsDarwinILP32 = Subtarget.isTargetILP32() && Subtarget.isTargetMachO();


  // Try to allocate a contiguous block of registers, each of the correct

  // size to hold one member.

  ArrayRef<MCPhysReg> RegList;

  if (LocVT.SimpleTy == MVT::i64 || (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32))

    RegList = XRegList;

  else if (LocVT.SimpleTy == MVT::f16 || LocVT.SimpleTy == MVT::bf16)

    RegList = HRegList;

  else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector())

    RegList = SRegList;

  else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector())

    RegList = DRegList;

  else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())

    RegList = QRegList;

  else if (LocVT.isScalableVector()) {

    // Scalable masks should be pass by Predicate registers.

    if (LocVT == MVT::nxv1i1 || LocVT == MVT::nxv2i1 || LocVT == MVT::nxv4i1 ||

        LocVT == MVT::nxv8i1 || LocVT == MVT::nxv16i1 ||

        LocVT == MVT::aarch64svcount)

      RegList = PRegList;

    else

      RegList = ZRegList;

  } else {

    // Not an array we want to split up after all.

    return false;

  }


  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();


  // Add the argument to the list to be allocated once we know the size of the

  // block.

  PendingMembers.push_back(

      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));


  if (!ArgFlags.isInConsecutiveRegsLast())

    return true;


  // [N x i32] arguments get packed into x-registers on Darwin's arm64_32

  // because that's how the armv7k Clang front-end emits small structs.

  unsigned EltsPerReg = (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32) ? 2 : 1;

  ArrayRef<MCPhysReg> RegResult = State.AllocateRegBlock(

      RegList, alignTo(PendingMembers.size(), EltsPerReg) / EltsPerReg);

  if (!RegResult.empty() && EltsPerReg == 1) {

    for (const auto &[It, Reg] : zip(PendingMembers, RegResult)) {

      It.convertToReg(Reg);

      State.addLoc(It);

    }

    PendingMembers.clear();

    return true;

  } else if (!RegResult.empty()) {

    assert(EltsPerReg == 2 && "unexpected ABI");

    bool UseHigh = false;

    CCValAssign::LocInfo Info;

    unsigned RegIdx = 0;

    for (auto &It : PendingMembers) {

      Info = UseHigh ? CCValAssign::AExtUpper : CCValAssign::ZExt;

      State.addLoc(CCValAssign::getReg(It.getValNo(), MVT::i32,

                                       RegResult[RegIdx], MVT::i64, Info));

      UseHigh = !UseHigh;

      if (!UseHigh)

        ++RegIdx;

    }

    PendingMembers.clear();

    return true;

  }


  if (!LocVT.isScalableVector()) {

    // Mark all regs in the class as unavailable

    for (auto Reg : RegList)

      State.AllocateReg(Reg);

  }


  const MaybeAlign StackAlign =

      State.getMachineFunction().getDataLayout().getStackAlignment();

  assert(StackAlign && "data layout string is missing stack alignment");

  const Align MemAlign = ArgFlags.getNonZeroMemAlign();

  Align SlotAlign = std::min(MemAlign, *StackAlign);

  if (!Subtarget.isTargetDarwin())

    SlotAlign = std::max(SlotAlign, Align(8));


  return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);

}


// TableGen provides definitions of the calling convention analysis entry

// points.

#include "AArch64GenCallingConv.inc"

finishStackBlock
static bool finishStackBlock(SmallVectorImpl< CCValAssign > &PendingMembers, MVT LocVT, ISD::ArgFlagsTy &ArgFlags, CCState &State, Align SlotAlign)
Definition: AArch64CallingConvention.cpp:43

XRegList
static const MCPhysReg XRegList[]
Definition: AArch64CallingConvention.cpp:22

SRegList
static const MCPhysReg SRegList[]
Definition: AArch64CallingConvention.cpp:28

CC_AArch64_Custom_Block
static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State)
Given an [N x Ty] block, it should be passed in a consecutive sequence of registers.
Definition: AArch64CallingConvention.cpp:135

ZRegList
static const MCPhysReg ZRegList[]
Definition: AArch64CallingConvention.cpp:37

DRegList
static const MCPhysReg DRegList[]
Definition: AArch64CallingConvention.cpp:31

HRegList
static const MCPhysReg HRegList[]
Definition: AArch64CallingConvention.cpp:25

QRegList
static const MCPhysReg QRegList[]
Definition: AArch64CallingConvention.cpp:34

PRegList
static const MCPhysReg PRegList[]
Definition: AArch64CallingConvention.cpp:40

CC_AArch64_Custom_Stack_Block
static bool CC_AArch64_Custom_Stack_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State)
The Darwin variadic PCS places anonymous arguments in 8-byte stack slots.
Definition: AArch64CallingConvention.cpp:116

AArch64CallingConvention.h

AArch64InstrInfo.h

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

AArch64Subtarget.h

AArch64.h

Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27

CallingConvLower.h

Size
uint64_t Size
Definition: ELFObjHandler.cpp:81

I
#define I(x, y, z)
Definition: MD5.cpp:58

TargetInstrInfo.h

llvm::AArch64Subtarget
Definition: AArch64Subtarget.h:38

llvm::AArch64Subtarget::isTargetDarwin
bool isTargetDarwin() const
Definition: AArch64Subtarget.h:291

llvm::AArch64Subtarget::isTargetILP32
bool isTargetILP32() const
Definition: AArch64Subtarget.h:303

llvm::AArch64Subtarget::getTargetLowering
const AArch64TargetLowering * getTargetLowering() const override
Definition: AArch64Subtarget.h:146

llvm::AArch64Subtarget::isTargetMachO
bool isTargetMachO() const
Definition: AArch64Subtarget.h:301

llvm::AArch64TargetLowering
Definition: AArch64ISelLowering.h:64

llvm::AArch64TargetLowering::CCAssignFnForCall
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
Selects the correct CCAssignFn for a given CallingConvention value.
Definition: AArch64ISelLowering.cpp:7830

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41

llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:142

llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition: CallingConvLower.h:171

llvm::CCState::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: CallingConvLower.h:241

llvm::CCState::getCallingConv
CallingConv::ID getCallingConv() const
Definition: CallingConvLower.h:242

llvm::CCState::AllocateReg
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
Definition: CallingConvLower.h:333

llvm::CCState::getContext
LLVMContext & getContext() const
Definition: CallingConvLower.h:240

llvm::CCState::AllocateRegBlock
ArrayRef< MCPhysReg > AllocateRegBlock(ArrayRef< MCPhysReg > Regs, unsigned RegsRequired)
Attempt to allocate a block of RegsRequired consecutive registers.
Definition: CallingConvLower.h:366

llvm::CCState::AllocateStack
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
Definition: CallingConvLower.h:408

llvm::CCState::DeallocateReg
void DeallocateReg(MCPhysReg Reg)
Definition: CallingConvLower.h:325

llvm::CCState::getPendingLocs
SmallVectorImpl< CCValAssign > & getPendingLocs()
Definition: CallingConvLower.h:486

llvm::CCState::isAllocated
bool isAllocated(MCRegister Reg) const
isAllocated - Return true if the specified register (or an alias) is allocated.
Definition: CallingConvLower.h:257

llvm::CCState::addLoc
void addLoc(const CCValAssign &V)
Definition: CallingConvLower.h:236

llvm::CCValAssign::getPending
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Definition: CallingConvLower.h:109

llvm::CCValAssign::getReg
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
Definition: CallingConvLower.h:85

llvm::CCValAssign::LocInfo
LocInfo
Definition: CallingConvLower.h:36

llvm::CCValAssign::ZExt
@ ZExt
Definition: CallingConvLower.h:39

llvm::CCValAssign::Full
@ Full
Definition: CallingConvLower.h:37

llvm::CCValAssign::AExtUpper
@ AExtUpper
Definition: CallingConvLower.h:45

llvm::DataLayout::getStackAlignment
MaybeAlign getStackAlignment() const
Returns the natural stack alignment, or MaybeAlign() if one wasn't specified.
Definition: DataLayout.h:228

llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:36

llvm::MVT::is128BitVector
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: MachineValueType.h:157

llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition: MachineValueType.h:56

llvm::MVT::isScalableVector
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
Definition: MachineValueType.h:114

llvm::MVT::is32BitVector
bool is32BitVector() const
Return true if this is a 32-bit vector type.
Definition: MachineValueType.h:147

llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:309

llvm::MVT::is64BitVector
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: MachineValueType.h:152

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:762

llvm::MachineFunction::getDataLayout
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Definition: MachineFunction.cpp:309

llvm::SmallVectorBase::size
size_t size() const
Definition: SmallVector.h:79

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574

llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:611

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition: SmallVector.h:414

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45

uint16_t

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:164

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::zip
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:860

llvm::CCAssignFn
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
Definition: CallingConvLower.h:157

llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39

llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35

llvm::EVT::getTypeForEVT
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:216

llvm::ISD::ArgFlagsTy
Definition: TargetCallingConv.h:27

llvm::ISD::ArgFlagsTy::setInConsecutiveRegs
void setInConsecutiveRegs(bool Flag=true)
Definition: TargetCallingConv.h:131

llvm::ISD::ArgFlagsTy::isInConsecutiveRegsLast
bool isInConsecutiveRegsLast() const
Definition: TargetCallingConv.h:133

llvm::ISD::ArgFlagsTy::getNonZeroMemAlign
Align getNonZeroMemAlign() const
Definition: TargetCallingConv.h:153

llvm::ISD::ArgFlagsTy::setInConsecutiveRegsLast
void setInConsecutiveRegsLast(bool Flag=true)
Definition: TargetCallingConv.h:134

llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117