LLVM 22.0.0git
ARMSubtarget.cpp
Go to the documentation of this file.
1//===-- ARMSubtarget.cpp - ARM Subtarget Information ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the ARM specific subclass of TargetSubtargetInfo.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ARM.h"
14
15#include "ARMCallLowering.h"
16#include "ARMFrameLowering.h"
17#include "ARMInstrInfo.h"
18#include "ARMLegalizerInfo.h"
19#include "ARMRegisterBankInfo.h"
20#include "ARMSubtarget.h"
21#include "ARMTargetMachine.h"
23#include "Thumb1FrameLowering.h"
24#include "Thumb1InstrInfo.h"
25#include "Thumb2InstrInfo.h"
26#include "llvm/ADT/StringRef.h"
27#include "llvm/ADT/Twine.h"
31#include "llvm/IR/Function.h"
32#include "llvm/IR/GlobalValue.h"
33#include "llvm/MC/MCAsmInfo.h"
40
41using namespace llvm;
42
43#define DEBUG_TYPE "arm-subtarget"
44
45#define GET_SUBTARGETINFO_TARGET_DESC
46#define GET_SUBTARGETINFO_CTOR
47#include "ARMGenSubtargetInfo.inc"
48
// Hidden boolean command-line option "arm-use-mulops"; it is on (true) unless
// overridden on the command line.
// NOTE(review): presumably controls whether fused multiply-add /
// multiply-subtract instructions are used (cf. the UseMulOps member) -- the
// code that reads this option is not visible here, confirm.
49static cl::opt<bool>
50UseFusedMulOps("arm-use-mulops",
51 cl::init(true), cl::Hidden);
52
57
// Hidden command-line option selecting the IT block mode. It is spelled via
// its enumerated values: "arm-default-it" selects DefaultIT (also the initial
// value) and "arm-restrict-it" selects RestrictedIT.
// initSubtargetFeatures switches on this option to set RestrictIT.
58static cl::opt<ITMode>
59 IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
60 cl::values(clEnumValN(DefaultIT, "arm-default-it",
61 "Generate any type of IT block"),
62 clEnumValN(RestrictedIT, "arm-restrict-it",
63 "Disallow complex IT blocks")));
64
65/// ForceFastISel - Use the fast-isel, even for subtargets where it is not
66/// currently supported (for testing only).
/// Hidden boolean flag "arm-force-fast-isel"; off (false) by default. When it
/// is set, the fast-isel query in this file returns true before any of its
/// target checks run.
67static cl::opt<bool>
68ForceFastISel("arm-force-fast-isel",
69 cl::init(false), cl::Hidden);
70
71/// initializeSubtargetDependencies - Initializes using a CPU and feature string
72/// so that we can use initializer lists for subtarget initialization.
74 StringRef FS) {
75 initSubtargetFeatures(CPU, FS);
76 return *this;
77}
78
79ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
80 StringRef FS) {
82 if (STI.isThumb1Only())
83 return (ARMFrameLowering *)new Thumb1FrameLowering(STI);
84
85 return new ARMFrameLowering(STI);
86}
87
88ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
89 const std::string &FS,
90 const ARMBaseTargetMachine &TM, bool IsLittle,
91 bool MinSize, DenormalMode DM)
92 : ARMGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
95 FrameLowering(initializeFrameLowering(CPU, FS)),
96 // At this point initializeSubtargetDependencies has been called so
97 // we can query directly.
98 InstrInfo(isThumb1Only() ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this)
99 : !isThumb() ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
100 : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
101 TLInfo(TM, *this) {
102
103 CallLoweringInfo.reset(new ARMCallLowering(*getTargetLowering()));
104 Legalizer.reset(new ARMLegalizerInfo(*this));
105
106 auto *RBI = new ARMRegisterBankInfo(*getRegisterInfo());
107
108 // FIXME: At this point, we can't rely on Subtarget having RBI.
109 // It's awkward to mix passing RBI and the Subtarget; should we pass
110 // TII/TRI as well?
111 InstSelector.reset(createARMInstructionSelector(TM, *this, *RBI));
112
113 RegBankInfo.reset(RBI);
114}
115
117 return CallLoweringInfo.get();
118}
119
121 return InstSelector.get();
122}
123
125 return Legalizer.get();
126}
127
129 return RegBankInfo.get();
130}
131
133 // We don't currently support Thumb, but Windows requires Thumb.
134 return hasV6Ops() && hasARMOps() && !isTargetWindows();
135}
136
137void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
138 if (CPUString.empty()) {
139 CPUString = "generic";
140
141 if (isTargetDarwin()) {
143 ARM::ArchKind AK = ARM::parseArch(ArchName);
144 if (AK == ARM::ArchKind::ARMV7S)
145 // Default to the Swift CPU when targeting armv7s/thumbv7s.
146 CPUString = "swift";
147 else if (AK == ARM::ArchKind::ARMV7K)
148 // Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k.
149 // ARMv7k does not use SjLj exception handling.
150 CPUString = "cortex-a7";
151 }
152 }
153
154 // Insert the architecture feature derived from the target triple into the
155 // feature string. This is important for setting features that are implied
156 // based on the architecture version.
157 std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple, CPUString);
158 if (!FS.empty()) {
159 if (!ArchFS.empty())
160 ArchFS = (Twine(ArchFS) + "," + FS).str();
161 else
162 ArchFS = std::string(FS);
163 }
164 ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, ArchFS);
165
166 // FIXME: This used to enable V6T2 support implicitly for Thumb2 mode.
167 // Assert this for now to make the change obvious.
168 assert(hasV6T2Ops() || !hasThumb2());
169
170 if (genExecuteOnly()) {
171 // Execute only support for >= v8-M Baseline requires movt support
172 if (hasV8MBaselineOps())
173 NoMovt = false;
174 if (!hasV6MOps())
175 report_fatal_error("Cannot generate execute-only code for this target");
176 }
177
178 // Keep a pointer to static instruction cost data for the specified CPU.
179 SchedModel = getSchedModelForCPU(CPUString);
180
181 // Initialize scheduling itinerary for the specified CPU.
182 InstrItins = getInstrItineraryForCPU(CPUString);
183
184 // FIXME: this is invalid for WindowsCE
185 if (isTargetWindows())
186 NoARM = true;
187
188 if (TM.isAAPCS_ABI())
190 if (TM.isAAPCS16_ABI())
191 stackAlignment = Align(16);
192
193 // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
194 // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
195 // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
196 // support in the assembler and linker to be used. This would need to be
197 // fixed to fully support tail calls in Thumb1.
198 //
199 // For ARMv8-M, we /do/ implement tail calls. Doing this is tricky for v8-M
200 // baseline, since the LDM/POP instruction on Thumb doesn't take LR. This
201 // means if we need to reload LR, it takes extra instructions, which outweighs
202 // the value of the tail call; but here we don't know yet whether LR is going
203 // to be used. We take the optimistic approach of generating the tail call and
204 // perhaps taking a hit if we need to restore the LR.
205
206 // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
207 // but we need to make sure there are enough registers; the only valid
208 // registers are the 4 used for parameters. We don't currently do this
209 // case.
210
211 SupportsTailCall = !isThumb1Only() || hasV8MBaselineOps();
212
213 switch (IT) {
214 case DefaultIT:
215 RestrictIT = false;
216 break;
217 case RestrictedIT:
218 RestrictIT = true;
219 break;
220 }
221
222 // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
223 const FeatureBitset &Bits = getFeatureBits();
224 if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters
226 HasNEONForFP = true;
227
228 if (isRWPI())
229 ReserveR9 = true;
230
231 // If MVEVectorCostFactor is still 0 (has not been set to anything else), default it to 2
232 if (MVEVectorCostFactor == 0)
234
235 // FIXME: Teach TableGen to deal with these instead of doing it manually here.
236 switch (ARMProcFamily) {
237 case Others:
238 case CortexA5:
239 break;
240 case CortexA7:
242 break;
243 case CortexA8:
245 break;
246 case CortexA9:
249 break;
250 case CortexA12:
251 break;
252 case CortexA15:
256 break;
257 case CortexA17:
258 case CortexA32:
259 case CortexA35:
260 case CortexA53:
261 case CortexA55:
262 case CortexA57:
263 case CortexA72:
264 case CortexA73:
265 case CortexA75:
266 case CortexA76:
267 case CortexA77:
268 case CortexA78:
269 case CortexA78AE:
270 case CortexA78C:
271 case CortexA510:
272 case CortexA710:
273 case CortexR4:
274 case CortexR5:
275 case CortexR7:
276 case CortexM3:
277 case CortexM55:
278 case CortexM7:
279 case CortexM85:
280 case CortexR52:
281 case CortexR52plus:
282 case CortexX1:
283 case CortexX1C:
284 break;
285 case Exynos:
288 if (!isThumb())
290 break;
291 case Kryo:
292 break;
293 case Krait:
295 break;
296 case NeoverseV1:
297 break;
298 case Swift:
303 break;
304 }
305}
306
308 return TM.getRelocationModel() == Reloc::ROPI ||
309 TM.getRelocationModel() == Reloc::ROPI_RWPI;
310}
312 return TM.getRelocationModel() == Reloc::RWPI ||
313 TM.getRelocationModel() == Reloc::ROPI_RWPI;
314}
315
317 if (!TM.shouldAssumeDSOLocal(GV))
318 return true;
319
320 // 32 bit macho has no relocation for a-b if a is undefined, even if b is in
321 // the section that is being relocated. This means we have to use a load even
322 // for GVs that are known to be local to the dso.
323 if (isTargetMachO() && TM.isPositionIndependent() &&
325 return true;
326
327 return false;
328}
329
331 return isTargetELF() && TM.isPositionIndependent() && !GV->isDSOLocal();
332}
333
335 return SchedModel.MispredictPenalty;
336}
337
339 // The MachineScheduler can increase register usage, so we use more high
340 // registers and end up with more T2 instructions that cannot be converted to
341 // T1 instructions. At least until we do better at converting to thumb1
342 // instructions, on cortex-m at Oz where we are size-paranoid, don't use the
343 // Machine scheduler, relying on the DAG register pressure scheduler instead.
344 if (isMClass() && hasMinSize())
345 return false;
346 // Enable the MachineScheduler before register allocation for subtargets
347 // with the use-misched feature.
348 return useMachineScheduler();
349}
350
352 // Enable SubRegLiveness for MVE to better optimize s subregs for mqpr regs
353 // and q subregs for qqqqpr regs.
354 return hasMVEIntegerOps();
355}
356
358 // Enable the MachinePipeliner before register allocation for subtargets
359 // with the use-mipipeliner feature.
360 return getSchedModel().hasInstrSchedModel() && useMachinePipeliner();
361}
362
// Unconditionally returns false: this subtarget never opts in to using a DFA
// for SMS.
// NOTE(review): "SMS" presumably refers to the swing modulo scheduling done by
// the MachinePipeliner -- confirm against the TargetSubtargetInfo hook this
// overrides.
363bool ARMSubtarget::useDFAforSMS() const { return false; }
364
365// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
368 return false;
369 if (disablePostRAScheduler())
370 return false;
371 // Thumb1 cores will generally not benefit from post-ra scheduling
372 return !isThumb1Only();
373}
374
377 return false;
378 if (disablePostRAScheduler())
379 return false;
380 return !isThumb1Only();
381}
382
384 // For general targets, the prologue can grow when VFPs are allocated with
385 // stride 4 (more vpush instructions). But WatchOS uses a compact unwind
386 // format which it's more important to get right.
387 return isTargetWatchABI() ||
388 (useWideStrideVFP() && !OptMinSize);
389}
390
392 // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
393 // immediates as it is inherently position independent, and may be out of
394 // range otherwise.
395 return !NoMovt && hasV8MBaselineOps() &&
396 (isTargetWindows() || !OptMinSize || genExecuteOnly());
397}
398
400 // Enable fast-isel for any target, for testing only.
401 if (ForceFastISel)
402 return true;
403
404 // Limit fast-isel to the targets that are or have been tested.
405 if (!hasV6Ops())
406 return false;
407
408 // Thumb2 support on iOS; ARM support on iOS and Linux.
409 return TM.Options.EnableFastISel && ((isTargetMachO() && !isThumb1Only()) ||
410 (isTargetLinux() && !isThumb()));
411}
412
414 // The GPR register class has multiple possible allocation orders, with
415 // tradeoffs preferred by different sub-architectures and optimisation goals.
416 // The allocation orders are:
417 // 0: (the default tablegen order, not used)
418 // 1: r14, r0-r13
419 // 2: r0-r7
420 // 3: r0-r7, r12, lr, r8-r11
421 // Note that the register allocator will change this order so that
422 // callee-saved registers are used later, as they require extra work in the
423 // prologue/epilogue (though we sometimes override that).
424
425 // For thumb1-only targets, only the low registers are allocatable.
426 if (isThumb1Only())
427 return 2;
428
429 // Allocate low registers first, so we can select more 16-bit instructions.
430 // We also (in ignoreCSRForAllocationOrder) override the default behaviour
431 // with regards to callee-saved registers, because pushing extra registers is
432 // much cheaper (in terms of code size) than using high registers. After
433 // that, we allocate r12 (doesn't need to be saved), lr (saving it means we
434 // can return with the pop, don't need an extra "bx lr") and then the rest of
435 // the high registers.
436 if (isThumb2() && MF.getFunction().hasMinSize())
437 return 3;
438
439 // Otherwise, allocate in the default order, using LR first because saving it
440 // allows a shorter epilogue sequence.
441 return 1;
442}
443
445 MCRegister PhysReg) const {
446 // To minimize code size in Thumb2, we prefer the usage of low regs (lower
447 // cost per use) so we can use narrow encoding. By default, caller-saved
448 // registers (e.g. lr, r12) are always allocated first, regardless of
449 // their cost per use. When optForMinSize, we prefer the low regs even if
450 // they are CSR because usually push/pop can be folded into existing ones.
451 return isThumb2() && MF.getFunction().hasMinSize() &&
452 ARM::GPRRegClass.contains(PhysReg);
453}
454
457 const Function &F = MF.getFunction();
458 const MachineFrameInfo &MFI = MF.getFrameInfo();
459
460 // Thumb1 always splits the pushes at R7, because the Thumb1 push instruction
461 // cannot use high registers except for lr.
462 if (isThumb1Only())
463 return SplitR7;
464
465 // If R7 is the frame pointer, we must split at R7 to ensure that the
466 // previous frame pointer (R7) and return address (LR) are adjacent on the
467 // stack, to form a valid frame record.
468 if (getFramePointerReg() == ARM::R7 &&
470 return SplitR7;
471
472 // Returns SplitR11WindowsSEH when the stack pointer needs to be
473 // restored from the frame pointer r11 + an offset and Windows CFI is enabled.
474 // This stack unwinding cannot be expressed with SEH unwind opcodes when done
475 // with a single push, making it necessary to split the push into r4-r10, and
476 // another containing r11+lr.
477 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
478 F.needsUnwindTableEntry() &&
479 (MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF)))
480 return SplitR11WindowsSEH;
481
482 // Returns SplitR11AAPCSSignRA when the frame pointer is R11, requiring R11
483 // and LR to be adjacent on the stack, and branch signing is enabled,
484 // requiring R12 to be on the stack.
486 getFramePointerReg() == ARM::R11 &&
488 return SplitR11AAPCSSignRA;
489 return NoSplit;
490}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isThumb(const MCSubtargetInfo &STI)
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the Machinelegalizer class for ARM.
This file declares the targeting of the RegisterBankInfo class for ARM.
static cl::opt< bool > UseFusedMulOps("arm-use-mulops", cl::init(true), cl::Hidden)
static cl::opt< bool > ForceFastISel("arm-force-fast-isel", cl::init(false), cl::Hidden)
ForceFastISel - Use the fast-isel, even for subtargets where it is not currently supported (for testi...
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
ITMode
@ RestrictedIT
@ DefaultIT
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define F(x, y, z)
Definition MD5.cpp:55
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
This class provides the information for the target register banks.
bool useFastISel() const
True if fast-isel is used.
bool isTargetMachO() const
bool IsLittle
IsLittle - The target is Little Endian.
bool enablePostRAScheduler() const override
True for some subtargets at > -O0.
ARMLdStMultipleTiming LdStMultipleTiming
What kind of timing do load multiple/store multiple have (double issue, single issue etc).
bool hasARMOps() const
unsigned getGPRAllocationOrder(const MachineFunction &MF) const
const RegisterBankInfo * getRegBankInfo() const override
unsigned MaxInterleaveFactor
const ARMBaseTargetMachine & TM
bool isThumb1Only() const
ARMProcFamilyEnum ARMProcFamily
ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
bool isThumb2() const
bool useDFAforSMS() const override
MCPhysReg getFramePointerReg() const
DenormalMode DM
DM - Denormal mode NEON and VFP RunFast mode are not IEEE 754 compliant, use this field to determine ...
bool isTargetWindows() const
bool enableSubRegLiveness() const override
Check whether this subtarget wants to use subregister liveness.
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned MVEVectorCostFactor
The cost factor for MVE instructions, representing the multiple beats an.
const ARMTargetLowering * getTargetLowering() const override
ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle, bool MinSize=false, DenormalMode DM=DenormalMode::getIEEE())
This constructor initializes the data members to match that of the specified triple.
MCSchedModel SchedModel
SchedModel - Processor specific instruction costs.
std::string CPUString
CPUString - String name of used CPU.
unsigned getMispredictionPenalty() const
unsigned PreferBranchLogAlignment
What alignment is preferred for loop bodies and functions, in log2(bytes).
Triple TargetTriple
TargetTriple - What processor and OS we're targeting.
bool enableMachineScheduler() const override
Returns true if machine scheduler should be enabled.
bool isTargetDarwin() const
const ARMBaseRegisterInfo * getRegisterInfo() const override
InstrItineraryData InstrItins
Selected instruction itineraries (one entry per itinerary class.)
bool useStride4VFPs() const
bool OptMinSize
OptMinSize - True if we're optimising for minimum code size, equal to the function attribute.
bool RestrictIT
RestrictIT - If true, the subtarget disallows generation of complex IT blocks.
bool ignoreCSRForAllocationOrder(const MachineFunction &MF, MCRegister PhysReg) const override
Align stackAlignment
stackAlignment - The minimum alignment known to hold of the stack frame on entry to the function and ...
unsigned PartialUpdateClearance
Clearance before partial register updates (in number of instructions)
bool enableMachinePipeliner() const override
Returns true if machine pipeliner should be enabled.
bool enablePostRAMachineScheduler() const override
True for some subtargets at > -O0.
InstructionSelector * getInstructionSelector() const override
bool isXRaySupported() const override
const CallLowering * getCallLowering() const override
enum PushPopSplitVariation getPushPopSplitVariation(const MachineFunction &MF) const
bool hasMinSize() const
ARMSubtarget & initializeSubtargetDependencies(StringRef CPU, StringRef FS)
initializeSubtargetDependencies - Initializes using a CPU and feature string so that we can use initi...
PushPopSplitVariation
How the push and pop instructions of callee saved general-purpose registers should be split.
@ SplitR11WindowsSEH
When the stack frame size is not known (because of variable-sized objects or realignment),...
@ SplitR7
R7 and LR must be adjacent, because R7 is the frame pointer, and must point to a frame record consist...
@ SplitR11AAPCSSignRA
When generating AAPCS-compliant frame chains, R11 is the frame pointer, and must be pushed adjacent t...
@ NoSplit
All GPRs can be pushed in a single instruction.
bool isGVInGOT(const GlobalValue *GV) const
Returns the constant pool modifier needed to access the GV.
bool isTargetWatchABI() const
bool UseMulOps
UseMulOps - True if non-microcoded fused integer multiply-add and multiply-subtract instructions shou...
const TargetOptions & Options
Options passed via command line that could influence the target.
@ DoubleIssueCheckUnalignedAccess
Can load/store 2 registers/cycle, but needs an extra cycle if the access is not 64-bit aligned.
@ DoubleIssue
Can load/store 2 registers/cycle.
@ SingleIssuePlusExtras
Can load/store 1 register/cycle, but needs an extra cycle for address computation and potentially als...
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
ParseSubtargetFeatures - Parses features string setting specified subtarget options.
bool useMachinePipeliner() const
bool useMachineScheduler() const
const LegalizerInfo * getLegalizerInfo() const override
bool isTargetLinux() const
bool isMClass() const
bool SupportsTailCall
SupportsTailCall - True if the OS supports tail call.
int PreISelOperandLatencyAdjustment
The adjustment that we need to apply to get the operand latency from the operand cycle returned by th...
bool isTargetELF() const
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
bool isDSOLocal() const
bool isDeclarationForLinker() const
bool hasCommonLinkage() const
bool usesWindowsCFI() const
Definition MCAsmInfo.h:652
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Holds all the information related to register banks.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
LLVM_ABI bool FramePointerIsReserved(const MachineFunction &MF) const
FramePointerIsReserved - This returns true if the frame pointer must always either point to a new fra...
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
LLVM_ABI StringRef getArchName() const
Get the architecture (first) component of the triple.
Definition Triple.cpp:1389
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
std::string ParseARMTriple(const Triple &TT, StringRef CPU)
LLVM_ABI ArchKind parseArch(StringRef Arch)
@ Swift
Calling convention for Swift.
Definition CallingConv.h:69
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
InstructionSelector * createARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI, const ARMRegisterBankInfo &RBI)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getPreserveSign()