//===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Hexagon specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "HexagonSubtarget.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <map>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "hexagon-subtarget"

#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "HexagonGenSubtargetInfo.inc"

static cl::opt<bool> EnableBSBSched("enable-bsb-sched", cl::Hidden,
                                    cl::init(true));

static cl::opt<bool> EnableTCLatencySched("enable-tc-latency-sched", cl::Hidden,
                                          cl::init(false));

static cl::opt<bool>
    EnableDotCurSched("enable-cur-sched", cl::Hidden, cl::init(true),
                      cl::desc("Enable the scheduler to generate .cur"));

static cl::opt<bool>
    DisableHexagonMISched("disable-hexagon-misched", cl::Hidden,
                          cl::desc("Disable Hexagon MI Scheduling"));

static cl::opt<bool> OverrideLongCalls(
    "hexagon-long-calls", cl::Hidden,
    cl::desc("If present, forces/disables the use of long calls"));

static cl::opt<bool>
    EnablePredicatedCalls("hexagon-pred-calls", cl::Hidden,
                          cl::desc("Consider calls to be predicable"));

static cl::opt<bool> SchedPredsCloser("sched-preds-closer", cl::Hidden,
                                      cl::init(true));

static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
                                             cl::Hidden, cl::init(true));

static cl::opt<bool> EnableCheckBankConflict(
    "hexagon-check-bank-conflict", cl::Hidden, cl::init(true),
    cl::desc("Enable checking for cache bank conflicts"));

HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
                                   StringRef FS, const TargetMachine &TM)
    : HexagonGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
      OptLevel(TM.getOptLevel()),
      CPUString(std::string(Hexagon_MC::selectHexagonCPU(CPU))),
      TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
      RegInfo(getHwMode()), TLInfo(TM, *this),
      InstrItins(getInstrItineraryForCPU(CPUString)) {
  Hexagon_MC::addArchSubtarget(this, FS);
  // Beware of the default constructor of InstrItineraryData: it will
  // reset all members to 0.
  assert(InstrItins.Itineraries != nullptr && "InstrItins not initialized");
}

HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
  std::optional<Hexagon::ArchEnum> ArchVer = Hexagon::getCpu(CPUString);
  if (ArchVer)
    HexagonArchVersion = *ArchVer;
  else
    llvm_unreachable("Unrecognized Hexagon processor version");

  UseHVX128BOps = false;
  UseHVX64BOps = false;
  UseAudioOps = false;
  UseLongCalls = false;

  SubtargetFeatures Features(FS);

  // Turn on QFloat if the HVX version is v68+.
  // The function ParseSubtargetFeatures will set feature bits and initialize
  // subtarget's variables all in one, so there isn't a good way to preprocess
  // the feature string, other than by tinkering with it directly.
  auto IsQFloatFS = [](StringRef F) {
    return F == "+hvx-qfloat" || F == "-hvx-qfloat";
  };
  if (!llvm::count_if(Features.getFeatures(), IsQFloatFS)) {
    auto getHvxVersion = [&Features](StringRef FS) -> StringRef {
      for (StringRef F : llvm::reverse(Features.getFeatures())) {
        if (F.starts_with("+hvxv"))
          return F;
      }
      for (StringRef F : llvm::reverse(Features.getFeatures())) {
        if (F == "-hvx")
          return StringRef();
        if (F.starts_with("+hvx") || F == "-hvx")
          return F.take_front(4); // Return "+hvx" or "-hvx".
      }
      return StringRef();
    };

    bool AddQFloat = false;
    StringRef HvxVer = getHvxVersion(FS);
    if (HvxVer.starts_with("+hvxv")) {
      int Ver = 0;
      if (!HvxVer.drop_front(5).consumeInteger(10, Ver) && Ver >= 68)
        AddQFloat = true;
    } else if (HvxVer == "+hvx") {
      if (hasV68Ops())
        AddQFloat = true;
    }

    if (AddQFloat)
      Features.AddFeature("+hvx-qfloat");
  }
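
  // Illustrative walk-through (hypothetical feature string, not from this
  // file): for FS = "+hvx-length128b,+hvxv69", getHvxVersion returns
  // "+hvxv69", consumeInteger parses 69, and since 69 >= 68 the feature
  // "+hvx-qfloat" is appended before ParseSubtargetFeatures runs below.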

  std::string FeatureString = Features.getString();
  ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FeatureString);

  if (useHVXV68Ops())
    UseHVXFloatingPoint = UseHVXIEEEFPOps || UseHVXQFloatOps;

  if (UseHVXQFloatOps && UseHVXIEEEFPOps && UseHVXFloatingPoint)
    LLVM_DEBUG(
        dbgs() << "Behavior is undefined for simultaneous qfloat and ieee hvx codegen...");

  if (OverrideLongCalls.getPosition())
    UseLongCalls = OverrideLongCalls;

  UseBSBScheduling = hasV60Ops() && EnableBSBSched;

  if (isTinyCore()) {
    // Tiny core has a single thread, so back-to-back scheduling is enabled by
    // default.
    if (!EnableBSBSched.getPosition())
      UseBSBScheduling = false;
  }

  FeatureBitset FeatureBits = getFeatureBits();
  if (HexagonDisableDuplex)
    setFeatureBits(FeatureBits.reset(Hexagon::FeatureDuplex));
  setFeatureBits(Hexagon_MC::completeHVXFeatures(FeatureBits));

  return *this;
}

bool HexagonSubtarget::isHVXElementType(MVT Ty, bool IncludeBool) const {
  if (!useHVXOps())
    return false;
  if (Ty.isVector())
    Ty = Ty.getVectorElementType();
  if (IncludeBool && Ty == MVT::i1)
    return true;
  ArrayRef<MVT> ElemTypes = getHVXElementTypes();
  return llvm::is_contained(ElemTypes, Ty);
}

bool HexagonSubtarget::isHVXVectorType(EVT VecTy, bool IncludeBool) const {
  if (!VecTy.isSimple())
    return false;
  if (!VecTy.isVector() || !useHVXOps() || VecTy.isScalableVector())
    return false;
  MVT ElemTy = VecTy.getSimpleVT().getVectorElementType();
  if (!IncludeBool && ElemTy == MVT::i1)
    return false;

  unsigned HwLen = getVectorLength();
  unsigned NumElems = VecTy.getVectorNumElements();
  ArrayRef<MVT> ElemTypes = getHVXElementTypes();

  if (IncludeBool && ElemTy == MVT::i1) {
    // Boolean HVX vector types are formed from regular HVX vector types
    // by replacing the element type with i1.
    for (MVT T : ElemTypes)
      if (NumElems * T.getSizeInBits() == 8 * HwLen)
        return true;
    return false;
  }

  unsigned VecWidth = VecTy.getSizeInBits();
  if (VecWidth != 8 * HwLen && VecWidth != 16 * HwLen)
    return false;
  return llvm::is_contained(ElemTypes, ElemTy);
}
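
// Worked example (assumed values, for illustration only): in 128-byte HVX
// mode, HwLen = 128, so single vectors are 8 * 128 = 1024 bits and vector
// pairs are 2048 bits. v32i32 (1024 bits) and v64i32 (2048 bits) qualify;
// the boolean type v128i1 qualifies because 128 lanes of i8 give
// 128 * 8 == 1024 bits.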

bool HexagonSubtarget::isTypeForHVX(Type *VecTy, bool IncludeBool) const {
  if (!VecTy->isVectorTy() || isa<ScalableVectorType>(VecTy))
    return false;
  // Avoid types like <2 x i32*>.
  Type *ScalTy = VecTy->getScalarType();
  if (!ScalTy->isIntegerTy() &&
      !(ScalTy->isFloatingPointTy() && useHVXFloatingPoint()))
    return false;
  // The given type may be something like <17 x i32>, which is not MVT,
  // but can be represented as (non-simple) EVT.
  EVT Ty = EVT::getEVT(VecTy, /*HandleUnknown*/ false);
  if (!Ty.getVectorElementType().isSimple())
    return false;

  auto isHvxTy = [this, IncludeBool](MVT SimpleTy) {
    if (isHVXVectorType(SimpleTy, IncludeBool))
      return true;
    auto Action = getTargetLowering()->getPreferredVectorAction(SimpleTy);
    return Action == TargetLoweringBase::TypeWidenVector;
  };

  // Round up EVT to have power-of-2 elements, and keep checking if it
  // qualifies for HVX, dividing it in half after each step.
  MVT ElemTy = Ty.getVectorElementType().getSimpleVT();
  unsigned VecLen = PowerOf2Ceil(Ty.getVectorNumElements());
  while (VecLen > 1) {
    MVT SimpleTy = MVT::getVectorVT(ElemTy, VecLen);
    if (SimpleTy.isValid() && isHvxTy(SimpleTy))
      return true;
    VecLen /= 2;
  }

  return false;
}
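
// Illustrative trace (hypothetical type, not from this file): for <17 x i32>,
// PowerOf2Ceil(17) = 32, so the loop would test v32i32, then v16i32, v8i32,
// v4i32, and v2i32; with 128-byte HVX, v32i32 (1024 bits) already matches and
// the function returns true on the first iteration.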
243
245 for (SUnit &SU : DAG->SUnits) {
246 if (!SU.isInstr())
247 continue;
249 for (auto &D : SU.Preds)
250 if (D.getKind() == SDep::Output && D.getReg() == Hexagon::USR_OVF)
251 Erase.push_back(D);
252 for (auto &E : Erase)
253 SU.removePred(E);
254 }
255}

void HexagonSubtarget::HVXMemLatencyMutation::apply(ScheduleDAGInstrs *DAG) {
  for (SUnit &SU : DAG->SUnits) {
    // Update the latency of chain edges between v60 vector load or store
    // instructions to be 1. These instructions cannot be scheduled in the
    // same packet.
    MachineInstr &MI1 = *SU.getInstr();
    auto *QII = static_cast<const HexagonInstrInfo *>(DAG->TII);
    bool IsStoreMI1 = MI1.mayStore();
    bool IsLoadMI1 = MI1.mayLoad();
    if (!QII->isHVXVec(MI1) || !(IsStoreMI1 || IsLoadMI1))
      continue;
    for (SDep &SI : SU.Succs) {
      if (SI.getKind() != SDep::Order || SI.getLatency() != 0)
        continue;
      MachineInstr &MI2 = *SI.getSUnit()->getInstr();
      if (!QII->isHVXVec(MI2))
        continue;
      if ((IsStoreMI1 && MI2.mayStore()) || (IsLoadMI1 && MI2.mayLoad())) {
        SI.setLatency(1);
        SU.setHeightDirty();
        // Change the dependence in the opposite direction too.
        for (SDep &PI : SI.getSUnit()->Preds) {
          if (PI.getSUnit() != &SU || PI.getKind() != SDep::Order)
            continue;
          PI.setLatency(1);
          SI.getSUnit()->setDepthDirty();
        }
      }
    }
  }
}
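
// For instance (illustrative): two HVX vector loads joined by a zero-latency
// Order (chain) edge would otherwise be free to land in one packet; raising
// the edge latency to 1 in both directions forces them into separate packets.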

// Check if a call and subsequent A2_tfrpi instructions should maintain
// scheduling affinity. We are looking for the TFRI to be consumed in
// the next instruction. This should help reduce the instances of
// double register pairs being allocated and scheduled before a call
// when not used until after the call. This situation is exacerbated
// by the fact that we allocate the pair from the callee saves list,
// leading to excess spills and restores.
bool HexagonSubtarget::CallMutation::shouldTFRICallBind(
    const HexagonInstrInfo &HII, const SUnit &Inst1,
    const SUnit &Inst2) const {
  if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
    return false;

  // TypeXTYPE are 64 bit operations.
  unsigned Type = HII.getType(*Inst2.getInstr());
  return Type == HexagonII::TypeS_2op || Type == HexagonII::TypeS_3op ||
         Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM;
}
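
// Example scenario (illustrative): an A2_tfrpi that writes a 64-bit register
// pair consumed by the very next ALU64 instruction is kept adjacent to that
// consumer, so the pair is not allocated from the callee-saved list and held
// live across the preceding call.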

void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
  SUnit *LastSequentialCall = nullptr;
  // Map from virtual register to physical register from the copy.
  DenseMap<unsigned, unsigned> VRegHoldingReg;
  // Map from the physical register to the instruction that uses virtual
  // register. This is used to create the barrier edge.
  DenseMap<unsigned, SUnit *> LastVRegUse;
  auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
  auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();

  // Currently we only catch the situation when a compare gets scheduled
  // before the preceding call.
  for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
    // Remember the call.
    if (DAG->SUnits[su].getInstr()->isCall())
      LastSequentialCall = &DAG->SUnits[su];
    // Look for a compare that defines a predicate.
    else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
      DAG->addEdge(&DAG->SUnits[su], SDep(LastSequentialCall, SDep::Barrier));
    // Look for call and tfri* instructions.
    else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e - 1 &&
             shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su + 1]))
      DAG->addEdge(&DAG->SUnits[su], SDep(&DAG->SUnits[su - 1], SDep::Barrier));
    // Prevent redundant register copies due to reads and writes of physical
    // registers. The original motivation for this was the code generated
    // between two calls, which is caused by both the return value and the
    // argument for the next call being in %r0.
    // Example:
    //   1: <call1>
    //   2: %vreg = COPY %r0
    //   3: <use of %vreg>
    //   4: %r0 = ...
    //   5: <call2>
    // The scheduler would often swap 3 and 4, so an additional register is
    // needed. This code inserts a Barrier dependence between 3 and 4 to
    // prevent this.
    // The code below checks for all the physical registers, not just R0/D0/V0.
    else if (SchedRetvalOptimization) {
      const MachineInstr *MI = DAG->SUnits[su].getInstr();
      if (MI->isCopy() && MI->getOperand(1).getReg().isPhysical()) {
        // %vregX = COPY %r0
        VRegHoldingReg[MI->getOperand(0).getReg()] = MI->getOperand(1).getReg();
        LastVRegUse.erase(MI->getOperand(1).getReg());
      } else {
        for (const MachineOperand &MO : MI->operands()) {
          if (!MO.isReg())
            continue;
          if (MO.isUse() && !MI->isCopy() &&
              VRegHoldingReg.count(MO.getReg())) {
            // <use of %vregX>
            LastVRegUse[VRegHoldingReg[MO.getReg()]] = &DAG->SUnits[su];
          } else if (MO.isDef() && MO.getReg().isPhysical()) {
            for (MCRegAliasIterator AI(MO.getReg(), &TRI, true); AI.isValid();
                 ++AI) {
              if (auto It = LastVRegUse.find(*AI); It != LastVRegUse.end()) {
                if (It->second != &DAG->SUnits[su])
                  // %r0 = ...
                  DAG->addEdge(&DAG->SUnits[su],
                               SDep(It->second, SDep::Barrier));
                LastVRegUse.erase(It);
              }
            }
          }
        }
      }
    }
  }
}

void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) {
  if (!EnableCheckBankConflict)
    return;

  const auto &HII = static_cast<const HexagonInstrInfo &>(*DAG->TII);

  // Create artificial edges between loads that could likely cause a bank
  // conflict. Since such loads would normally not have any dependency
  // between them, we cannot rely on existing edges.
  for (unsigned i = 0, e = DAG->SUnits.size(); i != e; ++i) {
    SUnit &S0 = DAG->SUnits[i];
    MachineInstr &L0 = *S0.getInstr();
    if (!L0.mayLoad() || L0.mayStore() ||
        HII.getAddrMode(L0) != HexagonII::BaseImmOffset)
      continue;
    int64_t Offset0;
    LocationSize Size0 = LocationSize::precise(0);
    MachineOperand *BaseOp0 = HII.getBaseAndOffset(L0, Offset0, Size0);
    // If the access size is longer than the L1 cache line, skip the check.
    if (BaseOp0 == nullptr || !BaseOp0->isReg() || !Size0.hasValue() ||
        Size0.getValue() >= 32)
      continue;
    // Scan only up to 32 instructions ahead (to avoid n^2 complexity).
    for (unsigned j = i + 1, m = std::min(i + 32, e); j != m; ++j) {
      SUnit &S1 = DAG->SUnits[j];
      MachineInstr &L1 = *S1.getInstr();
      if (!L1.mayLoad() || L1.mayStore() ||
          HII.getAddrMode(L1) != HexagonII::BaseImmOffset)
        continue;
      int64_t Offset1;
      LocationSize Size1 = LocationSize::precise(0);
      MachineOperand *BaseOp1 = HII.getBaseAndOffset(L1, Offset1, Size1);
      if (BaseOp1 == nullptr || !BaseOp1->isReg() || !Size1.hasValue() ||
          Size1.getValue() >= 32 || BaseOp0->getReg() != BaseOp1->getReg())
        continue;
      // Check bits 3 and 4 of the offset: if they differ, a bank conflict
      // is unlikely.
      if (((Offset0 ^ Offset1) & 0x18) != 0)
        continue;
      // Bits 3 and 4 are the same, add an artificial edge and set extra
      // latency.
      SDep A(&S0, SDep::Artificial);
      A.setLatency(1);
      S1.addPred(A, true);
    }
  }
}
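
// Numeric check of the bank heuristic (illustrative): offsets 8 and 40 give
// (8 ^ 40) & 0x18 == 0x20 & 0x18 == 0, i.e. bits 3-4 match, so the loads may
// hit the same bank and get the artificial edge; offsets 0 and 8 give
// (0 ^ 8) & 0x18 == 8 != 0, so they are left alone.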

/// Enable use of alias analysis during code generation (during MI
/// scheduling, DAGCombine, etc.).
bool HexagonSubtarget::useAA() const {
  if (OptLevel != CodeGenOptLevel::None)
    return true;
  return false;
}

/// Perform target specific adjustments to the latency of a schedule
/// dependency.
void HexagonSubtarget::adjustSchedDependency(
    SUnit *Src, int SrcOpIdx, SUnit *Dst, int DstOpIdx, SDep &Dep,
    const TargetSchedModel *SchedModel) const {
  if (!Src->isInstr() || !Dst->isInstr())
    return;

  MachineInstr *SrcInst = Src->getInstr();
  MachineInstr *DstInst = Dst->getInstr();
  const HexagonInstrInfo *QII = getInstrInfo();

  // Instructions with .new operands have zero latency.
  SmallPtrSet<SUnit *, 4> ExclSrc;
  SmallPtrSet<SUnit *, 4> ExclDst;
  if (QII->canExecuteInBundle(*SrcInst, *DstInst) &&
      isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
    Dep.setLatency(0);
    return;
  }

  // Set the latency for a copy to zero since we hope that it will get
  // removed.
  if (DstInst->isCopy())
    Dep.setLatency(0);

  // If it's a REG_SEQUENCE/COPY, use its destination instruction to determine
  // the correct latency.
  // If there are multiple uses of the def of COPY/REG_SEQUENCE, set the
  // latency only if the latencies on all the uses are equal, otherwise set it
  // to default.
  if ((DstInst->isRegSequence() || DstInst->isCopy())) {
    Register DReg = DstInst->getOperand(0).getReg();
    std::optional<unsigned> DLatency;
    for (const auto &DDep : Dst->Succs) {
      MachineInstr *DDst = DDep.getSUnit()->getInstr();
      int UseIdx = -1;
      for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) {
        const MachineOperand &MO = DDst->getOperand(OpNum);
        if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == DReg) {
          UseIdx = OpNum;
          break;
        }
      }

      if (UseIdx == -1)
        continue;

      std::optional<unsigned> Latency =
          InstrInfo.getOperandLatency(&InstrItins, *SrcInst, 0, *DDst, UseIdx);

      // Set DLatency for the first time.
      if (!DLatency)
        DLatency = Latency;

      // For multiple uses, if the Latency is different across uses, reset
      // DLatency.
      if (DLatency != Latency) {
        DLatency = std::nullopt;
        break;
      }
    }
    Dep.setLatency(DLatency.value_or(0));
  }

  // Try to schedule uses near definitions to generate .cur.
  ExclSrc.clear();
  ExclDst.clear();
  if (EnableDotCurSched && QII->isToBeScheduledASAP(*SrcInst, *DstInst) &&
      isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
    Dep.setLatency(0);
    return;
  }
  int Latency = Dep.getLatency();
  bool IsArtificial = Dep.isArtificial();
  Latency = updateLatency(*SrcInst, *DstInst, IsArtificial, Latency);
  Dep.setLatency(Latency);
}
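
// Illustrative case: for "%v = COPY %r0" feeding several users, the edge into
// the COPY inherits the users' operand latency only when all users agree;
// otherwise DLatency is reset and the edge falls back to latency 0 above.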

void HexagonSubtarget::getPostRAMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(std::make_unique<UsrOverflowMutation>());
  Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
  Mutations.push_back(std::make_unique<BankConflictMutation>());
}

void HexagonSubtarget::getSMSMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(std::make_unique<UsrOverflowMutation>());
  Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
}

// Pin the vtable to this file.
void HexagonSubtarget::anchor() {}

bool HexagonSubtarget::enableMachineScheduler() const {
  if (DisableHexagonMISched.getNumOccurrences())
    return !DisableHexagonMISched;
  return true;
}

bool HexagonSubtarget::usePredicatedCalls() const {
  return EnablePredicatedCalls;
}

int HexagonSubtarget::updateLatency(MachineInstr &SrcInst,
                                    MachineInstr &DstInst, bool IsArtificial,
                                    int Latency) const {
  if (IsArtificial)
    return 1;
  if (!hasV60Ops())
    return Latency;

  auto &QII = static_cast<const HexagonInstrInfo &>(*getInstrInfo());
  // BSB scheduling.
  if (QII.isHVXVec(SrcInst) || useBSBScheduling())
    Latency = (Latency + 1) >> 1;
  return Latency;
}
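
// Arithmetic sanity check (illustrative): with BSB scheduling enabled, a
// latency of 3 becomes (3 + 1) >> 1 == 2 and a latency of 1 stays
// (1 + 1) >> 1 == 1, i.e. latencies are halved with rounding up.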

void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
  MachineInstr *SrcI = Src->getInstr();
  for (auto &I : Src->Succs) {
    if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
      continue;
    Register DepR = I.getReg();
    int DefIdx = -1;
    for (unsigned OpNum = 0; OpNum < SrcI->getNumOperands(); OpNum++) {
      const MachineOperand &MO = SrcI->getOperand(OpNum);
      bool IsSameOrSubReg = false;
      if (MO.isReg()) {
        Register MOReg = MO.getReg();
        if (DepR.isVirtual()) {
          IsSameOrSubReg = (MOReg == DepR);
        } else {
          IsSameOrSubReg = getRegisterInfo()->isSubRegisterEq(DepR, MOReg);
        }
        if (MO.isDef() && IsSameOrSubReg)
          DefIdx = OpNum;
      }
    }
    assert(DefIdx >= 0 && "Def Reg not found in Src MI");
    MachineInstr *DstI = Dst->getInstr();
    SDep T = I;
    for (unsigned OpNum = 0; OpNum < DstI->getNumOperands(); OpNum++) {
      const MachineOperand &MO = DstI->getOperand(OpNum);
      if (MO.isReg() && MO.isUse() && MO.getReg() == DepR) {
        std::optional<unsigned> Latency = InstrInfo.getOperandLatency(
            &InstrItins, *SrcI, DefIdx, *DstI, OpNum);

        // For some instructions (ex: COPY), we might end up with no latency
        // as they don't have any Itinerary class associated with them.
        if (!Latency)
          Latency = 0;
        bool IsArtificial = I.isArtificial();
        Latency = updateLatency(*SrcI, *DstI, IsArtificial, *Latency);
        I.setLatency(*Latency);
      }
    }

    // Update the latency of the opposite edge too.
    T.setSUnit(Src);
    auto F = find(Dst->Preds, T);
    assert(F != Dst->Preds.end());
    F->setLatency(I.getLatency());
  }
}

/// Change the latency between the two SUnits.
void HexagonSubtarget::changeLatency(SUnit *Src, SUnit *Dst,
                                     unsigned Lat) const {
  for (auto &I : Src->Succs) {
    if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
      continue;
    SDep T = I;
    I.setLatency(Lat);

    // Update the latency of the opposite edge too.
    T.setSUnit(Src);
    auto F = find(Dst->Preds, T);
    assert(F != Dst->Preds.end());
    F->setLatency(Lat);
  }
}

/// If the SUnit has a zero latency edge, return the other SUnit.
static SUnit *getZeroLatency(SUnit *N, SmallVector<SDep, 4> &Deps) {
  for (auto &I : Deps)
    if (I.isAssignedRegDep() && I.getLatency() == 0 &&
        !I.getSUnit()->getInstr()->isPseudo())
      return I.getSUnit();
  return nullptr;
}

// Return true if these are the best two instructions to schedule
// together with a zero latency. Only one dependence should have a zero
// latency. If there are multiple choices, choose the best, and change
// the others, if needed.
bool HexagonSubtarget::isBestZeroLatency(
    SUnit *Src, SUnit *Dst, const HexagonInstrInfo *TII,
    SmallPtrSet<SUnit *, 4> &ExclSrc, SmallPtrSet<SUnit *, 4> &ExclDst) const {
  MachineInstr &SrcInst = *Src->getInstr();
  MachineInstr &DstInst = *Dst->getInstr();

  // Ignore Boundary SU nodes as these have null instructions.
  if (Dst->isBoundaryNode())
    return false;

  if (SrcInst.isPHI() || DstInst.isPHI())
    return false;

  if (!TII->isToBeScheduledASAP(SrcInst, DstInst) &&
      !TII->canExecuteInBundle(SrcInst, DstInst))
    return false;

  // The architecture doesn't allow three dependent instructions in the same
  // packet. So, if the destination has a zero latency successor, then it's
  // not a candidate for a zero latency predecessor.
  if (getZeroLatency(Dst, Dst->Succs) != nullptr)
    return false;

  // Check if the Dst instruction is the best candidate first.
  SUnit *Best = nullptr;
  SUnit *DstBest = nullptr;
  SUnit *SrcBest = getZeroLatency(Dst, Dst->Preds);
  if (SrcBest == nullptr || Src->NodeNum >= SrcBest->NodeNum) {
    // Check that Src doesn't have a better candidate.
    DstBest = getZeroLatency(Src, Src->Succs);
    if (DstBest == nullptr || Dst->NodeNum <= DstBest->NodeNum)
      Best = Dst;
  }
  if (Best != Dst)
    return false;

  // The caller frequently adds the same dependence twice. If so, then
  // return true for this case too.
  if ((Src == SrcBest && Dst == DstBest) ||
      (SrcBest == nullptr && Dst == DstBest) ||
      (Src == SrcBest && DstBest == nullptr))
    return true;

  // Reassign the latency for the previous bests, which requires setting
  // the dependence edge in both directions.
  if (SrcBest != nullptr) {
    if (!hasV60Ops())
      changeLatency(SrcBest, Dst, 1);
    else
      restoreLatency(SrcBest, Dst);
  }
  if (DstBest != nullptr) {
    if (!hasV60Ops())
      changeLatency(Src, DstBest, 1);
    else
      restoreLatency(Src, DstBest);
  }

  // Attempt to find another opportunity for zero latency in a different
  // dependence.
  if (SrcBest && DstBest)
    // If there is an edge from SrcBest to DstBest, then try to change that
    // to 0 now.
    changeLatency(SrcBest, DstBest, 0);
  else if (DstBest) {
    // Check if the previous best destination instruction has a new zero
    // latency dependence opportunity.
    ExclSrc.insert(Src);
    for (auto &I : DstBest->Preds)
      if (ExclSrc.count(I.getSUnit()) == 0 &&
          isBestZeroLatency(I.getSUnit(), DstBest, TII, ExclSrc, ExclDst))
        changeLatency(I.getSUnit(), DstBest, 0);
  } else if (SrcBest) {
    // Check if the previous best source instruction has a new zero latency
    // dependence opportunity.
    ExclDst.insert(Dst);
    for (auto &I : SrcBest->Succs)
      if (ExclDst.count(I.getSUnit()) == 0 &&
          isBestZeroLatency(SrcBest, I.getSUnit(), TII, ExclSrc, ExclDst))
        changeLatency(SrcBest, I.getSUnit(), 0);
  }

  return true;
}

unsigned HexagonSubtarget::getL1CacheLineSize() const {
  return 32;
}

unsigned HexagonSubtarget::getL1PrefetchDistance() const {
  return 32;
}

bool HexagonSubtarget::enableSubRegLiveness() const { return true; }

Intrinsic::ID HexagonSubtarget::getIntrinsicId(unsigned Opc) const {
  struct Scalar {
    unsigned Opcode;
    Intrinsic::ID IntId;
  };
  struct Hvx {
    unsigned Opcode;
    Intrinsic::ID Int64Id, Int128Id;
  };

  static Scalar ScalarInts[] = {
#define GET_SCALAR_INTRINSICS
#include "HexagonDepInstrIntrinsics.inc"
#undef GET_SCALAR_INTRINSICS
  };

  static Hvx HvxInts[] = {
#define GET_HVX_INTRINSICS
#include "HexagonDepInstrIntrinsics.inc"
#undef GET_HVX_INTRINSICS
  };

  const auto CmpOpcode = [](auto A, auto B) { return A.Opcode < B.Opcode; };
  [[maybe_unused]] static bool SortedScalar =
      (llvm::sort(ScalarInts, CmpOpcode), true);
  [[maybe_unused]] static bool SortedHvx =
      (llvm::sort(HvxInts, CmpOpcode), true);

  auto [BS, ES] = std::make_pair(std::begin(ScalarInts), std::end(ScalarInts));
  auto [BH, EH] = std::make_pair(std::begin(HvxInts), std::end(HvxInts));

  auto FoundScalar = std::lower_bound(BS, ES, Scalar{Opc, 0}, CmpOpcode);
  if (FoundScalar != ES && FoundScalar->Opcode == Opc)
    return FoundScalar->IntId;

  auto FoundHvx = std::lower_bound(BH, EH, Hvx{Opc, 0, 0}, CmpOpcode);
  if (FoundHvx != EH && FoundHvx->Opcode == Opc) {
    unsigned HwLen = getVectorLength();
    if (HwLen == 64)
      return FoundHvx->Int64Id;
    if (HwLen == 128)
      return FoundHvx->Int128Id;
  }

  std::string error = "Invalid opcode (" + std::to_string(Opc) + ")";
  llvm_unreachable(error.c_str());
  return 0;
}
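
// Design note: the opcode tables are sorted once on first use (the comma
// expressions guarded by the [[maybe_unused]] statics run exactly once),
// after which each query is a binary search via std::lower_bound. An HVX
// opcode maps to one of two intrinsic IDs depending on whether the subtarget
// runs in 64- or 128-byte vector mode.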