55#define DEBUG_TYPE "aarch64-ldst-opt"
57STATISTIC(NumPairCreated,
"Number of load/store pair instructions generated");
58STATISTIC(NumPostFolded,
"Number of post-index updates folded");
59STATISTIC(NumPreFolded,
"Number of pre-index updates folded");
61 "Number of load/store from unscaled generated");
62STATISTIC(NumZeroStoresPromoted,
"Number of narrow zero stores promoted");
63STATISTIC(NumLoadsFromStoresPromoted,
"Number of loads from stores promoted");
64STATISTIC(NumFailedAlignmentCheck,
"Number of load/store pair transformation "
65 "not passed the alignment check");
67 "Number of const offset of index address folded");
70 "Controls which pairs are considered for renaming");
90#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
94using LdStPairFlags =
struct LdStPairFlags {
98 bool MergeForward =
false;
109 std::optional<MCPhysReg> RenameReg;
111 LdStPairFlags() =
default;
113 void setMergeForward(
bool V =
true) { MergeForward = V; }
114 bool getMergeForward()
const {
return MergeForward; }
116 void setSExtIdx(
int V) { SExtIdx = V; }
117 int getSExtIdx()
const {
return SExtIdx; }
119 void setRenameReg(
MCPhysReg R) { RenameReg = R; }
120 void clearRenameReg() { RenameReg = std::nullopt; }
121 std::optional<MCPhysReg> getRenameReg()
const {
return RenameReg; }
147 LdStPairFlags &Flags,
149 bool FindNarrowMerge);
160 const LdStPairFlags &Flags);
166 const LdStPairFlags &Flags);
178 int UnscaledOffset,
unsigned Limit);
200 unsigned BaseReg,
int Offset);
203 unsigned IndexReg,
unsigned &
Offset);
206 std::optional<MachineBasicBlock::iterator>
209 bool IsPreIdx,
bool MergeEither);
242char AArch64LoadStoreOpt::ID = 0;
249static
bool isNarrowStore(
unsigned Opc) {
253 case AArch64::STRBBui:
254 case AArch64::STURBBi:
255 case AArch64::STRHHui:
256 case AArch64::STURHHi:
264 switch (
MI.getOpcode()) {
270 case AArch64::STZ2Gi:
276 bool *IsValidLdStrOpc =
nullptr) {
278 *IsValidLdStrOpc =
true;
282 *IsValidLdStrOpc =
false;
283 return std::numeric_limits<unsigned>::max();
284 case AArch64::STRDui:
285 case AArch64::STURDi:
286 case AArch64::STRDpre:
287 case AArch64::STRQui:
288 case AArch64::STURQi:
289 case AArch64::STRQpre:
290 case AArch64::STRBBui:
291 case AArch64::STURBBi:
292 case AArch64::STRHHui:
293 case AArch64::STURHHi:
294 case AArch64::STRWui:
295 case AArch64::STRWpre:
296 case AArch64::STURWi:
297 case AArch64::STRXui:
298 case AArch64::STRXpre:
299 case AArch64::STURXi:
300 case AArch64::STR_ZXI:
301 case AArch64::LDRDui:
302 case AArch64::LDURDi:
303 case AArch64::LDRDpre:
304 case AArch64::LDRQui:
305 case AArch64::LDURQi:
306 case AArch64::LDRQpre:
307 case AArch64::LDRWui:
308 case AArch64::LDURWi:
309 case AArch64::LDRWpre:
310 case AArch64::LDRXui:
311 case AArch64::LDURXi:
312 case AArch64::LDRXpre:
313 case AArch64::STRSui:
314 case AArch64::STURSi:
315 case AArch64::STRSpre:
316 case AArch64::LDRSui:
317 case AArch64::LDURSi:
318 case AArch64::LDRSpre:
319 case AArch64::LDR_ZXI:
321 case AArch64::LDRSWui:
322 return AArch64::LDRWui;
323 case AArch64::LDURSWi:
324 return AArch64::LDURWi;
325 case AArch64::LDRSWpre:
326 return AArch64::LDRWpre;
334 case AArch64::STRBBui:
335 return AArch64::STRHHui;
336 case AArch64::STRHHui:
337 return AArch64::STRWui;
338 case AArch64::STURBBi:
339 return AArch64::STURHHi;
340 case AArch64::STURHHi:
341 return AArch64::STURWi;
342 case AArch64::STURWi:
343 return AArch64::STURXi;
344 case AArch64::STRWui:
345 return AArch64::STRXui;
353 case AArch64::STRSui:
354 case AArch64::STURSi:
355 return AArch64::STPSi;
356 case AArch64::STRSpre:
357 return AArch64::STPSpre;
358 case AArch64::STRDui:
359 case AArch64::STURDi:
360 return AArch64::STPDi;
361 case AArch64::STRDpre:
362 return AArch64::STPDpre;
363 case AArch64::STRQui:
364 case AArch64::STURQi:
365 case AArch64::STR_ZXI:
366 return AArch64::STPQi;
367 case AArch64::STRQpre:
368 return AArch64::STPQpre;
369 case AArch64::STRWui:
370 case AArch64::STURWi:
371 return AArch64::STPWi;
372 case AArch64::STRWpre:
373 return AArch64::STPWpre;
374 case AArch64::STRXui:
375 case AArch64::STURXi:
376 return AArch64::STPXi;
377 case AArch64::STRXpre:
378 return AArch64::STPXpre;
379 case AArch64::LDRSui:
380 case AArch64::LDURSi:
381 return AArch64::LDPSi;
382 case AArch64::LDRSpre:
383 return AArch64::LDPSpre;
384 case AArch64::LDRDui:
385 case AArch64::LDURDi:
386 return AArch64::LDPDi;
387 case AArch64::LDRDpre:
388 return AArch64::LDPDpre;
389 case AArch64::LDRQui:
390 case AArch64::LDURQi:
391 case AArch64::LDR_ZXI:
392 return AArch64::LDPQi;
393 case AArch64::LDRQpre:
394 return AArch64::LDPQpre;
395 case AArch64::LDRWui:
396 case AArch64::LDURWi:
397 return AArch64::LDPWi;
398 case AArch64::LDRWpre:
399 return AArch64::LDPWpre;
400 case AArch64::LDRXui:
401 case AArch64::LDURXi:
402 return AArch64::LDPXi;
403 case AArch64::LDRXpre:
404 return AArch64::LDPXpre;
405 case AArch64::LDRSWui:
406 case AArch64::LDURSWi:
407 return AArch64::LDPSWi;
408 case AArch64::LDRSWpre:
409 return AArch64::LDPSWpre;
420 case AArch64::LDRBBui:
421 return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
422 StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
423 case AArch64::LDURBBi:
424 return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
425 StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
426 case AArch64::LDRHHui:
427 return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
428 StOpc == AArch64::STRXui;
429 case AArch64::LDURHHi:
430 return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
431 StOpc == AArch64::STURXi;
432 case AArch64::LDRWui:
433 return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
434 case AArch64::LDURWi:
435 return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
436 case AArch64::LDRXui:
437 return StOpc == AArch64::STRXui;
438 case AArch64::LDURXi:
439 return StOpc == AArch64::STURXi;
451 case AArch64::STRSui:
452 return AArch64::STRSpre;
453 case AArch64::STRDui:
454 return AArch64::STRDpre;
455 case AArch64::STRQui:
456 return AArch64::STRQpre;
457 case AArch64::STRBBui:
458 return AArch64::STRBBpre;
459 case AArch64::STRHHui:
460 return AArch64::STRHHpre;
461 case AArch64::STRWui:
462 return AArch64::STRWpre;
463 case AArch64::STRXui:
464 return AArch64::STRXpre;
465 case AArch64::LDRSui:
466 return AArch64::LDRSpre;
467 case AArch64::LDRDui:
468 return AArch64::LDRDpre;
469 case AArch64::LDRQui:
470 return AArch64::LDRQpre;
471 case AArch64::LDRBBui:
472 return AArch64::LDRBBpre;
473 case AArch64::LDRHHui:
474 return AArch64::LDRHHpre;
475 case AArch64::LDRWui:
476 return AArch64::LDRWpre;
477 case AArch64::LDRXui:
478 return AArch64::LDRXpre;
479 case AArch64::LDRSWui:
480 return AArch64::LDRSWpre;
482 return AArch64::LDPSpre;
483 case AArch64::LDPSWi:
484 return AArch64::LDPSWpre;
486 return AArch64::LDPDpre;
488 return AArch64::LDPQpre;
490 return AArch64::LDPWpre;
492 return AArch64::LDPXpre;
494 return AArch64::STPSpre;
496 return AArch64::STPDpre;
498 return AArch64::STPQpre;
500 return AArch64::STPWpre;
502 return AArch64::STPXpre;
504 return AArch64::STGPreIndex;
506 return AArch64::STZGPreIndex;
508 return AArch64::ST2GPreIndex;
509 case AArch64::STZ2Gi:
510 return AArch64::STZ2GPreIndex;
512 return AArch64::STGPpre;
521 case AArch64::LDRBroX:
522 return AArch64::LDRBui;
523 case AArch64::LDRBBroX:
524 return AArch64::LDRBBui;
525 case AArch64::LDRSBXroX:
526 return AArch64::LDRSBXui;
527 case AArch64::LDRSBWroX:
528 return AArch64::LDRSBWui;
529 case AArch64::LDRHroX:
530 return AArch64::LDRHui;
531 case AArch64::LDRHHroX:
532 return AArch64::LDRHHui;
533 case AArch64::LDRSHXroX:
534 return AArch64::LDRSHXui;
535 case AArch64::LDRSHWroX:
536 return AArch64::LDRSHWui;
537 case AArch64::LDRWroX:
538 return AArch64::LDRWui;
539 case AArch64::LDRSroX:
540 return AArch64::LDRSui;
541 case AArch64::LDRSWroX:
542 return AArch64::LDRSWui;
543 case AArch64::LDRDroX:
544 return AArch64::LDRDui;
545 case AArch64::LDRXroX:
546 return AArch64::LDRXui;
547 case AArch64::LDRQroX:
548 return AArch64::LDRQui;
556 case AArch64::STRSui:
557 case AArch64::STURSi:
558 return AArch64::STRSpost;
559 case AArch64::STRDui:
560 case AArch64::STURDi:
561 return AArch64::STRDpost;
562 case AArch64::STRQui:
563 case AArch64::STURQi:
564 return AArch64::STRQpost;
565 case AArch64::STRBBui:
566 return AArch64::STRBBpost;
567 case AArch64::STRHHui:
568 return AArch64::STRHHpost;
569 case AArch64::STRWui:
570 case AArch64::STURWi:
571 return AArch64::STRWpost;
572 case AArch64::STRXui:
573 case AArch64::STURXi:
574 return AArch64::STRXpost;
575 case AArch64::LDRSui:
576 case AArch64::LDURSi:
577 return AArch64::LDRSpost;
578 case AArch64::LDRDui:
579 case AArch64::LDURDi:
580 return AArch64::LDRDpost;
581 case AArch64::LDRQui:
582 case AArch64::LDURQi:
583 return AArch64::LDRQpost;
584 case AArch64::LDRBBui:
585 return AArch64::LDRBBpost;
586 case AArch64::LDRHHui:
587 return AArch64::LDRHHpost;
588 case AArch64::LDRWui:
589 case AArch64::LDURWi:
590 return AArch64::LDRWpost;
591 case AArch64::LDRXui:
592 case AArch64::LDURXi:
593 return AArch64::LDRXpost;
594 case AArch64::LDRSWui:
595 return AArch64::LDRSWpost;
597 return AArch64::LDPSpost;
598 case AArch64::LDPSWi:
599 return AArch64::LDPSWpost;
601 return AArch64::LDPDpost;
603 return AArch64::LDPQpost;
605 return AArch64::LDPWpost;
607 return AArch64::LDPXpost;
609 return AArch64::STPSpost;
611 return AArch64::STPDpost;
613 return AArch64::STPQpost;
615 return AArch64::STPWpost;
617 return AArch64::STPXpost;
619 return AArch64::STGPostIndex;
621 return AArch64::STZGPostIndex;
623 return AArch64::ST2GPostIndex;
624 case AArch64::STZ2Gi:
625 return AArch64::STZ2GPostIndex;
627 return AArch64::STGPpost;
634 unsigned OpcB =
MI.getOpcode();
639 case AArch64::STRSpre:
640 return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
641 case AArch64::STRDpre:
642 return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
643 case AArch64::STRQpre:
644 return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
645 case AArch64::STRWpre:
646 return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
647 case AArch64::STRXpre:
648 return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
649 case AArch64::LDRSpre:
650 return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
651 case AArch64::LDRDpre:
652 return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
653 case AArch64::LDRQpre:
654 return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
655 case AArch64::LDRWpre:
656 return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
657 case AArch64::LDRXpre:
658 return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
659 case AArch64::LDRSWpre:
660 return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
666 int &MinOffset,
int &MaxOffset) {
684 unsigned PairedRegOp = 0) {
685 assert(PairedRegOp < 2 &&
"Unexpected register operand idx.");
691 return MI.getOperand(
Idx);
700 int UnscaledStOffset =
704 int UnscaledLdOffset =
708 return (UnscaledStOffset <= UnscaledLdOffset) &&
709 (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
713 unsigned Opc =
MI.getOpcode();
714 return (
Opc == AArch64::STRWui ||
Opc == AArch64::STURWi ||
715 isNarrowStore(
Opc)) &&
720 switch (
MI.getOpcode()) {
724 case AArch64::LDRBBui:
725 case AArch64::LDRHHui:
726 case AArch64::LDRWui:
727 case AArch64::LDRXui:
729 case AArch64::LDURBBi:
730 case AArch64::LDURHHi:
731 case AArch64::LDURWi:
732 case AArch64::LDURXi:
738 unsigned Opc =
MI.getOpcode();
743 case AArch64::STRSui:
744 case AArch64::STRDui:
745 case AArch64::STRQui:
746 case AArch64::STRXui:
747 case AArch64::STRWui:
748 case AArch64::STRHHui:
749 case AArch64::STRBBui:
750 case AArch64::LDRSui:
751 case AArch64::LDRDui:
752 case AArch64::LDRQui:
753 case AArch64::LDRXui:
754 case AArch64::LDRWui:
755 case AArch64::LDRHHui:
756 case AArch64::LDRBBui:
760 case AArch64::STZ2Gi:
763 case AArch64::STURSi:
764 case AArch64::STURDi:
765 case AArch64::STURQi:
766 case AArch64::STURWi:
767 case AArch64::STURXi:
768 case AArch64::LDURSi:
769 case AArch64::LDURDi:
770 case AArch64::LDURQi:
771 case AArch64::LDURWi:
772 case AArch64::LDURXi:
775 case AArch64::LDPSWi:
804 unsigned Opc =
MI.getOpcode();
810 case AArch64::LDRBroX:
811 case AArch64::LDRBBroX:
812 case AArch64::LDRSBXroX:
813 case AArch64::LDRSBWroX:
816 case AArch64::LDRHroX:
817 case AArch64::LDRHHroX:
818 case AArch64::LDRSHXroX:
819 case AArch64::LDRSHWroX:
822 case AArch64::LDRWroX:
823 case AArch64::LDRSroX:
824 case AArch64::LDRSWroX:
827 case AArch64::LDRDroX:
828 case AArch64::LDRXroX:
831 case AArch64::LDRQroX:
841 case AArch64::ORRWrs:
842 case AArch64::ADDWri:
850 const LdStPairFlags &Flags) {
852 "Expected promotable zero stores.");
860 if (NextI == MergeMI)
863 unsigned Opc =
I->getOpcode();
864 unsigned MergeMIOpc = MergeMI->getOpcode();
865 bool IsScaled = !
TII->hasUnscaledLdStOffset(
Opc);
866 bool IsMergedMIScaled = !
TII->hasUnscaledLdStOffset(MergeMIOpc);
867 int OffsetStride = IsScaled ?
TII->getMemScale(*
I) : 1;
868 int MergeMIOffsetStride = IsMergedMIScaled ?
TII->getMemScale(*MergeMI) : 1;
870 bool MergeForward =
Flags.getMergeForward();
881 int64_t IOffsetInBytes =
883 int64_t MIOffsetInBytes =
888 if (IOffsetInBytes > MIOffsetInBytes)
889 OffsetImm = MIOffsetInBytes;
891 OffsetImm = IOffsetInBytes;
896 if (!
TII->hasUnscaledLdStOffset(NewOpcode)) {
897 int NewOffsetStride =
TII->getMemScale(NewOpcode);
898 assert(((OffsetImm % NewOffsetStride) == 0) &&
899 "Offset should be a multiple of the store memory scale");
900 OffsetImm = OffsetImm / NewOffsetStride;
908 .
addReg(isNarrowStore(
Opc) ? AArch64::WZR : AArch64::XZR)
912 .setMIFlags(
I->mergeFlagsWith(*MergeMI));
915 LLVM_DEBUG(
dbgs() <<
"Creating wider store. Replacing instructions:\n ");
924 I->eraseFromParent();
925 MergeMI->eraseFromParent();
935 auto MBB =
MI.getParent();
943 return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
944 TRI->regsOverlap(MOP.getReg(), DefReg);
958 if (MOP.isReg() && MOP.isKill())
962 if (MOP.isReg() && !MOP.isKill())
963 Units.
addReg(MOP.getReg());
970 unsigned InstrNumToSet,
977 unsigned OperandNo = 0;
978 bool RegFound =
false;
979 for (
const auto Op : MergedInstr.
operands()) {
980 if (
Op.getReg() == Reg) {
989 {InstrNumToSet, OperandNo});
995 const LdStPairFlags &Flags) {
1002 if (NextI == Paired)
1005 int SExtIdx =
Flags.getSExtIdx();
1008 bool IsUnscaled =
TII->hasUnscaledLdStOffset(
Opc);
1009 int OffsetStride = IsUnscaled ?
TII->getMemScale(*
I) : 1;
1011 bool MergeForward =
Flags.getMergeForward();
1013 std::optional<MCPhysReg> RenameReg =
Flags.getRenameReg();
1016 DefinedInBB.addReg(*RenameReg);
1020 auto GetMatchingSubReg =
1023 TRI->sub_and_superregs_inclusive(*RenameReg)) {
1024 if (
C->contains(SubOrSuper))
1031 [
this, RegToRename, GetMatchingSubReg, MergeForward](
MachineInstr &
MI,
1034 bool SeenDef =
false;
1040 (!MergeForward || !SeenDef ||
1042 TRI->regsOverlap(MOP.
getReg(), RegToRename)) {
1045 "Need renamable operands");
1049 MatchingReg = GetMatchingSubReg(RC);
1053 MatchingReg = GetMatchingSubReg(
1054 TRI->getMinimalPhysRegClass(MOP.
getReg()));
1064 TRI->regsOverlap(MOP.
getReg(), RegToRename)) {
1067 "Need renamable operands");
1071 MatchingReg = GetMatchingSubReg(RC);
1073 MatchingReg = GetMatchingSubReg(
1074 TRI->getMinimalPhysRegClass(MOP.
getReg()));
1075 assert(MatchingReg != AArch64::NoRegister &&
1076 "Cannot find matching regs for renaming");
1085 TRI, UINT32_MAX, UpdateMIs);
1098 RegToCheck = RegToRename;
1101 MergeForward ? std::next(
I) :
I,
1102 MergeForward ? std::next(Paired) : Paired))
1105 return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1107 !TRI->regsOverlap(MOP.getReg(), RegToCheck);
1109 "Rename register used between paired instruction, trashing the "
1125 bool PairedIsUnscaled =
TII->hasUnscaledLdStOffset(Paired->getOpcode());
1126 if (IsUnscaled != PairedIsUnscaled) {
1130 int MemSize =
TII->getMemScale(*Paired);
1131 if (PairedIsUnscaled) {
1134 assert(!(PairedOffset %
TII->getMemScale(*Paired)) &&
1135 "Offset should be a multiple of the stride!");
1136 PairedOffset /= MemSize;
1138 PairedOffset *= MemSize;
1146 if (
Offset == PairedOffset + OffsetStride &&
1154 SExtIdx = (SExtIdx + 1) % 2;
1162 assert(!(OffsetImm %
TII->getMemScale(*RtMI)) &&
1163 "Unscaled offset cannot be scaled.");
1164 OffsetImm /=
TII->getMemScale(*RtMI);
1173 MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
1175 if (RegOp0.
isUse()) {
1176 if (!MergeForward) {
1187 for (
auto It = std::next(
I); It != Paired && PairedRegOp.
isKill(); ++It)
1188 if (It->readsRegister(PairedRegOp.
getReg(),
TRI))
1197 make_range(std::next(
I->getIterator()), Paired->getIterator()))
1198 MI.clearRegisterKills(Reg,
TRI);
1214 .setMIFlags(
I->mergeFlagsWith(*Paired));
1219 dbgs() <<
"Creating pair load/store. Replacing instructions:\n ");
1224 if (SExtIdx != -1) {
1234 Register DstRegW =
TRI->getSubReg(DstRegX, AArch64::sub_32);
1290 if (
I->peekDebugInstrNum()) {
1298 unsigned NewInstrNum;
1299 if (DstRegX ==
I->getOperand(0).getReg()) {
1308 if (Paired->peekDebugInstrNum()) {
1316 unsigned NewInstrNum;
1317 if (DstRegX == Paired->getOperand(0).getReg()) {
1330 }
else if (
Opc == AArch64::LDR_ZXI ||
Opc == AArch64::STR_ZXI) {
1336 AArch64::ZPRRegClass.contains(MOp1.
getReg()) &&
"Invalid register.");
1337 MOp0.
setReg(AArch64::Q0 + (MOp0.
getReg() - AArch64::Z0));
1338 MOp1.
setReg(AArch64::Q0 + (MOp1.
getReg() - AArch64::Z0));
1369 if (
I->peekDebugInstrNum()) {
1374 if (Paired->peekDebugInstrNum()) {
1387 DefinedInBB.addReg(MOP.
getReg());
1390 I->eraseFromParent();
1391 Paired->eraseFromParent();
1400 next_nodbg(LoadI, LoadI->getParent()->end());
1402 int LoadSize =
TII->getMemScale(*LoadI);
1403 int StoreSize =
TII->getMemScale(*StoreI);
1407 bool IsStoreXReg =
TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
1410 TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
1411 "Unexpected RegClass");
1414 if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
1417 if (StRt == LdRt && LoadSize == 8) {
1419 LoadI->getIterator())) {
1420 if (
MI.killsRegister(StRt,
TRI)) {
1421 MI.clearRegisterKills(StRt,
TRI);
1428 LoadI->eraseFromParent();
1433 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1434 TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
1435 .
addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
1442 if (!Subtarget->isLittleEndian())
1444 bool IsUnscaled =
TII->hasUnscaledLdStOffset(*LoadI);
1445 assert(IsUnscaled ==
TII->hasUnscaledLdStOffset(*StoreI) &&
1446 "Unsupported ld/st match");
1447 assert(LoadSize <= StoreSize &&
"Invalid load size");
1448 int UnscaledLdOffset =
1452 int UnscaledStOffset =
1456 int Width = LoadSize * 8;
1459 LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
1462 assert((UnscaledLdOffset >= UnscaledStOffset &&
1463 (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
1466 int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
1467 int Imms = Immr + Width - 1;
1468 if (UnscaledLdOffset == UnscaledStOffset) {
1469 uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12)
1475 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1476 TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
1481 }
else if (IsStoreXReg && Imms == 31) {
1484 assert(Immr <= Imms &&
"Expected LSR alias of UBFM");
1485 BitExtMI =
BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1486 TII->get(AArch64::UBFMWri),
1487 TRI->getSubReg(DestReg, AArch64::sub_32))
1488 .
addReg(
TRI->getSubReg(StRt, AArch64::sub_32))
1494 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1495 TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
1507 if (
MI.killsRegister(StRt,
TRI)) {
1508 MI.clearRegisterKills(StRt,
TRI);
1523 LoadI->eraseFromParent();
1533 if (
Offset % OffsetStride)
1537 return Offset <= 63 && Offset >= -64;
1545 return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
1552 if (MIa.
mayAlias(AA, *MIb,
false)) {
1562bool AArch64LoadStoreOpt::findMatchingStore(
1577 ModifiedRegUnits.clear();
1578 UsedRegUnits.clear();
1587 if (!
MI.isTransient())
1613 if (!ModifiedRegUnits.available(BaseReg))
1619 }
while (
MBBI !=
B && Count < Limit);
1631 LdStPairFlags &Flags,
1634 if (
MI.hasOrderedMemoryRef() ||
TII->isLdStPairSuppressed(
MI))
1639 !
TII->isLdStPairSuppressed(FirstMI) &&
1640 "FirstMI shouldn't get here if either of these checks are true.");
1647 unsigned OpcB =
MI.getOpcode();
1655 if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
1656 OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
1664 bool IsValidLdStrOpc, PairIsValidLdStrOpc;
1666 assert(IsValidLdStrOpc &&
1667 "Given Opc should be a Load or Store with an immediate");
1670 Flags.setSExtIdx(NonSExtOpc == OpcA ? 1 : 0);
1676 if (!PairIsValidLdStrOpc)
1681 if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
1684 TII->getMemScale(FirstMI) ==
TII->getMemScale(
MI);
1693 return TII->hasUnscaledLdStOffset(OpcA) !=
TII->hasUnscaledLdStOffset(OpcB) &&
1702 auto *RegClass =
TRI->getMinimalPhysRegClass(MOP.
getReg());
1709 if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
1710 (
TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
1711 TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
1712 TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
1715 <<
" Cannot rename operands with multiple disjunct subregisters ("
1726 return TRI->isSuperOrSubRegisterEq(
1749 return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1750 MOP.isImplicit() && MOP.isKill() &&
1751 TRI->regsOverlap(RegToRename, MOP.getReg());
1757 bool FoundDef =
false;
1788 if (
MI.isPseudo()) {
1789 LLVM_DEBUG(
dbgs() <<
" Cannot rename pseudo/bundle instruction\n");
1793 for (
auto &MOP :
MI.operands()) {
1795 !
TRI->regsOverlap(MOP.
getReg(), RegToRename))
1805 for (
auto &MOP :
MI.operands()) {
1807 !
TRI->regsOverlap(MOP.
getReg(), RegToRename))
1824 LLVM_DEBUG(
dbgs() <<
" Did not find definition for register in BB\n");
1852 LLVM_DEBUG(dbgs() <<
"Checking " << MI);
1854 if (MI.getFlag(MachineInstr::FrameSetup)) {
1855 LLVM_DEBUG(dbgs() <<
" Cannot rename framesetup instructions "
1860 for (
auto &MOP :
MI.operands()) {
1861 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1862 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1864 if (!canRenameMOP(MOP, TRI)) {
1865 LLVM_DEBUG(dbgs() <<
" Cannot rename " << MOP <<
" in " << MI);
1891 auto AnySubOrSuperRegCalleePreserved = [&MF,
TRI](
MCPhysReg PR) {
1892 return any_of(
TRI->sub_and_superregs_inclusive(PR),
1894 return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
1900 auto CanBeUsedForAllClasses = [&RequiredClasses,
TRI](
MCPhysReg PR) {
1903 TRI->sub_and_superregs_inclusive(PR),
1904 [
C](
MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
1908 auto *RegClass =
TRI->getMinimalPhysRegClass(Reg);
1911 !
RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
1912 CanBeUsedForAllClasses(PR)) {
1920 <<
TRI->getRegClassName(RegClass) <<
"\n");
1921 return std::nullopt;
1932 std::optional<MCPhysReg> RenameReg;
1941 const bool IsLoad = FirstMI.
mayLoad();
1943 if (!MaybeCanRename) {
1946 RequiredClasses,
TRI)};
1952 if (*MaybeCanRename) {
1954 RequiredClasses,
TRI);
1963 LdStPairFlags &Flags,
unsigned Limit,
1964 bool FindNarrowMerge) {
1972 bool IsUnscaled =
TII->hasUnscaledLdStOffset(FirstMI);
1976 int OffsetStride = IsUnscaled ?
TII->getMemScale(FirstMI) : 1;
1979 std::optional<bool> MaybeCanRename;
1981 MaybeCanRename = {
false};
1987 Flags.clearRenameReg();
1991 ModifiedRegUnits.clear();
1992 UsedRegUnits.clear();
1998 for (
unsigned Count = 0;
MBBI != E && Count < Limit;
2007 if (!
MI.isTransient())
2010 Flags.setSExtIdx(-1);
2013 assert(
MI.mayLoadOrStore() &&
"Expected memory operation.");
2022 bool MIIsUnscaled =
TII->hasUnscaledLdStOffset(
MI);
2023 if (IsUnscaled != MIIsUnscaled) {
2027 int MemSize =
TII->getMemScale(
MI);
2031 if (MIOffset % MemSize) {
2037 MIOffset /= MemSize;
2039 MIOffset *= MemSize;
2045 if (BaseReg == MIBaseReg) {
2051 bool IsOutOfBounds = MIOffset !=
TII->getMemScale(
MI);
2052 bool IsBaseRegUsed = !UsedRegUnits.available(
2054 bool IsBaseRegModified = !ModifiedRegUnits.available(
2059 bool IsMIRegTheSame =
2062 if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
2070 if ((
Offset != MIOffset + OffsetStride) &&
2071 (
Offset + OffsetStride != MIOffset)) {
2080 if (FindNarrowMerge) {
2085 if ((!IsUnscaled &&
alignTo(MinOffset, 2) != MinOffset) ||
2102 <<
"keep looking.\n");
2108 if (IsUnscaled && (
alignTo(MinOffset, OffsetStride) != MinOffset)) {
2113 <<
"Offset doesn't fit due to alignment requirements, "
2114 <<
"keep looking.\n");
2125 if (!ModifiedRegUnits.available(BaseReg))
2128 const bool SameLoadReg =
MayLoad &&
TRI->isSuperOrSubRegisterEq(
2135 bool RtNotModified =
2137 bool RtNotUsed = !(
MI.mayLoad() && !SameLoadReg &&
2140 LLVM_DEBUG(
dbgs() <<
"Checking, can combine 2nd into 1st insn:\n"
2142 << (RtNotModified ?
"true" :
"false") <<
"\n"
2144 << (RtNotUsed ?
"true" :
"false") <<
"\n");
2146 if (RtNotModified && RtNotUsed && !
mayAlias(
MI, MemInsns, AA)) {
2151 std::optional<MCPhysReg> RenameReg =
2153 Reg, DefinedInBB, UsedInBetween,
2154 RequiredClasses,
TRI);
2160 <<
"keep looking.\n");
2163 Flags.setRenameReg(*RenameReg);
2166 Flags.setMergeForward(
false);
2168 Flags.clearRenameReg();
2179 LLVM_DEBUG(
dbgs() <<
"Checking, can combine 1st into 2nd insn:\n"
2181 <<
"' not modified: "
2182 << (RtNotModified ?
"true" :
"false") <<
"\n");
2184 if (RtNotModified && !
mayAlias(FirstMI, MemInsns, AA)) {
2186 Flags.setMergeForward(
true);
2187 Flags.clearRenameReg();
2192 MaybeCanRename, FirstMI,
MI, Reg, DefinedInBB, UsedInBetween,
2193 RequiredClasses,
TRI);
2195 Flags.setMergeForward(
true);
2196 Flags.setRenameReg(*RenameReg);
2197 MBBIWithRenameReg =
MBBI;
2200 LLVM_DEBUG(
dbgs() <<
"Unable to combine these instructions due to "
2201 <<
"interference in between, keep looking.\n");
2205 if (
Flags.getRenameReg())
2206 return MBBIWithRenameReg;
2220 if (!ModifiedRegUnits.available(BaseReg)) {
2226 if (
MI.mayLoadOrStore())
2234 assert((
MI.getOpcode() == AArch64::SUBXri ||
2235 MI.getOpcode() == AArch64::ADDXri) &&
2236 "Expected a register update instruction");
2237 auto End =
MI.getParent()->end();
2238 if (MaybeCFI ==
End ||
2239 MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
2242 MI.getOperand(0).getReg() != AArch64::SP)
2246 unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
2257std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
2259 bool IsForward,
bool IsPreIdx,
bool MergeEither) {
2260 assert((Update->getOpcode() == AArch64::ADDXri ||
2261 Update->getOpcode() == AArch64::SUBXri) &&
2262 "Unexpected base register update instruction to merge!");
2278 if (std::any_of(std::next(CFI),
I, [](
const auto &Insn) {
2279 return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
2281 return std::nullopt;
2292 if (NextI == Update)
2295 int Value = Update->getOperand(2).getImm();
2297 "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
2298 if (Update->getOpcode() == AArch64::SUBXri)
2304 int Scale, MinOffset, MaxOffset;
2308 MIB =
BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2310 .
add(Update->getOperand(0))
2318 MIB =
BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2320 .
add(Update->getOperand(0))
2345 I->eraseFromParent();
2346 Update->eraseFromParent();
2354 unsigned Offset,
int Scale) {
2355 assert((Update->getOpcode() == AArch64::MOVKWi) &&
2356 "Unexpected const mov instruction to merge!");
2361 unsigned Mask = (1 << 12) * Scale - 1;
2370 BuildMI(*
I->getParent(),
I,
I->getDebugLoc(),
TII->get(AArch64::ADDXri))
2378 MemMIB =
BuildMI(*
I->getParent(),
I,
I->getDebugLoc(),
TII->get(NewOpc))
2386 ++NumConstOffsetFolded;
2401 I->eraseFromParent();
2402 PrevI->eraseFromParent();
2403 Update->eraseFromParent();
2408bool AArch64LoadStoreOpt::isMatchingUpdateInsn(
MachineInstr &MemMI,
2410 unsigned BaseReg,
int Offset) {
2411 switch (
MI.getOpcode()) {
2414 case AArch64::SUBXri:
2415 case AArch64::ADDXri:
2418 if (!
MI.getOperand(2).isImm())
2426 if (
MI.getOperand(0).getReg() != BaseReg ||
2427 MI.getOperand(1).getReg() != BaseReg)
2430 int UpdateOffset =
MI.getOperand(2).getImm();
2431 if (
MI.getOpcode() == AArch64::SUBXri)
2432 UpdateOffset = -UpdateOffset;
2436 int Scale, MinOffset, MaxOffset;
2438 if (UpdateOffset % Scale != 0)
2442 int ScaledOffset = UpdateOffset / Scale;
2443 if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
2455bool AArch64LoadStoreOpt::isMatchingMovConstInsn(
MachineInstr &MemMI,
2461 if (
MI.getOpcode() == AArch64::MOVKWi &&
2462 TRI->isSuperOrSubRegisterEq(IndexReg,
MI.getOperand(1).getReg())) {
2473 if (MovzMI.
getOpcode() == AArch64::MOVZWi &&
2476 unsigned High =
MI.getOperand(2).getImm() <<
MI.getOperand(3).getImm();
2479 return Offset >> 24 == 0;
2493 TII->getMemScale(MemMI);
2498 if (MIUnscaledOffset != UnscaledOffset)
2509 for (
unsigned i = 0, e = IsPairedInsn ? 2 : 1; i !=
e; ++i) {
2511 if (DestReg == BaseReg ||
TRI->isSubRegister(BaseReg, DestReg))
2518 ModifiedRegUnits.clear();
2519 UsedRegUnits.clear();
2525 const bool BaseRegSP =
BaseReg == AArch64::SP;
2545 if (!
MI.isTransient())
2549 if (isMatchingUpdateInsn(*
I,
MI, BaseReg, UnscaledOffset))
2560 if (!ModifiedRegUnits.available(BaseReg) ||
2561 !UsedRegUnits.available(BaseReg) ||
2562 (BaseRegSP &&
MBBI->mayLoadOrStore()))
2566 if (!VisitSucc || Limit <= Count)
2572 unsigned LiveSuccCount = 0;
2575 if (Succ->isLiveIn(*AI)) {
2576 if (LiveSuccCount++)
2578 if (Succ->pred_size() == 1)
2586 CurMBB = SuccToVisit;
2607 : AArch64::NoRegister};
2616 for (
unsigned i = 0, e = IsPairedInsn ? 2 : 1; i !=
e; ++i)
2617 if (DestReg[i] == BaseReg ||
TRI->isSubRegister(BaseReg, DestReg[i]))
2621 const bool BaseRegSP =
BaseReg == AArch64::SP;
2630 unsigned RedZoneSize =
2635 ModifiedRegUnits.clear();
2636 UsedRegUnits.clear();
2638 bool MemAccessBeforeSPPreInc =
false;
2646 if (!
MI.isTransient())
2650 if (isMatchingUpdateInsn(*
I,
MI, BaseReg,
Offset)) {
2653 if (MemAccessBeforeSPPreInc &&
MBBI->getOperand(2).getImm() > RedZoneSize)
2663 if (!ModifiedRegUnits.available(BaseReg) ||
2664 !UsedRegUnits.available(BaseReg))
2671 if (
MI.mayLoadOrStore() ||
MI.hasUnmodeledSideEffects() ||
2672 (DestReg[0] != AArch64::NoRegister &&
2673 !(ModifiedRegUnits.available(DestReg[0]) &&
2674 UsedRegUnits.available(DestReg[0]))) ||
2675 (DestReg[1] != AArch64::NoRegister &&
2676 !(ModifiedRegUnits.available(DestReg[1]) &&
2677 UsedRegUnits.available(DestReg[1]))))
2678 MergeEither =
false;
2683 if (BaseRegSP &&
MBBI->mayLoadOrStore())
2684 MemAccessBeforeSPPreInc =
true;
2685 }
while (
MBBI !=
B && Count < Limit);
2690AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
2713 ModifiedRegUnits.clear();
2714 UsedRegUnits.clear();
2722 if (!
MI.isTransient())
2726 if (isMatchingMovConstInsn(*
I,
MI, IndexReg,
Offset)) {
2735 if (!ModifiedRegUnits.available(IndexReg) ||
2736 !UsedRegUnits.available(IndexReg))
2739 }
while (
MBBI !=
B && Count < Limit);
2743bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
2747 if (
MI.hasOrderedMemoryRef())
2761 ++NumLoadsFromStoresPromoted;
2765 MBBI = promoteLoadFromStore(
MBBI, StoreI);
2772bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
2778 if (!
TII->isCandidateToMergeOrPair(
MI))
2782 LdStPairFlags
Flags;
2786 ++NumZeroStoresPromoted;
2790 MBBI = mergeNarrowZeroStores(
MBBI, MergeMI, Flags);
2802 if (!
TII->isCandidateToMergeOrPair(
MI))
2806 if (
MI.mayLoad() && Subtarget->hasDisableLdp())
2810 if (
MI.mayStore() && Subtarget->hasDisableStp())
2816 bool IsUnscaled =
TII->hasUnscaledLdStOffset(
MI);
2818 int OffsetStride = IsUnscaled ?
TII->getMemScale(
MI) : 1;
2826 LdStPairFlags
Flags;
2832 auto Prev = std::prev(
MBBI);
2837 MI.memoperands_empty() ? nullptr :
MI.memoperands().front();
2842 if ((
MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
2843 (
MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
2845 if (!
MemOp || !
MemOp->getMemoryType().isValid()) {
2846 NumFailedAlignmentCheck++;
2856 if (MemAlignment < 2 * TypeAlignment) {
2857 NumFailedAlignmentCheck++;
2863 if (
TII->hasUnscaledLdStOffset(
MI))
2864 ++NumUnscaledPairCreated;
2866 MBBI = mergePairedInsns(
MBBI, Paired, Flags);
2869 for (
auto I = std::next(Prev);
I !=
MBBI;
I++)
2877bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2891 if (
auto NextI = mergeUpdateInsn(
MBBI, Update,
false,
2900 if (
TII->hasUnscaledLdStOffset(
MI.getOpcode()))
2909 Update = findMatchingUpdateInsnBackward(
MBBI,
UpdateLimit, MergeEither);
2912 if (
auto NextI = mergeUpdateInsn(
MBBI, Update,
true,
2913 true, MergeEither)) {
2922 int UnscaledOffset =
2930 Update = findMatchingUpdateInsnForward(
MBBI, UnscaledOffset,
UpdateLimit);
2933 if (
auto NextI = mergeUpdateInsn(
MBBI, Update,
false,
2951 if (
TII->hasUnscaledLdStOffset(
MI.getOpcode()))
2963 if (Update != E && (
Offset & (Scale - 1)) == 0) {
2973 bool EnableNarrowZeroStOpt) {
3005 if (EnableNarrowZeroStOpt)
3029 DefinedInBB.
clear();
3030 DefinedInBB.addLiveIns(
MBB);
3038 if (
TII->isPairableLdStInst(*
MBBI) && tryToPairLdStInst(
MBBI))
3084 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3089 ModifiedRegUnits.init(*
TRI);
3090 UsedRegUnits.init(*
TRI);
3091 DefinedInBB.init(*
TRI);
3094 bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
3095 for (
auto &
MBB : Fn) {
3116 return new AArch64LoadStoreOpt();
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static cl::opt< bool > EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden)
static MachineOperand & getLdStRegOp(MachineInstr &MI, unsigned PairedRegOp=0)
static bool isPromotableLoadFromStore(MachineInstr &MI)
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, int &MinOffset, int &MaxOffset)
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride)
static unsigned getMatchingPairOpcode(unsigned Opc)
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, LdStPairFlags &Flags, const AArch64InstrInfo *TII)
static std::optional< MCPhysReg > tryToFindRegisterToRename(const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool needsWinCFI(const MachineFunction *MF)
static bool canRenameUntilSecondLoad(MachineInstr &FirstLoad, MachineInstr &SecondLoad, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static std::optional< MCPhysReg > findRenameRegForSameLdStRegPair(std::optional< bool > MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
static cl::opt< unsigned > LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden)
static bool canRenameMOP(const MachineOperand &MOP, const TargetRegisterInfo *TRI)
static unsigned getPreIndexedOpcode(unsigned Opc)
#define AARCH64_LOAD_STORE_OPT_NAME
static void addDebugSubstitutionsToTable(MachineFunction *MF, unsigned InstrNumToSet, MachineInstr &OriginalInstr, MachineInstr &MergedInstr)
This function will add a new entry into the debugValueSubstitutions table when two instructions have been merged into a new one represented by MergedInstr.
static cl::opt< unsigned > UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden)
static bool isPromotableZeroStoreInst(MachineInstr &MI)
static unsigned getMatchingWideOpcode(unsigned Opc)
static unsigned getMatchingNonSExtOpcode(unsigned Opc, bool *IsValidLdStrOpc=nullptr)
static MachineBasicBlock::iterator maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI)
static int alignTo(int Num, int PowOf2)
static bool isTagStore(const MachineInstr &MI)
static unsigned isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst)
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, const TargetRegisterInfo *TRI, unsigned Limit, std::function< bool(MachineInstr &, bool)> &Fn)
static bool isRewritableImplicitDef(unsigned Opc)
static unsigned getPostIndexedOpcode(unsigned Opc)
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI)
static cl::opt< unsigned > LdStConstLimit("aarch64-load-store-const-scan-limit", cl::init(10), cl::Hidden)
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, MachineInstr &StoreInst, const AArch64InstrInfo *TII)
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI)
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale)
static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, const TargetRegisterInfo *TRI)
static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static unsigned getBaseAddressOpcode(unsigned Opc)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
A private abstract base class describing the concept of an individual alias analysis implementation.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-specific information for each MachineFunction.
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64InstrInfo * getInstrInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
unsigned getRedZoneSize(const Function &F) const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an Operation in the Expression.
static bool shouldExecute(unsigned CounterName)
FunctionPass class - This class is used to implement most global optimizations.
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A set of register units used to track register liveness.
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered in ModifiedRegUnits.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
LLVM_ABI void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
An instruction for reading from memory.
bool usesWindowsCFI() const
OpType getOperation() const
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
reverse_instr_iterator instr_rend()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformation or analysis.
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI bool mayAlias(BatchAAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
unsigned peekDebugInstrNum() const
Examine the instruction number of this MachineInstr.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the information describing the memory reference is not available.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
LLVM_ABI unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
void setImplicit(bool Val=true)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
LLVM_ABI bool isRenamable() const
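isRenamable - Returns true if this register may be renamed, i.e. it does not generate a value that is somehow read in a way that is not represented by the Machine IR (e.g. to meet an ABI or ISA requirement).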
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all small sizes.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
LLVM Value Representation.
self_iterator getIterator()
A range adaptor for a pair of iterators.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount; shifter: 000 ==> lsl, 001 ==> lsr, 010 ==> asr, 011 ==> ror, 100 ==> msl; {8-6} = shifter, {5-0} = imm.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ Define
Register definition.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< filter_iterator< ConstMIBundleOperands, bool(*)(const MachineOperand &)> > phys_regs_and_masks(const MachineInstr &MI)
Returns an iterator range over all physical register and mask operands for MI and bundled instructions.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionPass * createAArch64LoadStoreOptimizationPass()
createAArch64LoadStoreOptimizationPass - returns an instance of the load / store optimization pass.
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached, skipping any debug instructions.
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.