64#define DEBUG_TYPE "memcpyopt"
67 "enable-memcpyopt-without-libcalls",
cl::Hidden,
68 cl::desc(
"Enable memcpyopt even when libcalls are disabled"));
70STATISTIC(NumMemCpyInstr,
"Number of memcpy instructions deleted");
71STATISTIC(NumMemMoveInstr,
"Number of memmove instructions deleted");
72STATISTIC(NumMemSetInfer,
"Number of memsets inferred");
73STATISTIC(NumMoveToCpy,
"Number of memmoves converted to memcpy");
74STATISTIC(NumCpyToSet,
"Number of memcpys converted to memset");
75STATISTIC(NumCallSlot,
"Number of call slot optimizations performed");
76STATISTIC(NumStackMove,
"Number of stack-move optimizations performed");
105 bool isProfitableToUseMemset(
const DataLayout &
DL)
const;
110bool MemsetRange::isProfitableToUseMemset(
const DataLayout &
DL)
const {
112 if (TheStores.size() >= 4 ||
End - Start >= 16)
116 if (TheStores.size() < 2)
122 if (!isa<StoreInst>(SI))
127 if (TheStores.size() == 2)
141 unsigned MaxIntSize =
DL.getLargestLegalIntTypeSizeInBits() / 8;
144 unsigned NumPointerStores = Bytes / MaxIntSize;
147 unsigned NumByteStores = Bytes % MaxIntSize;
152 return TheStores.size() > NumPointerStores + NumByteStores;
172 bool empty()
const {
return Ranges.empty(); }
174 void addInst(int64_t OffsetFromFirst,
Instruction *Inst) {
175 if (
auto *SI = dyn_cast<StoreInst>(Inst))
176 addStore(OffsetFromFirst, SI);
178 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
181 void addStore(int64_t OffsetFromFirst,
StoreInst *SI) {
182 TypeSize StoreSize =
DL.getTypeStoreSize(
SI->getOperand(0)->getType());
185 SI->getPointerOperand(),
SI->getAlign(), SI);
188 void addMemSet(int64_t OffsetFromFirst,
MemSetInst *MSI) {
189 int64_t
Size = cast<ConstantInt>(MSI->
getLength())->getZExtValue();
202void MemsetRanges::addRange(int64_t Start, int64_t
Size,
Value *
Ptr,
207 Ranges, [=](
const MemsetRange &O) {
return O.End < Start; });
212 if (
I ==
Ranges.end() || End < I->Start) {
213 MemsetRange &
R = *
Ranges.insert(
I, MemsetRange());
217 R.Alignment = Alignment;
218 R.TheStores.push_back(Inst);
223 I->TheStores.push_back(Inst);
227 if (
I->Start <= Start &&
I->End >=
End)
236 if (Start < I->Start) {
239 I->Alignment = Alignment;
247 range_iterator NextI =
I;
248 while (++NextI !=
Ranges.end() &&
End >= NextI->Start) {
250 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
251 if (NextI->End >
I->End)
267 assert(Start->getParent() ==
End->getParent() &&
"Must be in same block");
269 if (Start->getFunction()->doesNotThrow())
274 bool RequiresNoCaptureBeforeUnwind;
276 RequiresNoCaptureBeforeUnwind) &&
277 !RequiresNoCaptureBeforeUnwind)
288 I->eraseFromParent();
299 assert(Start->getBlock() ==
End->getBlock() &&
"Only local supported");
302 Instruction *
I = cast<MemoryUseOrDef>(MA).getMemoryInst();
304 auto *
II = dyn_cast<IntrinsicInst>(
I);
305 if (
II &&
II->getIntrinsicID() == Intrinsic::lifetime_start &&
306 SkippedLifetimeStart && !*SkippedLifetimeStart) {
307 *SkippedLifetimeStart =
I;
322 if (isa<MemoryUse>(
End)) {
326 return Start->getBlock() !=
End->getBlock() ||
328 make_range(std::next(Start->getIterator()),
End->getIterator()),
330 if (isa<MemoryUse>(&Acc))
332 Instruction *AccInst =
333 cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
334 return isModSet(AA.getModRefInfo(AccInst, Loc));
340 End->getDefiningAccess(), Loc, AA);
354 if (
auto *SI = dyn_cast<StoreInst>(StartInst))
355 if (
DL.getTypeStoreSize(
SI->getOperand(0)->getType()).isScalable())
370 for (++BI; !BI->isTerminator(); ++BI) {
374 MemInsertPoint = CurrentAcc;
378 if (
auto *CB = dyn_cast<CallBase>(BI)) {
379 if (CB->onlyAccessesInaccessibleMemory())
383 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
387 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
392 if (
auto *NextStore = dyn_cast<StoreInst>(BI)) {
394 if (!NextStore->isSimple())
397 Value *StoredVal = NextStore->getValueOperand();
405 if (
DL.getTypeStoreSize(StoredVal->
getType()).isScalable())
410 if (isa<UndefValue>(ByteVal) && StoredByte)
411 ByteVal = StoredByte;
412 if (ByteVal != StoredByte)
416 std::optional<int64_t>
Offset =
417 NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr,
DL);
423 auto *MSI = cast<MemSetInst>(BI);
425 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
426 !isa<ConstantInt>(MSI->getLength()))
430 std::optional<int64_t>
Offset =
431 MSI->getDest()->getPointerOffsetFrom(StartPtr,
DL);
447 Ranges.addInst(0, StartInst);
457 for (
const MemsetRange &
Range : Ranges) {
458 if (
Range.TheStores.size() == 1)
462 if (!
Range.isProfitableToUseMemset(
DL))
467 StartPtr =
Range.StartPtr;
469 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal,
Range.End -
Range.Start,
476 dbgs() <<
"With: " << *AMemSet <<
'\n');
477 if (!
Range.TheStores.empty())
480 auto *NewDef = cast<MemoryDef>(
485 MemInsertPoint = NewDef;
489 eraseInstruction(SI);
510 auto AddArg = [&](
Value *Arg) {
511 auto *
I = dyn_cast<Instruction>(Arg);
512 if (
I &&
I->getParent() ==
SI->getParent()) {
520 if (!AddArg(
SI->getPointerOperand()))
534 for (
auto I = --
SI->getIterator(), E =
P->getIterator();
I != E; --
I) {
544 bool NeedLift =
false;
566 else if (
const auto *Call = dyn_cast<CallBase>(
C)) {
572 }
else if (isa<LoadInst>(
C) || isa<StoreInst>(
C) || isa<VAArgInst>(
C)) {
578 MemLocs.push_back(
ML);
599 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
614 I->moveBefore(
P->getIterator());
615 assert(MemInsertPoint &&
"Must have found insert point");
637 if (
T->isAggregateType() &&
639 (TLI->
has(LibFunc_memcpy) && TLI->
has(LibFunc_memmove)))) {
648 StoreAccess->getDefiningAccess(), LoadLoc, BAA);
650 ? cast<MemoryUseOrDef>(Clobber)->getMemoryInst()
657 if (
P == SI || moveUp(SI,
P, LI)) {
662 bool UseMemMove =
false;
668 Builder.CreateTypeSize(Builder.getInt64Ty(),
DL.getTypeStoreSize(
T));
671 M = Builder.CreateMemMove(
SI->getPointerOperand(),
SI->getAlign(),
675 M = Builder.CreateMemCpy(
SI->getPointerOperand(),
SI->getAlign(),
677 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
679 LLVM_DEBUG(
dbgs() <<
"Promoting " << *LI <<
" to " << *SI <<
" => " << *M
684 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
686 eraseInstruction(SI);
687 eraseInstruction(LI);
691 BBI =
M->getIterator();
699 auto GetCall = [&]() ->
CallInst * {
702 if (
auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
704 return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
708 bool Changed = performCallSlotOptzn(
709 LI, SI,
SI->getPointerOperand()->stripPointerCasts(),
711 DL.getTypeStoreSize(
SI->getOperand(0)->getType()),
712 std::min(
SI->getAlign(), LI->
getAlign()), BAA, GetCall);
714 eraseInstruction(SI);
715 eraseInstruction(LI);
723 if (
auto *DestAlloca = dyn_cast<AllocaInst>(
SI->getPointerOperand())) {
725 if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
726 DL.getTypeStoreSize(
T), BAA)) {
728 BBI =
SI->getNextNonDebugInstruction()->getIterator();
729 eraseInstruction(SI);
730 eraseInstruction(LI);
750 if (
SI->getMetadata(LLVMContext::MD_nontemporal))
755 Value *StoredVal =
SI->getValueOperand();
763 if (
auto *LI = dyn_cast<LoadInst>(StoredVal))
764 return processStoreOfLoad(SI, LI,
DL, BBI);
785 tryMergingIntoMemset(SI,
SI->getPointerOperand(), ByteVal)) {
786 BBI =
I->getIterator();
793 auto *
T =
V->getType();
794 if (!
T->isAggregateType())
798 if (
Size.isScalable())
802 auto *
M = Builder.CreateMemSet(
SI->getPointerOperand(), ByteVal,
Size,
804 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
806 LLVM_DEBUG(
dbgs() <<
"Promoting " << *SI <<
" to " << *M <<
"\n");
812 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
false);
814 eraseInstruction(SI);
818 BBI =
M->getIterator();
828 BBI =
I->getIterator();
837bool MemCpyOptPass::performCallSlotOptzn(
Instruction *cpyLoad,
862 auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
866 ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
871 TypeSize SrcAllocaSize =
DL.getTypeAllocSize(srcAlloca->getAllocatedType());
877 if (cpySize < srcSize)
886 if (
F->isIntrinsic() &&
F->getIntrinsicID() == Intrinsic::lifetime_start)
889 if (
C->getParent() != cpyStore->
getParent()) {
895 isa<StoreInst>(cpyStore)
904 LLVM_DEBUG(
dbgs() <<
"Call Slot: Dest pointer modified after call\n");
911 if (SkippedLifetimeStart) {
913 dyn_cast<Instruction>(SkippedLifetimeStart->
getOperand(1));
914 if (LifetimeArg && LifetimeArg->getParent() ==
C->getParent() &&
915 C->comesBefore(LifetimeArg))
921 bool ExplicitlyDereferenceableOnly;
923 ExplicitlyDereferenceableOnly) ||
926 LLVM_DEBUG(
dbgs() <<
"Call Slot: Dest pointer not dereferenceable\n");
945 LLVM_DEBUG(
dbgs() <<
"Call Slot: Dest may be visible through unwinding\n");
950 Align srcAlign = srcAlloca->getAlign();
951 bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
954 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
955 LLVM_DEBUG(
dbgs() <<
"Call Slot: Dest not sufficiently aligned\n");
964 while (!srcUseList.empty()) {
965 User *
U = srcUseList.pop_back_val();
967 if (isa<AddrSpaceCastInst>(U)) {
971 if (isa<LifetimeIntrinsic>(U))
974 if (U !=
C && U != cpyLoad) {
975 LLVM_DEBUG(
dbgs() <<
"Call slot: Source accessed by " << *U <<
"\n");
982 bool SrcIsCaptured =
any_of(
C->args(), [&](
Use &U) {
983 return U->stripPointerCasts() == cpySrc &&
984 !C->doesNotCapture(C->getArgOperandNo(&U));
1005 make_range(++
C->getIterator(),
C->getParent()->end())) {
1007 if (
auto *
II = dyn_cast<IntrinsicInst>(&
I)) {
1008 if (
II->getIntrinsicID() == Intrinsic::lifetime_end &&
1009 II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
1010 cast<ConstantInt>(
II->getArgOperand(0))->uge(srcSize))
1015 if (isa<ReturnInst>(&
I))
1033 bool NeedMoveGEP =
false;
1036 auto *
GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1037 if (
GEP &&
GEP->hasAllConstantIndices() &&
1060 for (
unsigned ArgI = 0; ArgI <
C->arg_size(); ++ArgI)
1061 if (
C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1062 cpySrc->
getType() !=
C->getArgOperand(ArgI)->getType())
1066 bool changedArgument =
false;
1067 for (
unsigned ArgI = 0; ArgI <
C->arg_size(); ++ArgI)
1068 if (
C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1069 changedArgument =
true;
1070 C->setArgOperand(ArgI, cpyDest);
1073 if (!changedArgument)
1077 if (!isDestSufficientlyAligned) {
1078 assert(isa<AllocaInst>(cpyDest) &&
"Can only increase alloca alignment!");
1079 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1083 auto *
GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1084 GEP->moveBefore(
C->getIterator());
1087 if (SkippedLifetimeStart) {
1088 SkippedLifetimeStart->
moveBefore(
C->getIterator());
1094 if (cpyLoad != cpyStore)
1103bool MemCpyOptPass::processMemCpyMemCpyDependence(
MemCpyInst *M,
1118 int64_t MForwardOffset = 0;
1122 if (
M->getSource() != MDep->
getDest()) {
1123 std::optional<int64_t>
Offset =
1124 M->getSource()->getPointerOffsetFrom(MDep->
getDest(),
DL);
1127 MForwardOffset = *
Offset;
1132 if (MForwardOffset != 0 || MDep->
getLength() !=
M->getLength()) {
1133 auto *MDepLen = dyn_cast<ConstantInt>(MDep->
getLength());
1134 auto *MLen = dyn_cast<ConstantInt>(
M->getLength());
1135 if (!MDepLen || !MLen ||
1136 MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset)
1144 if (NewCopySource && NewCopySource->
use_empty())
1150 eraseInstruction(NewCopySource);
1162 if (MForwardOffset > 0) {
1164 std::optional<int64_t> MDestOffset =
1166 if (MDestOffset == MForwardOffset)
1167 CopySource =
M->getDest();
1169 CopySource = Builder.CreateInBoundsPtrAdd(
1170 CopySource, Builder.getInt64(MForwardOffset));
1171 NewCopySource = dyn_cast<Instruction>(CopySource);
1174 MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
1175 if (CopySourceAlign)
1199 eraseInstruction(M);
1209 bool UseMemMove =
false;
1214 if (isa<MemCpyInlineInst>(M))
1220 LLVM_DEBUG(
dbgs() <<
"MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1229 Builder.CreateMemMove(
M->getDest(),
M->getDestAlign(), CopySource,
1230 CopySourceAlign,
M->getLength(),
M->isVolatile());
1231 else if (isa<MemCpyInlineInst>(M)) {
1235 NewM = Builder.CreateMemCpyInline(
M->getDest(),
M->getDestAlign(),
1236 CopySource, CopySourceAlign,
1237 M->getLength(),
M->isVolatile());
1240 Builder.CreateMemCpy(
M->getDest(),
M->getDestAlign(), CopySource,
1241 CopySourceAlign,
M->getLength(),
M->isVolatile());
1247 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
1250 eraseInstruction(M);
1274bool MemCpyOptPass::processMemSetMemCpyDependence(
MemCpyInst *MemCpy,
1312 if (DestSize == SrcSize) {
1313 eraseInstruction(MemSet);
1324 if (
auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1335 "Preserving debug location based on moving memset within BB.");
1336 Builder.SetCurrentDebugLocation(MemSet->
getDebugLoc());
1342 SrcSize = Builder.CreateZExt(SrcSize, DestSize->
getType());
1344 DestSize = Builder.CreateZExt(DestSize, SrcSize->
getType());
1347 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1348 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1349 Value *MemsetLen = Builder.CreateSelect(
1352 Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
1353 MemSet->
getOperand(1), MemsetLen, Alignment);
1356 "MemCpy must be a MemoryDef");
1362 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
1364 eraseInstruction(MemSet);
1375 if (
auto *
II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1376 if (
II->getIntrinsicID() == Intrinsic::lifetime_start) {
1377 auto *LTSize = cast<ConstantInt>(
II->getArgOperand(0));
1379 if (
auto *CSize = dyn_cast<ConstantInt>(
Size)) {
1381 LTSize->getZExtValue() >= CSize->getZExtValue())
1392 if (std::optional<TypeSize> AllocaSize =
1393 Alloca->getAllocationSize(
DL))
1394 if (*AllocaSize == LTSize->getValue())
1416bool MemCpyOptPass::performMemCpyToMemSetOptzn(
MemCpyInst *MemCpy,
1427 if (MemSetSize != CopySize) {
1432 auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1437 auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1440 if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1446 bool CanReduceSize =
false;
1450 if (
auto *MD = dyn_cast<MemoryDef>(Clobber))
1452 CanReduceSize =
true;
1456 CopySize = MemSetSize;
1466 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
1499 if (!SrcSize ||
Size != *SrcSize) {
1500 LLVM_DEBUG(
dbgs() <<
"Stack Move: Source alloca size mismatch\n");
1504 if (!DestSize ||
Size != *DestSize) {
1505 LLVM_DEBUG(
dbgs() <<
"Stack Move: Destination alloca size mismatch\n");
1519 bool SrcNotDom =
false;
1523 bool CanBeNull, CanBeFreed;
1524 return V->getPointerDereferenceableBytes(
DL, CanBeNull, CanBeFreed);
1527 auto CaptureTrackingWithModRef =
1533 Worklist.
reserve(MaxUsesToExplore);
1535 while (!Worklist.
empty()) {
1538 for (
const Use &U :
I->uses()) {
1539 auto *UI = cast<Instruction>(
U.getUser());
1545 if (Visited.
size() >= MaxUsesToExplore) {
1548 <<
"Stack Move: Exceeded max uses to see ModRef, bailing\n");
1551 if (!Visited.
insert(&U).second)
1561 if (UI->isLifetimeStartOrEnd()) {
1567 int64_t
Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
1568 if (
Size < 0 ||
Size == DestSize) {
1573 if (UI->hasMetadata(LLVMContext::MD_noalias))
1574 NoAliasInstrs.
insert(UI);
1575 if (!ModRefCallback(UI))
1590 auto DestModRefCallback = [&](
Instruction *UI) ->
bool {
1600 if (UI->getParent() ==
Store->getParent()) {
1609 if (UI->comesBefore(Store))
1619 ReachabilityWorklist.
push_back(UI->getParent());
1625 if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
1628 if (!ReachabilityWorklist.
empty() &&
1630 nullptr, DT,
nullptr))
1638 auto SrcModRefCallback = [&](
Instruction *UI) ->
bool {
1651 if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
1658 SrcAlloca->
getParent()->getFirstInsertionPt());
1665 eraseInstruction(DestAlloca);
1673 if (!LifetimeMarkers.
empty()) {
1675 eraseInstruction(
I);
1683 I->setMetadata(LLVMContext::MD_noalias,
nullptr);
1685 LLVM_DEBUG(
dbgs() <<
"Stack Move: Performed staack-move optimization\n");
1691 if (
auto *
I = dyn_cast<Instruction>(
Size))
1695 if (
auto *
C = dyn_cast<Constant>(
Size))
1696 return isa<UndefValue>(
C) ||
C->isNullValue();
1707 if (
M->isVolatile())
1711 if (
M->getSource() ==
M->getDest()) {
1713 eraseInstruction(M);
1720 eraseInstruction(M);
1730 if (
auto *GV = dyn_cast<GlobalVariable>(
M->getSource()))
1731 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1733 M->getDataLayout())) {
1736 M->getRawDest(), ByteVal,
M->getLength(),
M->getDestAlign(),
false);
1737 auto *LastDef = cast<MemoryDef>(MA);
1740 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
1742 eraseInstruction(M);
1758 if (
auto *MD = dyn_cast<MemoryDef>(DestClobber))
1759 if (
auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1760 if (DestClobber->
getBlock() ==
M->getParent())
1761 if (processMemSetMemCpyDependence(M, MDep, BAA))
1775 if (
auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1777 if (
auto *CopySize = dyn_cast<ConstantInt>(
M->getLength())) {
1778 if (
auto *
C = dyn_cast<CallInst>(
MI)) {
1779 if (performCallSlotOptzn(M, M,
M->getDest(),
M->getSource(),
1781 M->getDestAlign().valueOrOne(), BAA,
1784 <<
" call: " << *
C <<
"\n"
1785 <<
" memcpy: " << *M <<
"\n");
1786 eraseInstruction(M);
1792 if (
auto *MDep = dyn_cast<MemCpyInst>(
MI))
1793 if (processMemCpyMemCpyDependence(M, MDep, BAA))
1795 if (
auto *MDep = dyn_cast<MemSetInst>(
MI)) {
1796 if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
1798 eraseInstruction(M);
1807 eraseInstruction(M);
1816 auto *DestAlloca = dyn_cast<AllocaInst>(
M->getDest());
1819 auto *SrcAlloca = dyn_cast<AllocaInst>(
M->getSource());
1825 if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
1828 BBI =
M->getNextNonDebugInstruction()->getIterator();
1829 eraseInstruction(M);
1839bool MemCpyOptPass::isMemMoveMemSetDependency(
MemMoveInst *M) {
1840 const auto &
DL =
M->getDataLayout();
1847 auto *MemMoveSourceOp =
M->getSource();
1848 auto *
Source = dyn_cast<GEPOperator>(MemMoveSourceOp);
1854 if (
Source->getPointerOperand() !=
M->getDest() ||
1869 auto *DestClobber = dyn_cast<MemoryDef>(
1874 auto *MS = dyn_cast_or_null<MemSetInst>(DestClobber->getMemoryInst());
1879 auto *MemSetLength = dyn_cast<ConstantInt>(MS->getLength());
1880 if (!MemSetLength || MemSetLength->getZExtValue() < MemMoveSize)
1897 if (!
M->isVolatile() && isMemMoveMemSetDependency(M)) {
1900 eraseInstruction(M);
1907 LLVM_DEBUG(
dbgs() <<
"MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1911 Type *ArgTys[3] = {
M->getRawDest()->getType(),
M->getRawSource()->getType(),
1912 M->getLength()->getType()};
1914 M->getModule(), Intrinsic::memcpy, ArgTys));
1924bool MemCpyOptPass::processByValArgument(
CallBase &CB,
unsigned ArgNo) {
1929 TypeSize ByValSize =
DL.getTypeAllocSize(ByValTy);
1938 if (
auto *MD = dyn_cast<MemoryDef>(Clobber))
1939 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1949 auto *C1 = dyn_cast<ConstantInt>(MDep->
getLength());
1963 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1982 LLVM_DEBUG(
dbgs() <<
"MemCpyOptPass: Forwarding memcpy to byval:\n"
1983 <<
" " << *MDep <<
"\n"
1984 <<
" " << CB <<
"\n");
2007bool MemCpyOptPass::processImmutArgument(
CallBase &CB,
unsigned ArgNo) {
2032 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(
DL);
2035 if (!AllocaSize || AllocaSize->isScalable())
2045 if (
auto *MD = dyn_cast<MemoryDef>(Clobber))
2046 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
2058 auto *MDepLen = dyn_cast<ConstantInt>(MDep->
getLength());
2059 if (!MDepLen || AllocaSize != MDepLen->getValue())
2066 Align AllocaAlign = AI->getAlign();
2067 if (MemDepAlign < AllocaAlign &&
2086 LLVM_DEBUG(
dbgs() <<
"MemCpyOptPass: Forwarding memcpy to Immut src:\n"
2087 <<
" " << *MDep <<
"\n"
2088 <<
" " << CB <<
"\n");
2098bool MemCpyOptPass::iterateOnFunction(
Function &
F) {
2099 bool MadeChange =
false;
2114 bool RepeatInstruction =
false;
2116 if (
auto *SI = dyn_cast<StoreInst>(
I))
2117 MadeChange |= processStore(SI, BI);
2118 else if (
auto *M = dyn_cast<MemSetInst>(
I))
2119 RepeatInstruction = processMemSet(M, BI);
2120 else if (
auto *M = dyn_cast<MemCpyInst>(
I))
2121 RepeatInstruction = processMemCpy(M, BI);
2122 else if (
auto *M = dyn_cast<MemMoveInst>(
I))
2123 RepeatInstruction = processMemMove(M, BI);
2124 else if (
auto *CB = dyn_cast<CallBase>(
I)) {
2125 for (
unsigned i = 0, e = CB->
arg_size(); i != e; ++i) {
2127 MadeChange |= processByValArgument(*CB, i);
2129 MadeChange |= processImmutArgument(*CB, i);
2134 if (RepeatInstruction) {
2135 if (BI != BB.
begin())
2153 bool MadeChange =
runImpl(
F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
2167 bool MadeChange =
false;
2180 if (!iterateOnFunction(
F))
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
This file defines the DenseSet and SmallDenseSet classes.
This is the interface for a simple mod/ref and alias analysis over globals.
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start, Instruction *End)
static bool isZeroSize(Value *Size)
static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc, const MemoryUseOrDef *Start, const MemoryUseOrDef *End, Instruction **SkippedLifetimeStart=nullptr)
static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V, MemoryDef *Def, Value *Size)
Determine whether the instruction has undefined content for the given Size, either because it was freshly alloca'd or started its lifetime.
static cl::opt< bool > EnableMemCpyOptWithoutLibcalls("enable-memcpyopt-without-libcalls", cl::Hidden, cl::desc("Enable memcpyopt even when libcalls are disabled"))
static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA, MemoryLocation Loc, const MemoryUseOrDef *Start, const MemoryUseOrDef *End)
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/def graph.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope exit.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A manager for alias analyses.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
an instruction to allocate memory on the stack
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
bool isEntryBlock() const
Return true if this is the entry block of the containing function.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR changes in between queries.
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
ModRefInfo callCapturesBefore(const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT)
Represents analyses that only rely on functions' control flow.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to calling a function.
bool doesNotCapture(unsigned OpNo) const
Determine whether this data operand is not captured.
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
bool isByValArgument(unsigned ArgNo) const
Determine whether this argument is passed by value.
MaybeAlign getParamAlign(unsigned ArgNo) const
Extract the alignment for a call or parameter (0=unknown).
bool onlyReadsMemory(unsigned OpNo) const
Type * getParamByValType(unsigned ArgNo) const
Extract the byval type for a call or parameter.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Implements a dense probed hash-table based set.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Context-sensitive CaptureAnalysis provider, which computes and caches the earliest common dominator c...
void removeInstruction(Instruction *I)
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
void mergeDIAssignID(ArrayRef< const Instruction * > SourceInstructions)
Merge the DIAssignID metadata from this instruction and those attached to instructions in SourceInstructions.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
void dropUnknownNonDebugMetadata(ArrayRef< unsigned > KnownIDs={})
Drop all unknown metadata except for debug locations.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos lives in, right before MovePos.
An instruction for reading from memory.
Value * getPointerOperand()
Align getAlign() const
Return the alignment of the access that is being performed.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
This class wraps the llvm.memcpy intrinsic.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
bool runImpl(Function &F, TargetLibraryInfo *TLI, AAResults *AA, AssumptionCache *AC, DominatorTree *DT, PostDominatorTree *PDT, MemorySSA *MSSA)
Value * getLength() const
Value * getRawDest() const
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
MaybeAlign getDestAlign() const
This class wraps the llvm.memmove intrinsic.
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
Value * getRawSource() const
Return the arguments to the instruction.
MaybeAlign getSourceAlign() const
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it, giving the original input.
BasicBlock * getBlock() const
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Representation for a specific memory location.
MemoryLocation getWithNewSize(LocationSize NewSize) const
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static MemoryLocation getForSource(const MemTransferInst *MTI)
Return a location representing the source of a memory transfer.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
An analysis that produces MemorySSA for a function.
MemoryUseOrDef * createMemoryAccessBefore(Instruction *I, MemoryAccess *Definition, MemoryUseOrDef *InsertPt)
Create a MemoryAccess in MemorySSA before an existing MemoryAccess.
void insertDef(MemoryDef *Def, bool RenameUses=false)
Insert a definition into the MemorySSA IR.
void moveAfter(MemoryUseOrDef *What, MemoryUseOrDef *Where)
void removeMemoryAccess(MemoryAccess *, bool OptimizePhis=false)
Remove a MemoryAccess from MemorySSA, including updating all definitions and uses.
MemoryUseOrDef * createMemoryAccessAfter(Instruction *I, MemoryAccess *Definition, MemoryAccess *InsertPt)
Create a MemoryAccess in MemorySSA after an existing MemoryAccess.
void moveBefore(MemoryUseOrDef *What, MemoryUseOrDef *Where)
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating clobbering MemoryAccess (by skipping non-aliasing def links).
Encapsulates MemorySSA, including all data associated with memory accesses.
bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right places, etc.).
MemorySSAWalker * getWalker()
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Class that has the common methods + fields of memory uses/defs.
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominator tree.
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
void preserve()
Mark an analysis as preserved.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void reserve(size_type N)
typename SuperClass::const_iterator const_iterator
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
static constexpr bool isKnownGE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
reverse_self_iterator getReverseIterator()
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
const_iterator begin(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get begin iterator over path.
const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
bool isPotentiallyReachableFromMany(SmallVectorImpl< BasicBlock * > &Worklist, const BasicBlock *StopBB, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether there is at least one path from a block in 'Worklist' to 'StopBB' without passing t...
bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, Align Alignment, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Returns true if V is always a dereferenceable pointer with alignment greater or equal than requested.
UseCaptureKind DetermineUseCaptureKind(const Use &U, llvm::function_ref< bool(Value *, const DataLayout &)> IsDereferenceableOrNull)
Determine what kind of capture behaviour U may exhibit.
auto partition_point(R &&Range, Predicate P)
Binary search for the first iterator in a range where a predicate is false.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, bool StoreCaptures, const Instruction *I, const DominatorTree *DT, bool IncludeI=false, unsigned MaxUsesToExplore=0, const LoopInfo *LI=nullptr)
PointerMayBeCapturedBefore - Return true if this pointer value may be captured by the enclosing funct...
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
unsigned getDefaultMaxUsesToExploreForCaptureTracking()
getDefaultMaxUsesToExploreForCaptureTracking - Return default value of the maximal number of uses to ...
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
bool isModSet(const ModRefInfo MRI)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isModOrRefSet(const ModRefInfo MRI)
bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cann...
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
@ NoModRef
The access neither references nor modifies the value stored in memory.
bool VerifyMemorySSA
Enables verification of MemorySSA.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
bool isIdentifiedFunctionLocal(const Value *V)
Return true if V is unambiguously identified at the function-level.
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
void combineAAMetadata(Instruction *K, const Instruction *J)
Combine metadata of two instructions, where instruction J is a memory access that has been merged int...
bool isRefSet(const ModRefInfo MRI)
bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
This struct is a compact representation of a valid (non-zero power of two) alignment.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.