//===- AtomicExpandPass.cpp - Expand atomic instructions -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// __atomic_* library calls, or target-specific instructions which implement
// the same semantics in a way which better fits the target backend. This can
// include the use of (intrinsic-based) load-linked/store-conditional loops,
// AtomicCmpXchg, or type coercions.
//
//===----------------------------------------------------------------------===//
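// A typical case handled here is an atomic operation wider than the target
// supports natively. For example (illustrative only; the exact expansion
// depends on the target's maximum supported atomic size and the alignment):
//
//   %v = load atomic i128, ptr %p seq_cst, align 16
//
// is lowered to the generic sized libcall, roughly:
//
//   call void @__atomic_load(i64 16, ptr %p, ptr %tmp, i32 5)
//   %v = load i128, ptr %tmp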

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/AtomicExpand.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/LowerAtomic.h"
#include <cassert>
#include <cstdint>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "atomic-expand"

namespace {

class AtomicExpandImpl {
  const TargetLowering *TLI = nullptr;
  const DataLayout *DL = nullptr;

private:
  void handleFailure(Instruction &FailedInst, const Twine &Msg) const {
    LLVMContext &Ctx = FailedInst.getContext();

    // TODO: Do not use generic error type.
    Ctx.emitError(&FailedInst, Msg);

    if (!FailedInst.getType()->isVoidTy())
      FailedInst.replaceAllUsesWith(PoisonValue::get(FailedInst.getType()));
    FailedInst.eraseFromParent();
  }

  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
  IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
  LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
  bool tryExpandAtomicLoad(LoadInst *LI);
  bool expandAtomicLoadToLL(LoadInst *LI);
  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
  StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
  bool tryExpandAtomicStore(StoreInst *SI);
  void expandAtomicStoreToXChg(StoreInst *SI);
  bool tryExpandAtomicRMW(AtomicRMWInst *AI);
  AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
  Value *
  insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
                    Align AddrAlign, AtomicOrdering MemOpOrder,
                    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
  void expandAtomicOpToLLSC(
      Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
      AtomicOrdering MemOpOrder,
      function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
  void expandPartwordAtomicRMW(
      AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
  AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
  bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
  void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
  void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);

  AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
  static Value *insertRMWCmpXchgLoop(
      IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
      AtomicOrdering MemOpOrder, SyncScope::ID SSID,
      function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
      CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc);
  bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);

  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
  bool isIdempotentRMW(AtomicRMWInst *RMWI);
  bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);

  bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
                               Value *PointerOperand, Value *ValueOperand,
                               Value *CASExpected, AtomicOrdering Ordering,
                               AtomicOrdering Ordering2,
                               ArrayRef<RTLIB::Libcall> Libcalls);
  void expandAtomicLoadToLibcall(LoadInst *LI);
  void expandAtomicStoreToLibcall(StoreInst *LI);
  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);

  friend bool
  llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                 CreateCmpXchgInstFun CreateCmpXchg);

  bool processAtomicInstr(Instruction *I);

public:
  bool run(Function &F, const TargetMachine *TM);
};

class AtomicExpandLegacy : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  AtomicExpandLegacy() : FunctionPass(ID) {
    initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;
};

// IRBuilder to be used for replacement atomic instructions.
struct ReplacementIRBuilder
    : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
  MDNode *MMRAMD = nullptr;

  // Preserves the DebugLoc from I, and preserves still valid metadata.
  // Enable StrictFP builder mode when appropriate.
  explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
      : IRBuilder(I->getContext(), InstSimplifyFolder(DL),
                  IRBuilderCallbackInserter(
                      [this](Instruction *I) { addMMRAMD(I); })) {
    SetInsertPoint(I);
    this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
    if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
      this->setIsFPConstrained(true);

    MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
  }

  void addMMRAMD(Instruction *I) {
    if (canInstructionHaveMMRAs(*I))
      I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
  }
};

} // end anonymous namespace

char AtomicExpandLegacy::ID = 0;

char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;

INITIALIZE_PASS_BEGIN(AtomicExpandLegacy, DEBUG_TYPE,
                      "Expand Atomic instructions", false, false)
INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
                    "Expand Atomic instructions", false, false)

// Helper functions to retrieve the size of atomic instructions.
static unsigned getAtomicOpSize(LoadInst *LI) {
  const DataLayout &DL = LI->getDataLayout();
  return DL.getTypeStoreSize(LI->getType());
}

static unsigned getAtomicOpSize(StoreInst *SI) {
  const DataLayout &DL = SI->getDataLayout();
  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
}

static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
  const DataLayout &DL = RMWI->getDataLayout();
  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
}

static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
  const DataLayout &DL = CASI->getDataLayout();
  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
}

/// Copy metadata that's safe to preserve when widening atomics.
static void copyMetadataForAtomic(Instruction &Dest,
                                  const Instruction &Source) {
  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
  Source.getAllMetadata(MD);
  LLVMContext &Ctx = Dest.getContext();
  MDBuilder MDB(Ctx);

  for (auto [ID, N] : MD) {
    switch (ID) {
    case LLVMContext::MD_dbg:
    case LLVMContext::MD_tbaa:
    case LLVMContext::MD_tbaa_struct:
    case LLVMContext::MD_alias_scope:
    case LLVMContext::MD_noalias:
    case LLVMContext::MD_noalias_addrspace:
    case LLVMContext::MD_access_group:
    case LLVMContext::MD_mmra:
      Dest.setMetadata(ID, N);
      break;
    default:
      if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
        Dest.setMetadata(ID, N);
      else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
        Dest.setMetadata(ID, N);

      // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
      // uses.
      break;
    }
  }
}

// Determine if a particular atomic operation has a supported size,
// and is of appropriate alignment, to be passed through for target
// lowering. (Versus turning into a __atomic libcall)
template <typename Inst>
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
  unsigned Size = getAtomicOpSize(I);
  Align Alignment = I->getAlign();
  return Alignment >= Size &&
         Size <= (TLI->getMaxAtomicSizeInBitsSupported() / 8);
}
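
// For example, with getMaxAtomicSizeInBitsSupported() == 64, a naturally
// aligned "load atomic i64, ptr %p seq_cst, align 8" is passed through to the
// backend, while the same load at align 4, or any i128 atomic, is routed to
// the __atomic_* libcalls below. (Illustrative; the limits are per-target.)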

bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
  auto *LI = dyn_cast<LoadInst>(I);
  auto *SI = dyn_cast<StoreInst>(I);
  auto *RMWI = dyn_cast<AtomicRMWInst>(I);
  auto *CASI = dyn_cast<AtomicCmpXchgInst>(I);

  bool MadeChange = false;

  // If the Size/Alignment is not supported, replace with a libcall.
  if (LI) {
    if (!LI->isAtomic())
      return false;

    if (!atomicSizeSupported(TLI, LI)) {
      expandAtomicLoadToLibcall(LI);
      return true;
    }

    if (TLI->shouldCastAtomicLoadInIR(LI) ==
        TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = LI = convertAtomicLoadToIntegerType(LI);
      MadeChange = true;
    }
  } else if (SI) {
    if (!SI->isAtomic())
      return false;

    if (!atomicSizeSupported(TLI, SI)) {
      expandAtomicStoreToLibcall(SI);
      return true;
    }

    if (TLI->shouldCastAtomicStoreInIR(SI) ==
        TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = SI = convertAtomicStoreToIntegerType(SI);
      MadeChange = true;
    }
  } else if (RMWI) {
    if (!atomicSizeSupported(TLI, RMWI)) {
      expandAtomicRMWToLibcall(RMWI);
      return true;
    }

    if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
        TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = RMWI = convertAtomicXchgToIntegerType(RMWI);
      MadeChange = true;
    }
  } else if (CASI) {
    if (!atomicSizeSupported(TLI, CASI)) {
      expandAtomicCASToLibcall(CASI);
      return true;
    }

    // TODO: when we're ready to make the change at the IR level, we can
    // extend convertCmpXchgToInteger for floating point too.
    if (CASI->getCompareOperand()->getType()->isPointerTy()) {
      // TODO: add a TLI hook to control this so that each target can
      // convert to lowering the original type one at a time.
      I = CASI = convertCmpXchgToIntegerType(CASI);
      MadeChange = true;
    }
  } else
    return false;

  if (TLI->shouldInsertFencesForAtomic(I)) {
    auto FenceOrdering = AtomicOrdering::Monotonic;
    if (LI && isAcquireOrStronger(LI->getOrdering())) {
      FenceOrdering = LI->getOrdering();
      LI->setOrdering(AtomicOrdering::Monotonic);
    } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
      FenceOrdering = SI->getOrdering();
      SI->setOrdering(AtomicOrdering::Monotonic);
    } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
                        isAcquireOrStronger(RMWI->getOrdering()))) {
      FenceOrdering = RMWI->getOrdering();
      RMWI->setOrdering(AtomicOrdering::Monotonic);
    } else if (CASI &&
               TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
                   TargetLoweringBase::AtomicExpansionKind::None &&
               (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
                isAcquireOrStronger(CASI->getSuccessOrdering()) ||
                isAcquireOrStronger(CASI->getFailureOrdering()))) {
      // If a compare and swap is lowered to LL/SC, we can do smarter fence
      // insertion, with a stronger one on the success path than on the
      // failure path. As a result, fence insertion is directly done by
      // expandAtomicCmpXchg in that case.
      FenceOrdering = CASI->getMergedOrdering();
      auto CASOrdering = TLI->atomicOperationOrderAfterFenceSplit(CASI);

      CASI->setSuccessOrdering(CASOrdering);
      CASI->setFailureOrdering(CASOrdering);
    }

    if (FenceOrdering != AtomicOrdering::Monotonic) {
      MadeChange |= bracketInstWithFences(I, FenceOrdering);
    }
  } else if (I->hasAtomicStore() &&
             TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
    auto FenceOrdering = AtomicOrdering::Monotonic;
    if (SI)
      FenceOrdering = SI->getOrdering();
    else if (RMWI)
      FenceOrdering = RMWI->getOrdering();
    else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
                         TargetLoweringBase::AtomicExpansionKind::LLSC)
      // LLSC is handled in expandAtomicCmpXchg().
      FenceOrdering = CASI->getSuccessOrdering();

    IRBuilder Builder(I);
    if (auto TrailingFence =
            TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
      TrailingFence->moveAfter(I);
      MadeChange = true;
    }
  }

  if (LI)
    MadeChange |= tryExpandAtomicLoad(LI);
  else if (SI)
    MadeChange |= tryExpandAtomicStore(SI);
  else if (RMWI) {
    // There are two different ways of expanding RMW instructions:
    // - into a load if it is idempotent
    // - into a Cmpxchg/LL-SC loop otherwise
    // we try them in that order.

    if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
      MadeChange = true;

    } else {
      MadeChange |= tryExpandAtomicRMW(RMWI);
    }
  } else if (CASI)
    MadeChange |= tryExpandAtomicCmpXchg(CASI);

  return MadeChange;
}

bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
  const auto *Subtarget = TM->getSubtargetImpl(F);
  if (!Subtarget->enableAtomicExpand())
    return false;
  TLI = Subtarget->getTargetLowering();
  DL = &F.getDataLayout();

  bool MadeChange = false;

  for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
    BasicBlock *BB = &*BBI;
    BasicBlock::reverse_iterator Next;

    for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;
         I = Next) {
      Instruction &Inst = *I;
      Next = std::next(I);

      if (processAtomicInstr(&Inst)) {
        MadeChange = true;

        // New blocks may have been inserted.
        BBE = F.end();
      }
    }
  }

  return MadeChange;
}

bool AtomicExpandLegacy::runOnFunction(Function &F) {

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;
  auto *TM = &TPC->getTM<TargetMachine>();
  AtomicExpandImpl AE;
  return AE.run(F, TM);
}

FunctionPass *llvm::createAtomicExpandLegacyPass() {
  return new AtomicExpandLegacy();
}

PreservedAnalyses AtomicExpandPass::run(Function &F,
                                        FunctionAnalysisManager &AM) {
  AtomicExpandImpl AE;

  bool Changed = AE.run(F, TM);
  if (!Changed)
    return PreservedAnalyses::all();

  return PreservedAnalyses::none();
}

bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
                                             AtomicOrdering Order) {
  ReplacementIRBuilder Builder(I, *DL);

  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);

  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
  // We have a guard here because not every atomic operation generates a
  // trailing fence.
  if (TrailingFence)
    TrailingFence->moveAfter(I);

  return (LeadingFence || TrailingFence);
}
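
// For instance, on a target that asks for explicit fences, a seq_cst store
// whose ordering was downgraded to monotonic in processAtomicInstr ends up
// bracketed roughly as:
//
//   fence seq_cst                 ; emitLeadingFence
//   store atomic ... monotonic
//   fence seq_cst                 ; emitTrailingFence (if the target emits one)
//
// The exact fences are chosen by the target hooks, so this is illustrative.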

/// Get the iX type with the same bitwidth as T.
IntegerType *
AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
  EVT VT = TLI->getMemValueType(DL, T);
  unsigned BitWidth = VT.getStoreSizeInBits();
  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
  return IntegerType::get(T->getContext(), BitWidth);
}

/// Convert an atomic load of a non-integral type to an integer load of the
/// equivalent bitwidth. See the function comment on
/// convertAtomicStoreToIntegerType for background.
LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
  auto *M = LI->getModule();
  Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(LI, *DL);

  Value *Addr = LI->getPointerOperand();

  auto *NewLI = Builder.CreateLoad(NewTy, Addr);
  NewLI->setAlignment(LI->getAlign());
  NewLI->setVolatile(LI->isVolatile());
  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");

  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
  LI->replaceAllUsesWith(NewVal);
  LI->eraseFromParent();
  return NewLI;
}

AtomicRMWInst *
AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {

  auto *M = RMWI->getModule();
  Type *NewTy =
      getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(RMWI, *DL);

  Value *Addr = RMWI->getPointerOperand();
  Value *Val = RMWI->getValOperand();
  Value *NewVal = Val->getType()->isPointerTy()
                      ? Builder.CreatePtrToInt(Val, NewTy)
                      : Builder.CreateBitCast(Val, NewTy);

  auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
                                          RMWI->getAlign(), RMWI->getOrdering(),
                                          RMWI->getSyncScopeID());
  NewRMWI->setVolatile(RMWI->isVolatile());
  copyMetadataForAtomic(*NewRMWI, *RMWI);
  LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");

  Value *NewRVal = RMWI->getType()->isPointerTy()
                       ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
                       : Builder.CreateBitCast(NewRMWI, RMWI->getType());
  RMWI->replaceAllUsesWith(NewRVal);
  RMWI->eraseFromParent();
  return NewRMWI;
}

bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    expandAtomicOpToLLSC(
        LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
        LI->getOrdering(),
        [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
    return true;
  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
    return expandAtomicLoadToLL(LI);
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
    return expandAtomicLoadToCmpXchg(LI);
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    LI->setAtomic(AtomicOrdering::NotAtomic);
    return true;
    TLI->emitExpandAtomicLoad(LI);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
  }
}

bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
  switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
    TLI->emitExpandAtomicStore(SI);
    return true;
    expandAtomicStoreToXChg(SI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    SI->setAtomic(AtomicOrdering::NotAtomic);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicStore");
  }
}

bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
  ReplacementIRBuilder Builder(LI, *DL);

  // On some architectures, load-linked instructions are atomic for larger
  // sizes than normal loads. For example, the only 64-bit load guaranteed
  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
  Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
                                   LI->getPointerOperand(), LI->getOrdering());
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);

  LI->replaceAllUsesWith(Val);
  LI->eraseFromParent();

  return true;
}

bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
  ReplacementIRBuilder Builder(LI, *DL);
  AtomicOrdering Order = LI->getOrdering();
  if (Order == AtomicOrdering::Unordered)
    Order = AtomicOrdering::Monotonic;

  Value *Addr = LI->getPointerOperand();
  Type *Ty = LI->getType();
  Constant *DummyVal = Constant::getNullValue(Ty);

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, DummyVal, DummyVal, LI->getAlign(), Order,
      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");

  LI->replaceAllUsesWith(Loaded);
  LI->eraseFromParent();

  return true;
}
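
// For example (illustrative), "%v = load atomic i64, ptr %p acquire, align 8"
// becomes:
//
//   %pair = cmpxchg ptr %p, i64 0, i64 0 acquire acquire
//   %v = extractvalue { i64, i1 } %pair, 0
//
// A failed compare-exchange simply returns the current value; if the memory
// happens to contain 0, storing 0 back is harmless.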

/// Convert an atomic store of a non-integral type to an integer store of the
/// equivalent bitwidth. We used to not support floating point or vector
/// atomics in the IR at all. The backends learned to deal with the bitcast
/// idiom because that was the only way of expressing the notion of an atomic
/// float or vector store. The long term plan is to teach each backend to
/// instruction select from the original atomic store, but as a migration
/// mechanism, we convert back to the old format which the backends understand.
/// Each backend will need individual work to recognize the new format.
StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
  ReplacementIRBuilder Builder(SI, *DL);
  auto *M = SI->getModule();
  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
                                            M->getDataLayout());
  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);

  Value *Addr = SI->getPointerOperand();

  StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
  NewSI->setAlignment(SI->getAlign());
  NewSI->setVolatile(SI->isVolatile());
  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
  SI->eraseFromParent();
  return NewSI;
}
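
// For example (illustrative), "store atomic float %f, ptr %p seq_cst, align 4"
// becomes:
//
//   %bits = bitcast float %f to i32
//   store atomic i32 %bits, ptr %p seq_cst, align 4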

void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
  // This function is only called on atomic stores that are too large to be
  // atomic if implemented as a native store. So we replace them by an
  // atomic swap, that can be implemented for example as a ldrex/strex on ARM
  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
  // It is the responsibility of the target to only signal expansion via
  // shouldExpandAtomicRMW in cases where this is required and possible.
  ReplacementIRBuilder Builder(SI, *DL);
  AtomicOrdering Ordering = SI->getOrdering();
  AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
                                   ? AtomicOrdering::Monotonic
                                   : Ordering;
  AtomicRMWInst *AI = Builder.CreateAtomicRMW(
      AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
      SI->getAlign(), RMWOrdering);
  SI->eraseFromParent();

  // Now we have an appropriate swap instruction, lower it as usual.
  tryExpandAtomicRMW(AI);
}
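
// For example (illustrative), on a 32-bit ARM target an oversized
// "store atomic i64 %v, ptr %p seq_cst, align 8" becomes
//
//   atomicrmw xchg ptr %p, i64 %v seq_cst
//
// whose result is unused; the xchg is then itself expanded (e.g. into an
// ldrexd/strexd loop) by tryExpandAtomicRMW.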

static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
                                 Value *Loaded, Value *NewVal, Align AddrAlign,
                                 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
                                 Value *&Success, Value *&NewLoaded,
                                 Instruction *MetadataSrc) {
  Type *OrigTy = NewVal->getType();

  // This code can go away when cmpxchg supports FP and vector types.
  assert(!OrigTy->isPointerTy());
  bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
  if (NeedBitcast) {
    IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
    NewVal = Builder.CreateBitCast(NewVal, IntTy);
    Loaded = Builder.CreateBitCast(Loaded, IntTy);
  }

  AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
  if (MetadataSrc)
    copyMetadataForAtomic(*Pair, *MetadataSrc);

  Success = Builder.CreateExtractValue(Pair, 1, "success");
  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

  if (NeedBitcast)
    NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
}

bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
  LLVMContext &Ctx = AI->getModule()->getContext();
  TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
  switch (Kind) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::LLSC);
    } else {
      auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
        return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                   AI->getValOperand());
      };
      expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
                           AI->getAlign(), AI->getOrdering(), PerformOp);
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::CmpXChg);
    } else {
      SmallVector<StringRef> SSNs;
      Ctx.getSyncScopeNames(SSNs);
      auto MemScope = SSNs[AI->getSyncScopeID()].empty()
                          ? "system"
                          : SSNs[AI->getSyncScopeID()];
      OptimizationRemarkEmitter ORE(AI->getFunction());
      ORE.emit([&]() {
        return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
               << "A compare and swap loop was generated for an atomic "
               << AI->getOperationName(AI->getOperation()) << " operation at "
               << MemScope << " memory scope";
      });
      expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      AtomicRMWInst::BinOp Op = AI->getOperation();
      // Widen And/Or/Xor and give the target another chance at expanding it.
      if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
          Op == AtomicRMWInst::And) {
        tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
        return true;
      }
    }
    expandAtomicRMWToMaskedIntrinsic(AI);
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
    TLI->emitBitTestAtomicRMWIntrinsic(AI);
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
    TLI->emitCmpArithAtomicRMWIntrinsic(AI);
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    return lowerAtomicRMWInst(AI);
    TLI->emitExpandAtomicRMW(AI);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
  }
}

namespace {

struct PartwordMaskValues {
  // These three fields are guaranteed to be set by createMaskInstrs.
  Type *WordType = nullptr;
  Type *ValueType = nullptr;
  Type *IntValueType = nullptr;
  Value *AlignedAddr = nullptr;
  Align AlignedAddrAlignment;
  // The remaining fields can be null.
  Value *ShiftAmt = nullptr;
  Value *Mask = nullptr;
  Value *Inv_Mask = nullptr;
};

raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
  auto PrintObj = [&O](auto *V) {
    if (V)
      O << *V;
    else
      O << "nullptr";
    O << '\n';
  };
  O << "PartwordMaskValues {\n";
  O << "  WordType: ";
  PrintObj(PMV.WordType);
  O << "  ValueType: ";
  PrintObj(PMV.ValueType);
  O << "  AlignedAddr: ";
  PrintObj(PMV.AlignedAddr);
  O << "  AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
  O << "  ShiftAmt: ";
  PrintObj(PMV.ShiftAmt);
  O << "  Mask: ";
  PrintObj(PMV.Mask);
  O << "  Inv_Mask: ";
  PrintObj(PMV.Inv_Mask);
  O << "}\n";
  return O;
}

} // end anonymous namespace

/// This is a helper function which builds instructions to provide
/// values necessary for partword atomic operations. It takes an
/// incoming address, Addr, and ValueType, and constructs the address,
/// shift-amounts and masks needed to work with a larger value of size
/// WordSize.
///
/// AlignedAddr: Addr rounded down to a multiple of WordSize
///
/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
///           from AlignAddr for it to have the same value as if
///           ValueType was loaded from Addr.
///
/// Mask: Value to mask with the value loaded from AlignAddr to
///       include only the part that would've been loaded from Addr.
///
/// Inv_Mask: The inverse of Mask.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
                                           Instruction *I, Type *ValueType,
                                           Value *Addr, Align AddrAlign,
                                           unsigned MinWordSize) {
  PartwordMaskValues PMV;

  Module *M = I->getModule();
  LLVMContext &Ctx = M->getContext();
  const DataLayout &DL = M->getDataLayout();
  unsigned ValueSize = DL.getTypeStoreSize(ValueType);

  PMV.ValueType = PMV.IntValueType = ValueType;
  if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
    PMV.IntValueType =
        Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());

  PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
                                         : ValueType;
  if (PMV.ValueType == PMV.WordType) {
    PMV.AlignedAddr = Addr;
    PMV.AlignedAddrAlignment = AddrAlign;
    PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
    PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
    return PMV;
  }

  PMV.AlignedAddrAlignment = Align(MinWordSize);

  assert(ValueSize < MinWordSize);

  PointerType *PtrTy = cast<PointerType>(Addr->getType());
  IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
  Value *PtrLSB;

  if (AddrAlign < MinWordSize) {
    PMV.AlignedAddr = Builder.CreateIntrinsic(
        Intrinsic::ptrmask, {PtrTy, IntTy},
        {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
        "AlignedAddr");

    Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
    PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
  } else {
    // If the alignment is high enough, the LSBs are known to be 0.
    PMV.AlignedAddr = Addr;
    PtrLSB = ConstantInt::getNullValue(IntTy);
  }

  if (DL.isLittleEndian()) {
    // turn bytes into bits
    PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
  } else {
    // turn bytes into bits, and count from the other side.
    PMV.ShiftAmt = Builder.CreateShl(
        Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
  }

  PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
  PMV.Mask = Builder.CreateShl(
      ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
      "Mask");

  PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");

  return PMV;
}
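
// Worked example (illustrative): little-endian target, MinWordSize == 4, and
// an i8 value whose address A satisfies A % 4 == 2, with align 1:
//
//   AlignedAddr = llvm.ptrmask(A, ~3)   ; A rounded down to its i32 word
//   PtrLSB      = 2
//   ShiftAmt    = 16                    ; 2 bytes * 8
//   Mask        = 0x00FF0000
//   Inv_Mask    = 0xFF00FFFF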

static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                 const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return WideWord;

  Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
  Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
  return Builder.CreateBitCast(Trunc, PMV.ValueType);
}

static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                Value *Updated, const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return Updated;

  Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);

  Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
  Value *Shift =
      Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
  Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
  Value *Or = Builder.CreateOr(And, Shift, "inserted");
  return Or;
}
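
// Continuing the example above: with ShiftAmt == 16 and Mask == 0x00FF0000,
//   extractMaskedValue(W)   == trunc(W >> 16) to i8
//   insertMaskedValue(W, v) == (W & 0xFF00FFFF) | (zext(v) << 16)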

/// Emit IR to implement a masked version of a given atomicrmw
/// operation. (That is, only the bits under the Mask should be
/// affected by the operation)
static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
                                    IRBuilderBase &Builder, Value *Loaded,
                                    Value *Shifted_Inc, Value *Inc,
                                    const PartwordMaskValues &PMV) {
  // TODO: update to use
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
  // to merge bits from two values without requiring PMV.Inv_Mask.
  switch (Op) {
  case AtomicRMWInst::Xchg: {
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
    return FinalVal;
  }
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::And:
    llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Nand: {
    // The other arithmetic ops need to be masked into place.
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
    Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
    return FinalVal;
  }
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
  case AtomicRMWInst::FAdd:
  case AtomicRMWInst::FSub:
  case AtomicRMWInst::FMax:
  case AtomicRMWInst::FMin:
  case AtomicRMWInst::FMaximum:
  case AtomicRMWInst::FMinimum:
  case AtomicRMWInst::UIncWrap:
  case AtomicRMWInst::UDecWrap:
  case AtomicRMWInst::USubCond:
  case AtomicRMWInst::USubSat: {
    // Finally, other ops will operate on the full value, so truncate down to
    // the original size, and expand out again after doing the
    // operation. Bitcasts will be inserted for FP values.
    Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
    Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
    return FinalVal;
  }
  default:
    llvm_unreachable("Unknown atomic op");
  }
}

/// Expand a sub-word atomicrmw operation into an appropriate
/// word-sized operation.
///
/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
/// way as a typical atomicrmw expansion. The only difference here is
/// that the operation inside of the loop may operate upon only a
/// part of the value.
void AtomicExpandImpl::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  // Widen And/Or/Xor and give the target another chance at expanding it.
  AtomicRMWInst::BinOp Op = AI->getOperation();
  if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
      Op == AtomicRMWInst::And) {
    tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
    return;
  }
  AtomicOrdering MemOpOrder = AI->getOrdering();
  SyncScope::ID SSID = AI->getSyncScopeID();

  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted = nullptr;
  if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
      Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
    Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
    ValOperand_Shifted =
        Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
                          "ValOperand_Shifted");
  }

  auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
    return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
                                 AI->getValOperand(), PMV);
  };

  Value *OldResult;
  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
    OldResult = insertRMWCmpXchgLoop(
        Builder, PMV.WordType, PMV.AlignedAddr, PMV.AlignedAddrAlignment,
        MemOpOrder, SSID, PerformPartwordOp, createCmpXchgInstFun, AI);
  } else {
    assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
    OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                  PMV.AlignedAddrAlignment, MemOpOrder,
                                  PerformPartwordOp);
  }

  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}

// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);
  AtomicRMWInst::BinOp Op = AI->getOperation();

  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
          Op == AtomicRMWInst::And) &&
         "Unable to widen operation");

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  Value *NewOperand;

  if (Op == AtomicRMWInst::And)
    NewOperand =
        Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
  else
    NewOperand = ValOperand_Shifted;

  AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
      Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
      AI->getOrdering(), AI->getSyncScopeID());

  copyMetadataForAtomic(*NewAI, *AI);

  Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
  return NewAI;
}
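
// For example (illustrative), with MinWordSize == 4 an i8
// "atomicrmw and ptr %p, i8 %v" at byte offset 2 becomes roughly:
//
//   %wide = or i32 (zext i8 %v << 16), 0xFF00FFFF  ; untouched bytes stay set
//   atomicrmw and ptr %AlignedAddr, i32 %wide
//
// For 'or' and 'xor' the shifted operand is used directly, since the other
// bytes already hold the identity value (0).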

bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
  // The basic idea here is that we're expanding a cmpxchg of a
  // smaller memory size up to a word-sized cmpxchg. To do this, we
  // need to add a retry-loop for strong cmpxchg, so that
  // modifications to other parts of the word don't cause a spurious
  // failure.

  // This generates code like the following:
  //     [[Setup mask values PMV.*]]
  //     %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
  //     %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
  //     %InitLoaded = load i32* %addr
  //     %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
  //     br partword.cmpxchg.loop
  // partword.cmpxchg.loop:
  //     %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
  //        [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
  //     %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
  //     %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
  //     %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
  //        i32 %FullWord_NewVal success_ordering failure_ordering
  //     %OldVal = extractvalue { i32, i1 } %NewCI, 0
  //     %Success = extractvalue { i32, i1 } %NewCI, 1
  //     br i1 %Success, label %partword.cmpxchg.end,
  //        label %partword.cmpxchg.failure
  // partword.cmpxchg.failure:
  //     %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
  //     %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
  //     br i1 %ShouldContinue, label %partword.cmpxchg.loop,
  //        label %partword.cmpxchg.end
  // partword.cmpxchg.end:
  //     %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
  //     %FinalOldVal = trunc i32 %tmp1 to i8
  //     %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
  //     %Res = insertvalue { i8, i1 } %25, i1 %Success, 1

  Value *Addr = CI->getPointerOperand();
  Value *Cmp = CI->getCompareOperand();
  Value *NewVal = CI->getNewValOperand();

  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  ReplacementIRBuilder Builder(CI, *DL);
  LLVMContext &Ctx = Builder.getContext();

  BasicBlock *EndBB =
      BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
  auto FailureBB =
      BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);

  // The split call above "helpfully" added a branch at the end of BB
  // (to the wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift the incoming values over, into the right location in the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);

  // Load the entire current word, and mask into place the expected and new
  // values
  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
  InitLoaded->setVolatile(CI->isVolatile());
  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
  Builder.CreateBr(LoopBB);

  // partword.cmpxchg.loop:
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);

  // Mask/Or the expected and new values into place in the loaded word.
  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
      CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  // When we're building a strong cmpxchg, we need a loop, so you
  // might think we could use a weak cmpxchg inside. But, using strong
  // allows the below comparison for ShouldContinue, and we're
  // expecting the underlying cmpxchg to be a machine instruction,
  // which is strong anyways.
  NewCI->setWeak(CI->isWeak());

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Success = Builder.CreateExtractValue(NewCI, 1);

  if (CI->isWeak())
    Builder.CreateBr(EndBB);
  else
    Builder.CreateCondBr(Success, EndBB, FailureBB);

  // partword.cmpxchg.failure:
  Builder.SetInsertPoint(FailureBB);
  // Upon failure, check whether the masked-out part of the loaded value has
  // been modified. If it hasn't, the failure must have come from the
  // masked-in part, so the cmpxchg has genuinely failed and we can exit.
  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);

  // Add the second value to the phi from above
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);

  // partword.cmpxchg.end:
  Builder.SetInsertPoint(CI);

  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return true;
}

void AtomicExpandImpl::expandAtomicOpToLLSC(
    Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  ReplacementIRBuilder Builder(I, *DL);
  Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
                                    MemOpOrder, PerformOp);

  I->replaceAllUsesWith(Loaded);
  I->eraseFromParent();
}

void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // The value operand must be sign-extended for signed min/max so that the
  // target's signed comparison instructions can be used. Otherwise, just
  // zero-ext.
  Instruction::CastOps CastOp = Instruction::ZExt;
  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
    CastOp = Instruction::SExt;

  Value *ValOperand_Shifted = Builder.CreateShl(
      Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
      PMV.ShiftAmt, "ValOperand_Shifted");
  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
      Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
      AI->getOrdering());
  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}

void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
    AtomicCmpXchgInst *CI) {
  ReplacementIRBuilder Builder(CI, *DL);

  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
      CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *CmpVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
      "CmpVal_Shifted");
  Value *NewVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
      "NewVal_Shifted");
  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
      Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
      CI->getMergedOrdering());
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Value *Success = Builder.CreateICmpEQ(
      CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
}

Value *AtomicExpandImpl::insertRMWLLSCLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  assert(AddrAlign >=
             F->getDataLayout().getTypeStoreSize(ResultTy) &&
         "Expected at least natural alignment at this point.");

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp i32 ne %stored, 0
  //     br i1 %try_again, label %loop, label %atomicrmw.end
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Loaded;
}

/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
/// IR. As a migration step, we convert back to what used to be the standard
/// way to represent a pointer cmpxchg so that we can update backends one by
/// one.
AtomicCmpXchgInst *
AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
  auto *M = CI->getModule();
  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
                                            M->getDataLayout());

  ReplacementIRBuilder Builder(CI, *DL);

  Value *Addr = CI->getPointerOperand();

  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);

  auto *NewCI = Builder.CreateAtomicCmpXchg(
      Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
      CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  NewCI->setWeak(CI->isWeak());
  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Succ = Builder.CreateExtractValue(NewCI, 1);

  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());

  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, OldVal, 0);
  Res = Builder.CreateInsertValue(Res, Succ, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return NewCI;
}

bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();
  // If shouldInsertFencesForAtomic() returns true, then the target does not
  // want to deal with memory orders, and emitLeading/TrailingFence should take
  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
  // should preserve the ordering.
  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
  AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
                                  ? AtomicOrdering::Monotonic
                                  : CI->getMergedOrdering();

  // In implementations which use a barrier to achieve release semantics, we
  // can delay emitting this barrier until we know a store is actually going
  // to be attempted. The cost of this delay is that we need 2 copies of the
  // block emitting the load-linked, affecting code size.
  //
  // Ideally, this logic would be unconditional except for the minsize check
  // since in other cases the extra blocks naturally collapse down to the
  // minimal loop. Unfortunately, this puts too much stress on later
  // optimisations so we avoid emitting the extra logic in those cases too.
  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
                           SuccessOrder != AtomicOrdering::Monotonic &&
                           SuccessOrder != AtomicOrdering::Acquire &&
                           !F->hasMinSize();

  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
  // do it even on minsize.
  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();

  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  //     %aligned.addr = ...
  // cmpxchg.start:
  //     %unreleasedload = @load.linked(%aligned.addr)
  //     %unreleasedload.extract = extract value from %unreleasedload
  //     %should_store = icmp eq %unreleasedload.extract, %desired
  //     br i1 %should_store, label %cmpxchg.releasingstore,
  //                          label %cmpxchg.nostore
  // cmpxchg.releasingstore:
  //     fence?
  //     br label cmpxchg.trystore
  // cmpxchg.trystore:
  //     %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
  //                            [%releasedload, %cmpxchg.releasedload]
  //     %updated.new = insert %new into %loaded.trystore
  //     %stored = @store_conditional(%updated.new, %aligned.addr)
  //     %success = icmp eq i32 %stored, 0
  //     br i1 %success, label %cmpxchg.success,
  //                     label %cmpxchg.releasedload/%cmpxchg.failure
  // cmpxchg.releasedload:
  //     %releasedload = @load.linked(%aligned.addr)
  //     %releasedload.extract = extract value from %releasedload
  //     %should_store = icmp eq %releasedload.extract, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.failure
  // cmpxchg.success:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.nostore:
  //     %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
  //                           [%releasedload,
  //                               %cmpxchg.releasedload/%cmpxchg.trystore]
  //     @load_linked_fail_balance()?
  //     br label %cmpxchg.failure
  // cmpxchg.failure:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
  //                        [%loaded.trystore, %cmpxchg.trystore]
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %loaded = extract value from %loaded.exit
  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
1425 BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1426 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1427 auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1428 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1429 auto ReleasedLoadBB =
1430 BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1431 auto TryStoreBB =
1432 BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1433 auto ReleasingStoreBB =
1434 BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1435 auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1436
1437 ReplacementIRBuilder Builder(CI, *DL);
1438
1439 // The split call above "helpfully" added a branch at the end of BB (to the
1440 // wrong place), but we might want a fence too. It's easiest to just remove
1441 // the branch entirely.
1442 std::prev(BB->end())->eraseFromParent();
1443 Builder.SetInsertPoint(BB);
1444 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1445 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1446
1447 PartwordMaskValues PMV =
1448 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1449 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1450 Builder.CreateBr(StartBB);
1451
1452 // Start the main loop block now that we've taken care of the preliminaries.
1453 Builder.SetInsertPoint(StartBB);
1454 Value *UnreleasedLoad =
1455 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1456 Value *UnreleasedLoadExtract =
1457 extractMaskedValue(Builder, UnreleasedLoad, PMV);
1458 Value *ShouldStore = Builder.CreateICmpEQ(
1459 UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1460
1461 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1462 // jump straight past that fence instruction (if it exists).
1463 Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1464
1465 Builder.SetInsertPoint(ReleasingStoreBB);
1466 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1467 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1468 Builder.CreateBr(TryStoreBB);
1469
1470 Builder.SetInsertPoint(TryStoreBB);
1471 PHINode *LoadedTryStore =
1472 Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1473 LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1474 Value *NewValueInsert =
1475 insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1476 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1477 PMV.AlignedAddr, MemOpOrder);
1478 StoreSuccess = Builder.CreateICmpEQ(
1479 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1480 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1481 Builder.CreateCondBr(StoreSuccess, SuccessBB,
1482 CI->isWeak() ? FailureBB : RetryBB);
1483
1484 Builder.SetInsertPoint(ReleasedLoadBB);
1485 Value *SecondLoad;
1486 if (HasReleasedLoadBB) {
1487 SecondLoad =
1488 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1489 Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1490 ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1491 CI->getCompareOperand(), "should_store");
1492
1493 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1494 // jump straight past that fence instruction (if it exists).
1495 Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1496 // Update PHI node in TryStoreBB.
1497 LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1498 } else
1499 Builder.CreateUnreachable();
1500
1501 // Make sure later instructions don't get reordered with a fence if
1502 // necessary.
1503 Builder.SetInsertPoint(SuccessBB);
1504 if (ShouldInsertFencesForAtomic ||
1505 TLI->shouldInsertTrailingFenceForAtomicStore(CI))
1506 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1507 Builder.CreateBr(ExitBB);
1508
1509 Builder.SetInsertPoint(NoStoreBB);
1510 PHINode *LoadedNoStore =
1511 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1512 LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1513 if (HasReleasedLoadBB)
1514 LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1515
1516 // In the failing case, where we don't execute the store-conditional, the
1517 // target might want to balance out the load-linked with a dedicated
1518 // instruction (e.g., on ARM, clearing the exclusive monitor).
1519 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1520 Builder.CreateBr(FailureBB);
1521
1522 Builder.SetInsertPoint(FailureBB);
1523 PHINode *LoadedFailure =
1524 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1525 LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1526 if (CI->isWeak())
1527 LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1528 if (ShouldInsertFencesForAtomic)
1529 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1530 Builder.CreateBr(ExitBB);
1531
1532 // Finally, we have control-flow based knowledge of whether the cmpxchg
1533 // succeeded or not. We expose this to later passes by converting any
1534 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1535 // PHI.
1536 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1537 PHINode *LoadedExit =
1538 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1539 LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1540 LoadedExit->addIncoming(LoadedFailure, FailureBB);
1541 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1542 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1543 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1544
1545 // This is the "exit value" from the cmpxchg expansion. It may be of
1546 // a type wider than the one in the cmpxchg instruction.
1547 Value *LoadedFull = LoadedExit;
1548
1549 Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1550 Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1551
1552 // Look for any users of the cmpxchg that are just comparing the loaded value
1553 // against the desired one, and replace them with the CFG-derived version.
1555 for (auto *User : CI->users()) {
1556 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1557 if (!EV)
1558 continue;
1559
1560 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1561 "weird extraction from { iN, i1 }");
1562
1563 if (EV->getIndices()[0] == 0)
1564 EV->replaceAllUsesWith(Loaded);
1565 else
1567
1568 PrunedInsts.push_back(EV);
1569 }
1570
1571 // We can remove the instructions now we're no longer iterating through them.
1572 for (auto *EV : PrunedInsts)
1573 EV->eraseFromParent();
1574
1575 if (!CI->use_empty()) {
1576 // Some use of the full struct return that we don't understand has happened,
1577 // so we've got to reconstruct it properly.
1578 Value *Res;
1579 Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
1580 Res = Builder.CreateInsertValue(Res, Success, 1);
1581
1582 CI->replaceAllUsesWith(Res);
1583 }
1584
1585 CI->eraseFromParent();
1586 return true;
1587}
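// For illustration (names of the blocks are assumed, not taken from the code
// above), given a strong cmpxchg such as:
//   %pair = cmpxchg ptr %p, i32 %old, i32 %new seq_cst seq_cst
//   %ok   = extractvalue { i32, i1 } %pair, 1
// the rewrite above makes %ok read the CFG-derived result instead, roughly:
//   %ok = phi i1 [ true, %cmpxchg.success ], [ false, %cmpxchg.failure ]
// and the loaded value is likewise taken from the "loaded.exit" PHI.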
1588
1589bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1590 // TODO: Add floating point support.
1591 auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1592 if (!C)
1593 return false;
1594
1595 switch (RMWI->getOperation()) {
1596 case AtomicRMWInst::Add:
1597 case AtomicRMWInst::Sub:
1598 case AtomicRMWInst::Or:
1599 case AtomicRMWInst::Xor:
1600 return C->isZero();
1601 case AtomicRMWInst::And:
1602 return C->isMinusOne();
1603 case AtomicRMWInst::Min:
1604 return C->isMaxValue(true);
1605 case AtomicRMWInst::Max:
1606 return C->isMinValue(true);
1607 case AtomicRMWInst::UMin:
1608 return C->isMaxValue(false);
1609 case AtomicRMWInst::UMax:
1610 return C->isMinValue(false);
1611 default:
1612 return false;
1613 }
1614}
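// For example, by the rules above the following operations are idempotent and
// (via simplifyIdempotentRMW below) may be turned into fenced atomic loads:
//   atomicrmw add  ptr %p, i32 0  seq_cst
//   atomicrmw and  ptr %p, i32 -1 seq_cst
//   atomicrmw umin ptr %p, i32 -1 seq_cst    ; -1 is the unsigned maximum
//   atomicrmw max  ptr %p, i32 -2147483648 seq_cst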
1615
1616bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1617 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1618 tryExpandAtomicLoad(ResultingLoad);
1619 return true;
1620 }
1621 return false;
1622}
1623
1624Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1625 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1626 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1627 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1628 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc) {
1629 LLVMContext &Ctx = Builder.getContext();
1630 BasicBlock *BB = Builder.GetInsertBlock();
1631 Function *F = BB->getParent();
1632
1633 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1634 //
1635 // The standard expansion we produce is:
1636 // [...]
1637 // %init_loaded = load atomic iN* %addr
1638 // br label %loop
1639 // loop:
1640 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1641 // %new = some_op iN %loaded, %incr
1642 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1643 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1644 // %success = extractvalue { iN, i1 } %pair, 1
1645 // br i1 %success, label %atomicrmw.end, label %loop
1646 // atomicrmw.end:
1647 // [...]
1648 BasicBlock *ExitBB =
1649 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1650 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1651
1652 // The split call above "helpfully" added a branch at the end of BB (to the
1653 // wrong place), but we want a load. It's easiest to just remove
1654 // the branch entirely.
1655 std::prev(BB->end())->eraseFromParent();
1656 Builder.SetInsertPoint(BB);
1657 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1658 Builder.CreateBr(LoopBB);
1659
1660 // Start the main loop block now that we've taken care of the preliminaries.
1661 Builder.SetInsertPoint(LoopBB);
1662 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1663 Loaded->addIncoming(InitLoaded, BB);
1664
1665 Value *NewVal = PerformOp(Builder, Loaded);
1666
1667 Value *NewLoaded = nullptr;
1668 Value *Success = nullptr;
1669
1670 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1671 MemOpOrder == AtomicOrdering::Unordered
1672 ? AtomicOrdering::Monotonic
1673 : MemOpOrder,
1674 SSID, Success, NewLoaded, MetadataSrc);
1675 assert(Success && NewLoaded);
1676
1677 Loaded->addIncoming(NewLoaded, LoopBB);
1678
1679 Builder.CreateCondBr(Success, ExitBB, LoopBB);
1680
1681 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1682 return NewLoaded;
1683}
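// As a concrete sketch of the loop above (operand and block names assumed),
// "%old = atomicrmw umax ptr %p, i32 %v monotonic" becomes roughly:
//   %init = load i32, ptr %p
//   br label %atomicrmw.start
// atomicrmw.start:
//   %loaded = phi i32 [ %init, %entry ], [ %new_loaded, %atomicrmw.start ]
//   %cond = icmp ugt i32 %loaded, %v
//   %new = select i1 %cond, i32 %loaded, i32 %v
//   %pair = cmpxchg ptr %p, i32 %loaded, i32 %new monotonic monotonic
//   %new_loaded = extractvalue { i32, i1 } %pair, 0
//   %success = extractvalue { i32, i1 } %pair, 1
//   br i1 %success, label %atomicrmw.end, label %atomicrmw.start
// Note that an 'unordered' ordering is promoted to 'monotonic' above, since
// cmpxchg does not accept 'unordered'.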
1684
1685bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1686 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1687 unsigned ValueSize = getAtomicOpSize(CI);
1688
1689 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1690 default:
1691 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1692 case TargetLoweringBase::AtomicExpansionKind::None:
1693 if (ValueSize < MinCASSize)
1694 return expandPartwordCmpXchg(CI);
1695 return false;
1696 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1697 return expandAtomicCmpXchg(CI);
1698 }
1699 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1700 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1701 return true;
1702 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1703 return lowerAtomicCmpXchgInst(CI);
1704 case TargetLoweringBase::AtomicExpansionKind::Expand: {
1705 TLI->emitExpandAtomicCmpXchg(CI);
1706 return true;
1707 }
1708 }
1709}
1710
1711// Note: This function is exposed externally by AtomicExpandUtils.h
1712bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1713 CreateCmpXchgInstFun CreateCmpXchg) {
1714 ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1715 Builder.setIsFPConstrained(
1716 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1717
1718 // FIXME: If FP exceptions are observable, we should force them off for the
1719 // loop for the FP atomics.
1720 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1721 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1722 AI->getOrdering(), AI->getSyncScopeID(),
1723 [&](IRBuilderBase &Builder, Value *Loaded) {
1724 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1725 AI->getValOperand());
1726 },
1727 CreateCmpXchg, /*MetadataSrc=*/AI);
1728
1729 AI->replaceAllUsesWith(Loaded);
1730 AI->eraseFromParent();
1731 return true;
1732}
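// For illustration, expanding "%old = atomicrmw fadd ptr %p, float %v seq_cst"
// through this helper produces a loop whose body is roughly:
//   %new = fadd float %loaded, %v
// followed by a cmpxchg emitted by the supplied CreateCmpXchg callback (which
// typically bitcasts the float values to i32 first, since cmpxchg operates on
// integers and pointers). Only the cmpxchg itself is atomic; the fadd runs on
// values held in registers.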
1733
1734// In order to use one of the sized library calls such as
1735// __atomic_fetch_add_4, the alignment must be sufficient, the size
1736// must be one of the potentially-specialized sizes, and the value
1737// type must actually exist in C on the target (otherwise, the
1738// function wouldn't actually be defined.)
1739static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1740 const DataLayout &DL) {
1741 // TODO: "LargestSize" is an approximation for "largest type that
1742 // you can express in C". It seems to be the case that int128 is
1743 // supported on all 64-bit platforms, otherwise only up to 64-bit
1744 // integers are supported. If we get this wrong, then we'll try to
1745 // call a sized libcall that doesn't actually exist. There should
1746 // really be some more reliable way in LLVM of determining integer
1747 // sizes which are valid in the target's C ABI...
1748 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1749 return Alignment >= Size &&
1750 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1751 Size <= LargestSize;
1752}
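// Worked example: on a target whose largest legal integer type is 64 bits,
// LargestSize is 16, so an 8-byte cmpxchg with alignment >= 8 may use the
// sized __atomic_compare_exchange_8 call, while the same operation with only
// 4-byte alignment (or a 3-byte operation of any alignment) must fall back to
// the generic, size_t-taking __atomic_* form.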
1753
1754void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1755 static const RTLIB::Libcall Libcalls[6] = {
1756 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1757 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1758 unsigned Size = getAtomicOpSize(I);
1759
1760 bool expanded = expandAtomicOpToLibcall(
1761 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1762 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1763 if (!expanded)
1764 handleFailure(*I, "unsupported atomic load");
1765}
1766
1767void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1768 static const RTLIB::Libcall Libcalls[6] = {
1769 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1770 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1771 unsigned Size = getAtomicOpSize(I);
1772
1773 bool expanded = expandAtomicOpToLibcall(
1774 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1775 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1776 if (!expanded)
1777 handleFailure(*I, "unsupported atomic store");
1778}
1779
1780void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1781 static const RTLIB::Libcall Libcalls[6] = {
1782 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1783 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1784 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1785 unsigned Size = getAtomicOpSize(I);
1786
1787 bool expanded = expandAtomicOpToLibcall(
1788 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1789 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1790 Libcalls);
1791 if (!expanded)
1792 handleFailure(*I, "unsupported cmpxchg");
1793}
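// For example, an i32 cmpxchg that cannot be lowered inline ends up as a call
// along the lines of:
//   bool __atomic_compare_exchange_4(i32 *ptr, i32 *expected, i32 desired,
//                                    int success_order, int failure_order)
// with the loaded value read back out of the 'expected' slot afterwards.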
1794
1795static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1796 static const RTLIB::Libcall LibcallsXchg[6] = {
1797 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1798 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1799 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1800 static const RTLIB::Libcall LibcallsAdd[6] = {
1801 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1802 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1803 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1804 static const RTLIB::Libcall LibcallsSub[6] = {
1805 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1806 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1807 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1808 static const RTLIB::Libcall LibcallsAnd[6] = {
1809 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1810 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1811 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1812 static const RTLIB::Libcall LibcallsOr[6] = {
1813 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1814 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1815 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1816 static const RTLIB::Libcall LibcallsXor[6] = {
1817 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1818 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1819 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1820 static const RTLIB::Libcall LibcallsNand[6] = {
1821 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1822 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1823 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1824
1825 switch (Op) {
1826 case AtomicRMWInst::BAD_BINOP:
1827 llvm_unreachable("Should not have BAD_BINOP.");
1828 case AtomicRMWInst::Xchg:
1829 return ArrayRef(LibcallsXchg);
1830 case AtomicRMWInst::Add:
1831 return ArrayRef(LibcallsAdd);
1832 case AtomicRMWInst::Sub:
1833 return ArrayRef(LibcallsSub);
1834 case AtomicRMWInst::And:
1835 return ArrayRef(LibcallsAnd);
1836 case AtomicRMWInst::Or:
1837 return ArrayRef(LibcallsOr);
1838 case AtomicRMWInst::Xor:
1839 return ArrayRef(LibcallsXor);
1840 case AtomicRMWInst::Nand:
1841 return ArrayRef(LibcallsNand);
1842 case AtomicRMWInst::Max:
1843 case AtomicRMWInst::Min:
1844 case AtomicRMWInst::UMax:
1845 case AtomicRMWInst::UMin:
1846 case AtomicRMWInst::FMax:
1847 case AtomicRMWInst::FMin:
1848 case AtomicRMWInst::FMaximum:
1849 case AtomicRMWInst::FMinimum:
1850 case AtomicRMWInst::FAdd:
1851 case AtomicRMWInst::FSub:
1852 case AtomicRMWInst::UIncWrap:
1853 case AtomicRMWInst::UDecWrap:
1854 case AtomicRMWInst::USubCond:
1855 case AtomicRMWInst::USubSat:
1856 // No atomic libcalls are available for these.
1857 return {};
1858 }
1859 llvm_unreachable("Unexpected AtomicRMW operation.");
1860}
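// Usage sketch: GetRMWLibcall(AtomicRMWInst::Add) yields the LibcallsAdd table
// above, from which expandAtomicOpToLibcall picks the entry matching the
// access size (e.g. index 3, ATOMIC_FETCH_ADD_4, for a 4-byte operation);
// operations such as Min/Max return an empty list and take the CAS-loop path
// in expandAtomicRMWToLibcall below.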
1861
1862void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1863 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1864
1865 unsigned Size = getAtomicOpSize(I);
1866
1867 bool Success = false;
1868 if (!Libcalls.empty())
1869 Success = expandAtomicOpToLibcall(
1870 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1871 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1872
1873 // The expansion failed: either there were no libcalls at all for
1874 // the operation (min/max), or there were only size-specialized
1875 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1876 // CAS libcall, via a CAS loop, instead.
1877 if (!Success) {
1878 expandAtomicRMWToCmpXchg(
1879 I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
1880 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1881 SyncScope::ID SSID, Value *&Success, Value *&NewLoaded,
1882 Instruction *MetadataSrc) {
1883 // Create the CAS instruction normally...
1884 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1885 Addr, Loaded, NewVal, Alignment, MemOpOrder,
1886 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
1887 if (MetadataSrc)
1888 copyMetadataForAtomic(*Pair, *MetadataSrc);
1889
1890 Success = Builder.CreateExtractValue(Pair, 1, "success");
1891 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1892
1893 // ...and then expand the CAS into a libcall.
1894 expandAtomicCASToLibcall(Pair);
1895 });
1896 }
1897}
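// For illustration, "atomicrmw max ptr %p, i32 %v seq_cst" has no fetch-style
// libcall, so it is first rewritten into a cmpxchg loop (the max is computed
// in registers), and each cmpxchg in that loop is then lowered to a
// __atomic_compare_exchange_4 call by expandAtomicCASToLibcall.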
1898
1899// A helper routine for the above expandAtomic*ToLibcall functions.
1900//
1901// 'Libcalls' contains an array of enum values for the particular
1902// ATOMIC libcalls to be emitted. All of the other arguments besides
1903// 'I' are extracted from the Instruction subclass by the
1904// caller. Depending on the particular call, some will be null.
1905bool AtomicExpandImpl::expandAtomicOpToLibcall(
1906 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1907 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1908 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1909 assert(Libcalls.size() == 6);
1910
1911 LLVMContext &Ctx = I->getContext();
1912 Module *M = I->getModule();
1913 const DataLayout &DL = M->getDataLayout();
1914 IRBuilder<> Builder(I);
1915 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1916
1917 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1918 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1919
1920 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
1921
1922 // TODO: the "order" argument type is "int", not int32. So
1923 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1924 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1925 Constant *OrderingVal =
1926 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1927 Constant *Ordering2Val = nullptr;
1928 if (CASExpected) {
1929 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1930 Ordering2Val =
1931 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1932 }
1933 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1934
1935 RTLIB::Libcall RTLibType;
1936 if (UseSizedLibcall) {
1937 switch (Size) {
1938 case 1:
1939 RTLibType = Libcalls[1];
1940 break;
1941 case 2:
1942 RTLibType = Libcalls[2];
1943 break;
1944 case 4:
1945 RTLibType = Libcalls[3];
1946 break;
1947 case 8:
1948 RTLibType = Libcalls[4];
1949 break;
1950 case 16:
1951 RTLibType = Libcalls[5];
1952 break;
1953 }
1954 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1955 RTLibType = Libcalls[0];
1956 } else {
1957 // Can't use sized function, and there's no generic for this
1958 // operation, so give up.
1959 return false;
1960 }
1961
1962 if (!TLI->getLibcallName(RTLibType)) {
1963 // This target does not implement the requested atomic libcall so give up.
1964 return false;
1965 }
1966
1967 // Build up the function call. There are two kinds. First, the sized
1968 // variants. These calls are going to be one of the following (with
1969 // N=1,2,4,8,16):
1970 // iN __atomic_load_N(iN *ptr, int ordering)
1971 // void __atomic_store_N(iN *ptr, iN val, int ordering)
1972 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1973 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1974 // int success_order, int failure_order)
1975 //
1976 // Note that these functions can be used for non-integer atomic
1977 // operations, the values just need to be bitcast to integers on the
1978 // way in and out.
1979 //
1980 // And, then, the generic variants. They look like the following:
1981 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1982 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1983 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1984 // int ordering)
1985 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1986 // void *desired, int success_order,
1987 // int failure_order)
1988 //
1989 // The different signatures are built up depending on the
1990 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1991 // variables.
1992
1993 AllocaInst *AllocaCASExpected = nullptr;
1994 AllocaInst *AllocaValue = nullptr;
1995 AllocaInst *AllocaResult = nullptr;
1996
1997 Type *ResultTy;
1998 SmallVector<Value *, 6> Args;
1999 AttributeList Attr;
2000
2001 // 'size' argument.
2002 if (!UseSizedLibcall) {
2003 // Note, getIntPtrType is assumed equivalent to size_t.
2004 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
2005 }
2006
2007 // 'ptr' argument.
2008 // note: This assumes all address spaces share a common libfunc
2009 // implementation and that addresses are convertible. For systems without
2010 // that property, we'd need to extend this mechanism to support AS-specific
2011 // families of atomic intrinsics.
2012 Value *PtrVal = PointerOperand;
2013 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
2014 Args.push_back(PtrVal);
2015
2016 // 'expected' argument, if present.
2017 if (CASExpected) {
2018 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
2019 AllocaCASExpected->setAlignment(AllocaAlignment);
2020 Builder.CreateLifetimeStart(AllocaCASExpected);
2021 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
2022 Args.push_back(AllocaCASExpected);
2023 }
2024
2025 // 'val' argument ('desired' for cas), if present.
2026 if (ValueOperand) {
2027 if (UseSizedLibcall) {
2028 Value *IntValue =
2029 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
2030 Args.push_back(IntValue);
2031 } else {
2032 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
2033 AllocaValue->setAlignment(AllocaAlignment);
2034 Builder.CreateLifetimeStart(AllocaValue);
2035 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
2036 Args.push_back(AllocaValue);
2037 }
2038 }
2039
2040 // 'ret' argument.
2041 if (!CASExpected && HasResult && !UseSizedLibcall) {
2042 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
2043 AllocaResult->setAlignment(AllocaAlignment);
2044 Builder.CreateLifetimeStart(AllocaResult);
2045 Args.push_back(AllocaResult);
2046 }
2047
2048 // 'ordering' ('success_order' for cas) argument.
2049 Args.push_back(OrderingVal);
2050
2051 // 'failure_order' argument, if present.
2052 if (Ordering2Val)
2053 Args.push_back(Ordering2Val);
2054
2055 // Now, the return type.
2056 if (CASExpected) {
2057 ResultTy = Type::getInt1Ty(Ctx);
2058 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
2059 } else if (HasResult && UseSizedLibcall)
2060 ResultTy = SizedIntTy;
2061 else
2062 ResultTy = Type::getVoidTy(Ctx);
2063
2064 // Done with setting up arguments and return types, create the call:
2065 SmallVector<Type *, 6> ArgTys;
2066 for (Value *Arg : Args)
2067 ArgTys.push_back(Arg->getType());
2068 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
2069 FunctionCallee LibcallFn =
2070 M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
2071 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
2072 Call->setAttributes(Attr);
2073 Value *Result = Call;
2074
2075 // And then, extract the results...
2076 if (ValueOperand && !UseSizedLibcall)
2077 Builder.CreateLifetimeEnd(AllocaValue);
2078
2079 if (CASExpected) {
2080 // The final result from the CAS is {load of 'expected' alloca, bool result
2081 // from call}
2082 Type *FinalResultTy = I->getType();
2083 Value *V = PoisonValue::get(FinalResultTy);
2084 Value *ExpectedOut = Builder.CreateAlignedLoad(
2085 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
2086 Builder.CreateLifetimeEnd(AllocaCASExpected);
2087 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
2088 V = Builder.CreateInsertValue(V, Result, 1);
2089 I->replaceAllUsesWith(V);
2090 } else if (HasResult) {
2091 Value *V;
2092 if (UseSizedLibcall)
2093 V = Builder.CreateBitOrPointerCast(Result, I->getType());
2094 else {
2095 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
2096 AllocaAlignment);
2097 Builder.CreateLifetimeEnd(AllocaResult);
2098 }
2099 I->replaceAllUsesWith(V);
2100 }
2101 I->eraseFromParent();
2102 return true;
2103}
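// Putting it together, a sufficiently aligned "%old = atomicrmw xchg ptr %p,
// i32 %v seq_cst" that must go through libcalls becomes roughly:
//   %old = call i32 @__atomic_exchange_4(ptr %p, i32 %v, i32 5)
// where 5 is the C ABI encoding of seq_cst, whereas an under-aligned or
// oddly sized operation instead uses the generic form, passing the value and
// result indirectly through stack temporaries.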