1//===- InstCombineCalls.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visitCall, visitInvoke, and visitCallBr functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/Statistic.h"
26#include "llvm/Analysis/Loads.h"
31#include "llvm/IR/Attributes.h"
32#include "llvm/IR/BasicBlock.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/DebugInfo.h"
38#include "llvm/IR/Function.h"
40#include "llvm/IR/InlineAsm.h"
41#include "llvm/IR/InstrTypes.h"
42#include "llvm/IR/Instruction.h"
45#include "llvm/IR/Intrinsics.h"
46#include "llvm/IR/IntrinsicsAArch64.h"
47#include "llvm/IR/IntrinsicsAMDGPU.h"
48#include "llvm/IR/IntrinsicsARM.h"
49#include "llvm/IR/IntrinsicsHexagon.h"
50#include "llvm/IR/LLVMContext.h"
51#include "llvm/IR/Metadata.h"
53#include "llvm/IR/Statepoint.h"
54#include "llvm/IR/Type.h"
55#include "llvm/IR/User.h"
56#include "llvm/IR/Value.h"
57#include "llvm/IR/ValueHandle.h"
62#include "llvm/Support/Debug.h"
72#include <algorithm>
73#include <cassert>
74#include <cstdint>
75#include <optional>
76#include <utility>
77#include <vector>
78
79#define DEBUG_TYPE "instcombine"
81
82using namespace llvm;
83using namespace PatternMatch;
84
85STATISTIC(NumSimplified, "Number of library calls simplified");
86
87static cl::opt<unsigned> GuardWideningWindow(
88 "instcombine-guard-widening-window",
89 cl::init(3),
90 cl::desc("How wide an instruction window to bypass looking for "
91 "another guard"));
92
93/// Return the specified type promoted as it would be to pass through a va_arg
94/// area.
95static Type *getPromotedType(Type *Ty) {
96 if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
97 if (ITy->getBitWidth() < 32)
98 return Type::getInt32Ty(Ty->getContext());
99 }
100 return Ty;
101}
102
103/// Recognize a memcpy/memmove from an alloca that is otherwise trivially unused.
104/// TODO: This should probably be integrated with visitAllocSites, but that
105/// requires a deeper change to allow either unread or unwritten objects.
106static bool hasUndefSource(AnyMemTransferInst *MI) {
107 auto *Src = MI->getRawSource();
108 while (isa<GetElementPtrInst>(Src)) {
109 if (!Src->hasOneUse())
110 return false;
111 Src = cast<Instruction>(Src)->getOperand(0);
112 }
113 return isa<AllocaInst>(Src) && Src->hasOneUse();
114}
115
116Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
117 Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
118 MaybeAlign CopyDstAlign = MI->getDestAlign();
119 if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
120 MI->setDestAlignment(DstAlign);
121 return MI;
122 }
123
124 Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
125 MaybeAlign CopySrcAlign = MI->getSourceAlign();
126 if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
127 MI->setSourceAlignment(SrcAlign);
128 return MI;
129 }
130
131 // If we have a store to a location which is known constant, we can conclude
132 // that the store must be storing the constant value (else the memory
133 // wouldn't be constant), and this must be a noop.
134 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
135 // Set the size of the copy to 0, it will be deleted on the next iteration.
136 MI->setLength((uint64_t)0);
137 return MI;
138 }
139
140 // If the source is provably undef, the memcpy/memmove doesn't do anything
141 // (unless the transfer is volatile).
142 if (hasUndefSource(MI) && !MI->isVolatile()) {
143 // Set the size of the copy to 0, it will be deleted on the next iteration.
144 MI->setLength((uint64_t)0);
145 return MI;
146 }
147
148 // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
149 // load/store.
150 ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
151 if (!MemOpLength) return nullptr;
152
153 // Source and destination pointer types are always "i8*" for intrinsic. See
154 // if the size is something we can handle with a single primitive load/store.
155 // A single load+store correctly handles overlapping memory in the memmove
156 // case.
157 uint64_t Size = MemOpLength->getLimitedValue();
158 assert(Size && "0-sized memory transferring should be removed already.");
159
160 if (Size > 8 || (Size&(Size-1)))
161 return nullptr; // If not 1/2/4/8 bytes, exit.
162
163 // If this is an atomic transfer and the alignment is less than the element
164 // size, we would introduce an unaligned memory access, which CodeGen would
165 // later lower to a libcall. That is not an evident performance gain, so
166 // disable the transform for now.
167 if (MI->isAtomic())
168 if (*CopyDstAlign < Size || *CopySrcAlign < Size)
169 return nullptr;
170
171 // Use an integer load+store unless we can find something better.
172 IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
173
174 // If the memcpy has metadata describing the members, see if we can get the
175 // TBAA, scope and noalias tags describing our copy.
176 AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
177
178 Value *Src = MI->getArgOperand(1);
179 Value *Dest = MI->getArgOperand(0);
180 LoadInst *L = Builder.CreateLoad(IntType, Src);
181 // Alignment from the mem intrinsic will be better, so use it.
182 L->setAlignment(*CopySrcAlign);
183 L->setAAMetadata(AACopyMD);
184 MDNode *LoopMemParallelMD =
185 MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
186 if (LoopMemParallelMD)
187 L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
188 MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
189 if (AccessGroupMD)
190 L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
191
192 StoreInst *S = Builder.CreateStore(L, Dest);
193 // Alignment from the mem intrinsic will be better, so use it.
194 S->setAlignment(*CopyDstAlign);
195 S->setAAMetadata(AACopyMD);
196 if (LoopMemParallelMD)
197 S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
198 if (AccessGroupMD)
199 S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
200 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
201
202 if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
203 // non-atomics can be volatile
204 L->setVolatile(MT->isVolatile());
205 S->setVolatile(MT->isVolatile());
206 }
207 if (MI->isAtomic()) {
208 // atomics have to be unordered
209 L->setOrdering(AtomicOrdering::Unordered);
210 S->setOrdering(AtomicOrdering::Unordered);
211 }
212
213 // Set the size of the copy to 0, it will be deleted on the next iteration.
214 MI->setLength((uint64_t)0);
215 return MI;
216}
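// Editor's note (illustrative sketch, not part of the original source): the
// power-of-two path above turns a small constant-length memcpy into a single
// integer load/store pair. A hypothetical 4-byte copy such as
//   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src, i64 4, i1 false)
// becomes, roughly,
//   %v = load i32, ptr %src, align 4
//   store i32 %v, ptr %dst, align 4
// and the original intrinsic's length is zeroed so it is erased on the next
// InstCombine iteration.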
217
218Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
219 const Align KnownAlignment =
220 getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
221 MaybeAlign MemSetAlign = MI->getDestAlign();
222 if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
223 MI->setDestAlignment(KnownAlignment);
224 return MI;
225 }
226
227 // If we have a store to a location which is known constant, we can conclude
228 // that the store must be storing the constant value (else the memory
229 // wouldn't be constant), and this must be a noop.
230 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
231 // Set the size of the copy to 0, it will be deleted on the next iteration.
232 MI->setLength((uint64_t)0);
233 return MI;
234 }
235
236 // Remove memset with an undef value.
237 // FIXME: This is technically incorrect because it might overwrite a poison
238 // value. Change to PoisonValue once #52930 is resolved.
239 if (isa<UndefValue>(MI->getValue())) {
240 // Set the size of the copy to 0, it will be deleted on the next iteration.
241 MI->setLength((uint64_t)0);
242 return MI;
243 }
244
245 // Extract the length and alignment and fill if they are constant.
246 ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
247 ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
248 if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
249 return nullptr;
250 const uint64_t Len = LenC->getLimitedValue();
251 assert(Len && "0-sized memory setting should be removed already.");
252 const Align Alignment = MI->getDestAlign().valueOrOne();
253
254 // If this is an atomic memset and the alignment is less than the length,
255 // we would introduce an unaligned memory access, which CodeGen would later
256 // lower to a libcall. That is not an evident performance gain, so disable
257 // the transform for now.
258 if (MI->isAtomic() && Alignment < Len)
259 return nullptr;
260
261 // memset(s,c,n) -> store s, c (for n=1,2,4,8)
262 if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
263 Value *Dest = MI->getDest();
264
265 // Extract the fill value and store.
266 Constant *FillVal = ConstantInt::get(
267 MI->getContext(), APInt::getSplat(Len * 8, FillC->getValue()));
268 StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
269 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
270 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(S)) {
271 if (llvm::is_contained(DbgAssign->location_ops(), FillC))
272 DbgAssign->replaceVariableLocationOp(FillC, FillVal);
273 }
274
275 S->setAlignment(Alignment);
276 if (MI->isAtomic())
277 S->setOrdering(AtomicOrdering::Unordered);
278
279 // Set the size of the copy to 0, it will be deleted on the next iteration.
280 MI->setLength((uint64_t)0);
281 return MI;
282 }
283
284 return nullptr;
285}
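// Editor's note (illustrative sketch, not from the original source): for a
// constant fill byte and a 1/2/4/8 byte length, the code above splats the byte
// and emits a single store, e.g.
//   call void @llvm.memset.p0.i64(ptr align 4 %p, i8 42, i64 4, i1 false)
// becomes, roughly,
//   store i32 707406378, ptr %p, align 4   ; 0x2A2A2A2A = byte 42 splatted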
286
287// TODO, Obvious Missing Transforms:
288// * Narrow width by halves excluding zero/undef lanes
289Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
290 Value *LoadPtr = II.getArgOperand(0);
291 const Align Alignment =
292 cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
293
294 // If the mask is all ones or undefs, this is a plain vector load of the 1st
295 // argument.
296 if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
297 LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
298 "unmaskedload");
299 L->copyMetadata(II);
300 return L;
301 }
302
303 // If we can unconditionally load from this address, replace with a
304 // load/select idiom. TODO: use DT for context sensitive query
305 if (isDereferenceablePointer(LoadPtr, II.getType(),
306 II.getDataLayout(), &II, &AC)) {
307 LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
308 "unmaskedload");
309 LI->copyMetadata(II);
310 return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
311 }
312
313 return nullptr;
314}
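// Editor's note (illustrative sketch, not from the original source): with an
// all-ones (or undef) mask the masked load above degenerates into an ordinary
// vector load, e.g.
//   %v = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %p, i32 4,
//          <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
// becomes, roughly,
//   %v = load <4 x i32>, ptr %p, align 4
// If the pointer is merely known dereferenceable, the load is still emitted and
// the original passthru is blended back in with a select on the mask.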
315
316// TODO, Obvious Missing Transforms:
317// * Single constant active lane -> store
318// * Narrow width by halves excluding zero/undef lanes
319Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
320 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
321 if (!ConstMask)
322 return nullptr;
323
324 // If the mask is all zeros, this instruction does nothing.
325 if (ConstMask->isNullValue())
326 return eraseInstFromFunction(II);
327
328 // If the mask is all ones, this is a plain vector store of the 1st argument.
329 if (ConstMask->isAllOnesValue()) {
330 Value *StorePtr = II.getArgOperand(1);
331 Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
332 StoreInst *S =
333 new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
334 S->copyMetadata(II);
335 return S;
336 }
337
338 if (isa<ScalableVectorType>(ConstMask->getType()))
339 return nullptr;
340
341 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
342 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
343 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
344 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
345 PoisonElts))
346 return replaceOperand(II, 0, V);
347
348 return nullptr;
349}
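// Editor's note (illustrative sketch, not from the original source): a masked
// store with a known constant mask folds in the two extreme cases handled above:
//   all-zero mask -> the intrinsic is simply erased
//   all-ones mask -> store <4 x i32> %v, ptr %p, align 4
// For other constant masks only the demanded lanes of the value operand are
// kept, which may let the stored value itself simplify.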
350
351// TODO, Obvious Missing Transforms:
352// * Single constant active lane load -> load
353// * Dereferenceable address & few lanes -> scalarize speculative load/selects
354// * Adjacent vector addresses -> masked.load
355// * Narrow width by halves excluding zero/undef lanes
356// * Vector incrementing address -> vector masked load
357Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
358 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
359 if (!ConstMask)
360 return nullptr;
361
362 // Vector splat address w/known mask -> scalar load
363 // Fold the gather to load the source vector first lane
364 // because it is reloading the same value each time
365 if (ConstMask->isAllOnesValue())
366 if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
367 auto *VecTy = cast<VectorType>(II.getType());
368 const Align Alignment =
369 cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
370 LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
371 Alignment, "load.scalar");
372 Value *Shuf =
373 Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
374 return replaceInstUsesWith(II, Shuf);
375 }
376
377 return nullptr;
378}
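// Editor's note (illustrative sketch, not from the original source): when every
// lane of the gather reads through the same pointer and the mask is all ones,
// the code above loads the scalar once and splats it, roughly
//   %g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %splat, i32 4,
//          <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
//   -->
//   %s = load i32, ptr %p, align 4        ; %p is the splatted pointer
//   %g = <broadcast of %s via insert/shufflevector>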
379
380// TODO, Obvious Missing Transforms:
381// * Single constant active lane -> store
382// * Adjacent vector addresses -> masked.store
383// * Narrow store width by halves excluding zero/undef lanes
384// * Vector incrementing address -> vector masked store
385Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
386 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
387 if (!ConstMask)
388 return nullptr;
389
390 // If the mask is all zeros, a scatter does nothing.
391 if (ConstMask->isNullValue())
392 return eraseInstFromFunction(II);
393
394 // Vector splat address -> scalar store
395 if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
396 // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
397 if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
398 if (maskContainsAllOneOrUndef(ConstMask)) {
399 Align Alignment =
400 cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
401 StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false,
402 Alignment);
403 S->copyMetadata(II);
404 return S;
405 }
406 }
407 // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
408 // lastlane), ptr
409 if (ConstMask->isAllOnesValue()) {
410 Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
411 VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
412 ElementCount VF = WideLoadTy->getElementCount();
413 Value *RunTimeVF = Builder.CreateElementCount(Builder.getInt32Ty(), VF);
414 Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
415 Value *Extract =
416 Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
417 StoreInst *S =
418 new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
419 S->copyMetadata(II);
420 return S;
421 }
422 }
423 if (isa<ScalableVectorType>(ConstMask->getType()))
424 return nullptr;
425
426 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
427 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
428 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
429 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
430 PoisonElts))
431 return replaceOperand(II, 0, V);
432 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts,
433 PoisonElts))
434 return replaceOperand(II, 1, V);
435
436 return nullptr;
437}
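// Editor's note (illustrative sketch, not from the original source): a scatter
// whose value and pointer operands are both splats, with a mask known to have
// at least one active lane, is just one scalar store:
//   call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %vsplat, <4 x ptr> %psplat,
//          i32 4, <4 x i1> %m)            ; %m known to have an active lane
//   -->
//   store i32 %s, ptr %p, align 4         ; %s/%p are the splatted scalar/pointer
// With a full mask and only the pointer splatted, storing the last lane suffices
// because every lane writes the same address.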
438
439/// This function transforms launder.invariant.group and strip.invariant.group
440/// like:
441/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
442/// launder(strip(%x)) -> launder(%x)
443/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
444/// strip(launder(%x)) -> strip(%x)
445/// This is legal because it preserves the most recent information about
446/// the presence or absence of invariant.group.
447static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
448 InstCombinerImpl &IC) {
449 auto *Arg = II.getArgOperand(0);
450 auto *StrippedArg = Arg->stripPointerCasts();
451 auto *StrippedInvariantGroupsArg = StrippedArg;
452 while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
453 if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
454 Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
455 break;
456 StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
457 }
458 if (StrippedArg == StrippedInvariantGroupsArg)
459 return nullptr; // No launders/strips to remove.
460
461 Value *Result = nullptr;
462
463 if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
464 Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
465 else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
466 Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
467 else
468 llvm_unreachable(
469 "simplifyInvariantGroupIntrinsic only handles launder and strip");
470 if (Result->getType()->getPointerAddressSpace() !=
471 II.getType()->getPointerAddressSpace())
472 Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
473
474 return cast<Instruction>(Result);
475}
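// Editor's note (illustrative sketch, not from the original source): nested
// launder/strip calls collapse to a single call on the innermost pointer, e.g.
//   %a = call ptr @llvm.launder.invariant.group.p0(ptr %x)
//   %b = call ptr @llvm.launder.invariant.group.p0(ptr %a)
//   -->
//   %b = call ptr @llvm.launder.invariant.group.p0(ptr %x)
// and likewise strip(launder(%x)) becomes strip(%x).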
476
477static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
478 assert((II.getIntrinsicID() == Intrinsic::cttz ||
479 II.getIntrinsicID() == Intrinsic::ctlz) &&
480 "Expected cttz or ctlz intrinsic");
481 bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
482 Value *Op0 = II.getArgOperand(0);
483 Value *Op1 = II.getArgOperand(1);
484 Value *X;
485 // ctlz(bitreverse(x)) -> cttz(x)
486 // cttz(bitreverse(x)) -> ctlz(x)
487 if (match(Op0, m_BitReverse(m_Value(X)))) {
488 Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
489 Function *F =
490 Intrinsic::getOrInsertDeclaration(II.getModule(), ID, II.getType());
491 return CallInst::Create(F, {X, II.getArgOperand(1)});
492 }
493
494 if (II.getType()->isIntOrIntVectorTy(1)) {
495 // ctlz/cttz i1 Op0 --> not Op0
496 if (match(Op1, m_Zero()))
497 return BinaryOperator::CreateNot(Op0);
498 // If zero is poison, then the input can be assumed to be "true", so the
499 // instruction simplifies to "false".
500 assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
501 return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
502 }
503
504 // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
505 if (II.hasOneUse() && match(Op1, m_Zero()) &&
506 match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) {
507 II.dropUBImplyingAttrsAndMetadata();
508 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
509 }
510
511 Constant *C;
512
513 if (IsTZ) {
514 // cttz(-x) -> cttz(x)
515 if (match(Op0, m_Neg(m_Value(X))))
516 return IC.replaceOperand(II, 0, X);
517
518 // cttz(-x & x) -> cttz(x)
519 if (match(Op0, m_c_And(m_Neg(m_Value(X)), m_Deferred(X))))
520 return IC.replaceOperand(II, 0, X);
521
522 // cttz(sext(x)) -> cttz(zext(x))
523 if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
524 auto *Zext = IC.Builder.CreateZExt(X, II.getType());
525 auto *CttzZext =
526 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
527 return IC.replaceInstUsesWith(II, CttzZext);
528 }
529
530 // Zext doesn't change the number of trailing zeros, so narrow:
531 // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
532 if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
533 auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
534 IC.Builder.getTrue());
535 auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
536 return IC.replaceInstUsesWith(II, ZextCttz);
537 }
538
539 // cttz(abs(x)) -> cttz(x)
540 // cttz(nabs(x)) -> cttz(x)
541 Value *Y;
542 SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor;
543 if (SPF == SPF_ABS || SPF == SPF_NABS)
544 return IC.replaceOperand(II, 0, X);
545
546 if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
547 return IC.replaceOperand(II, 0, X);
548
549 // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
550 if (match(Op0, m_Shl(m_ImmConstant(C), m_Value(X))) &&
551 match(Op1, m_One())) {
552 Value *ConstCttz =
553 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
554 return BinaryOperator::CreateAdd(ConstCttz, X);
555 }
556
557 // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
558 if (match(Op0, m_Exact(m_LShr(m_ImmConstant(C), m_Value(X)))) &&
559 match(Op1, m_One())) {
560 Value *ConstCttz =
561 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
562 return BinaryOperator::CreateSub(ConstCttz, X);
563 }
564
565 // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
566 if (match(Op0, m_Add(m_LShr(m_AllOnes(), m_Value(X)), m_One()))) {
567 Value *Width =
568 ConstantInt::get(II.getType(), II.getType()->getScalarSizeInBits());
569 return BinaryOperator::CreateSub(Width, X);
570 }
571 } else {
572 // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
573 if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
574 match(Op1, m_One())) {
575 Value *ConstCtlz =
576 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
577 return BinaryOperator::CreateAdd(ConstCtlz, X);
578 }
579
580 // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
581 if (match(Op0, m_NUWShl(m_ImmConstant(C), m_Value(X))) &&
582 match(Op1, m_One())) {
583 Value *ConstCtlz =
584 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
585 return BinaryOperator::CreateSub(ConstCtlz, X);
586 }
587 }
588
589 // cttz(Pow2) -> Log2(Pow2)
590 // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
591 if (auto *R = IC.tryGetLog2(Op0, match(Op1, m_One()))) {
592 if (IsTZ)
593 return IC.replaceInstUsesWith(II, R);
594 BinaryOperator *BO = BinaryOperator::CreateSub(
595 ConstantInt::get(R->getType(), R->getType()->getScalarSizeInBits() - 1),
596 R);
597 BO->setHasNoSignedWrap();
598 BO->setHasNoUnsignedWrap();
599 return BO;
600 }
601
602 KnownBits Known = IC.computeKnownBits(Op0, &II);
603
604 // Create a mask for bits above (ctlz) or below (cttz) the first known one.
605 unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
606 : Known.countMaxLeadingZeros();
607 unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
608 : Known.countMinLeadingZeros();
609
610 // If all bits above (ctlz) or below (cttz) the first known one are known
611 // zero, this value is constant.
612 // FIXME: This should be in InstSimplify because we're replacing an
613 // instruction with a constant.
614 if (PossibleZeros == DefiniteZeros) {
615 auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
616 return IC.replaceInstUsesWith(II, C);
617 }
618
619 // If the input to cttz/ctlz is known to be non-zero,
620 // then change the 'ZeroIsPoison' parameter to 'true'
621 // because we know the zero behavior can't affect the result.
622 if (!Known.One.isZero() ||
623 isKnownNonZero(Op0, IC.getSimplifyQuery().getWithInstruction(&II))) {
624 if (!match(II.getArgOperand(1), m_One()))
625 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
626 }
627
628 // Add range attribute since known bits can't completely reflect what we know.
629 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
630 if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
631 !II.getMetadata(LLVMContext::MD_range)) {
632 ConstantRange Range(APInt(BitWidth, DefiniteZeros),
633 APInt(BitWidth, PossibleZeros + 1));
634 II.addRangeRetAttr(Range);
635 return &II;
636 }
637
638 return nullptr;
639}
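// Editor's note (illustrative sketch, not from the original source): one of the
// folds above expressed in LLVM IR:
//   %r = call i32 @llvm.cttz.i32(i32 %rev, i1 false)   ; %rev = bitreverse(%x)
//   -->
//   %r = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
// Counting trailing zeros of a bit-reversed value is counting leading zeros of
// the original value.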
640
641static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
642 assert(II.getIntrinsicID() == Intrinsic::ctpop &&
643 "Expected ctpop intrinsic");
644 Type *Ty = II.getType();
645 unsigned BitWidth = Ty->getScalarSizeInBits();
646 Value *Op0 = II.getArgOperand(0);
647 Value *X, *Y;
648
649 // ctpop(bitreverse(x)) -> ctpop(x)
650 // ctpop(bswap(x)) -> ctpop(x)
651 if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
652 return IC.replaceOperand(II, 0, X);
653
654 // ctpop(rot(x)) -> ctpop(x)
655 if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
656 match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
657 X == Y)
658 return IC.replaceOperand(II, 0, X);
659
660 // ctpop(x | -x) -> bitwidth - cttz(x, false)
661 if (Op0->hasOneUse() &&
662 match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
663 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
664 {X, IC.Builder.getFalse()});
665 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
666 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
667 }
668
669 // ctpop(~x & (x - 1)) -> cttz(x, false)
670 if (match(Op0,
671 m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
672 Function *F =
673 Intrinsic::getOrInsertDeclaration(II.getModule(), Intrinsic::cttz, Ty);
674 return CallInst::Create(F, {X, IC.Builder.getFalse()});
675 }
676
677 // Zext doesn't change the number of set bits, so narrow:
678 // ctpop (zext X) --> zext (ctpop X)
679 if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
680 Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
681 return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
682 }
683
684 KnownBits Known(BitWidth);
685 IC.computeKnownBits(Op0, Known, &II);
686
687 // If all bits are zero except for exactly one fixed bit, then the result
688 // must be 0 or 1, and we can get that answer by shifting to LSB:
689 // ctpop (X & 32) --> (X & 32) >> 5
690 // TODO: Investigate removing this as it's likely unnecessary given the below
691 // `isKnownToBeAPowerOfTwo` check.
692 if ((~Known.Zero).isPowerOf2())
693 return BinaryOperator::CreateLShr(
694 Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));
695
696 // More generally we can also handle non-constant power of 2 patterns such as
697 // shl/shr(Pow2, X), (X & -X), etc... by transforming:
698 // ctpop(Pow2OrZero) --> icmp ne X, 0
699 if (IC.isKnownToBeAPowerOfTwo(Op0, /* OrZero */ true))
700 return CastInst::Create(Instruction::ZExt,
701 IC.Builder.CreateICmpNE(Op0,
702 Constant::getNullValue(Ty)),
703 Ty);
704
705 // Add range attribute since known bits can't completely reflect what we know.
706 if (BitWidth != 1) {
707 ConstantRange OldRange =
708 II.getRange().value_or(ConstantRange::getFull(BitWidth));
709
710 unsigned Lower = Known.countMinPopulation();
711 unsigned Upper = Known.countMaxPopulation() + 1;
712
713 if (Lower == 0 && OldRange.contains(APInt::getZero(BitWidth)) &&
714 isKnownNonZero(Op0, IC.getSimplifyQuery().getWithInstruction(&II)))
715 Lower = 1;
716
717 ConstantRange Range(APInt(BitWidth, Lower), APInt(BitWidth, Upper));
718 Range = Range.intersectWith(OldRange, ConstantRange::Unsigned);
719
720 if (Range != OldRange) {
721 II.addRangeRetAttr(Range);
722 return &II;
723 }
724 }
725
726 return nullptr;
727}
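// Editor's note (illustrative sketch, not from the original source): zero
// extension adds no set bits, so the narrowing fold above rewrites
//   %w = zext i8 %x to i32
//   %c = call i32 @llvm.ctpop.i32(i32 %w)
// into, roughly,
//   %n = call i8 @llvm.ctpop.i8(i8 %x)
//   %c = zext i8 %n to i32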
728
729/// Convert a table lookup to shufflevector if the mask is constant.
730/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
731/// which case we could lower the shufflevector with rev64 instructions
732/// as it's actually a byte reverse.
733static Value *simplifyNeonTbl1(const IntrinsicInst &II,
734 InstCombiner::BuilderTy &Builder) {
735 // Bail out if the mask is not a constant.
736 auto *C = dyn_cast<Constant>(II.getArgOperand(1));
737 if (!C)
738 return nullptr;
739
740 auto *VecTy = cast<FixedVectorType>(II.getType());
741 unsigned NumElts = VecTy->getNumElements();
742
743 // Only perform this transformation for <8 x i8> vector types.
744 if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
745 return nullptr;
746
747 int Indexes[8];
748
749 for (unsigned I = 0; I < NumElts; ++I) {
750 Constant *COp = C->getAggregateElement(I);
751
752 if (!COp || !isa<ConstantInt>(COp))
753 return nullptr;
754
755 Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();
756
757 // Make sure the mask indices are in range.
758 if ((unsigned)Indexes[I] >= NumElts)
759 return nullptr;
760 }
761
762 auto *V1 = II.getArgOperand(0);
763 auto *V2 = Constant::getNullValue(V1->getType());
764 return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes));
765}
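// Editor's note (illustrative sketch, not from the original source): with the
// constant mask <7,6,5,4,3,2,1,0> a NEON tbl1 call on an <8 x i8> table becomes
// a plain shufflevector that reverses the vector, roughly
//   %r = shufflevector <8 x i8> %v, <8 x i8> zeroinitializer,
//          <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// which the backend can lower with a byte-reverse instruction.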
766
767// Returns true iff the 2 intrinsics have the same operands, limiting the
768// comparison to the first NumOperands.
769static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
770 unsigned NumOperands) {
771 assert(I.arg_size() >= NumOperands && "Not enough operands");
772 assert(E.arg_size() >= NumOperands && "Not enough operands");
773 for (unsigned i = 0; i < NumOperands; i++)
774 if (I.getArgOperand(i) != E.getArgOperand(i))
775 return false;
776 return true;
777}
778
779// Remove trivially empty start/end intrinsic ranges, i.e. a start
780// immediately followed by an end (ignoring debuginfo or other
781// start/end intrinsics in between). As this handles only the most trivial
782// cases, tracking the nesting level is not needed:
783//
784// call @llvm.foo.start(i1 0)
785// call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
786// call @llvm.foo.end(i1 0)
787// call @llvm.foo.end(i1 0) ; &I
788static bool
789removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
790 std::function<bool(const IntrinsicInst &)> IsStart) {
791 // We start from the end intrinsic and scan backwards, so that InstCombine
792 // has already processed (and potentially removed) all the instructions
793 // before the end intrinsic.
794 BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
795 for (; BI != BE; ++BI) {
796 if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
797 if (I->isDebugOrPseudoInst() ||
798 I->getIntrinsicID() == EndI.getIntrinsicID())
799 continue;
800 if (IsStart(*I)) {
801 if (haveSameOperands(EndI, *I, EndI.arg_size())) {
802 IC.eraseInstFromFunction(*I);
803 IC.eraseInstFromFunction(EndI);
804 return true;
805 }
806 // Skip start intrinsics that don't pair with this end intrinsic.
807 continue;
808 }
809 }
810 break;
811 }
812
813 return false;
814}
815
816Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
817 removeTriviallyEmptyRange(I, *this, [&I](const IntrinsicInst &II) {
818 // Bail out on the case where the source va_list of a va_copy is destroyed
819 // immediately by a follow-up va_end.
820 return II.getIntrinsicID() == Intrinsic::vastart ||
821 (II.getIntrinsicID() == Intrinsic::vacopy &&
822 I.getArgOperand(0) != II.getArgOperand(1));
823 });
824 return nullptr;
825}
826
827static Instruction *canonicalizeConstantArg0ToArg1(CallInst &Call) {
828 assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
829 Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
830 if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
831 Call.setArgOperand(0, Arg1);
832 Call.setArgOperand(1, Arg0);
833 return &Call;
834 }
835 return nullptr;
836}
837
838/// Creates a result tuple for an overflow intrinsic \p II with a given
839/// \p Result and a constant \p Overflow value.
840static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result,
841 Constant *Overflow) {
842 Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
843 StructType *ST = cast<StructType>(II->getType());
844 Constant *Struct = ConstantStruct::get(ST, V);
845 return InsertValueInst::Create(Struct, Result, 0);
846}
847
849InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
850 WithOverflowInst *WO = cast<WithOverflowInst>(II);
851 Value *OperationResult = nullptr;
852 Constant *OverflowResult = nullptr;
853 if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
854 WO->getRHS(), *WO, OperationResult, OverflowResult))
855 return createOverflowTuple(WO, OperationResult, OverflowResult);
856
857 // See whether we can optimize the overflow check with assumption information.
858 for (User *U : WO->users()) {
859 if (!match(U, m_ExtractValue<1>(m_Value())))
860 continue;
861
862 for (auto &AssumeVH : AC.assumptionsFor(U)) {
863 if (!AssumeVH)
864 continue;
865 CallInst *I = cast<CallInst>(AssumeVH);
866 if (!match(I->getArgOperand(0), m_Not(m_Specific(U))))
867 continue;
868 if (!isValidAssumeForContext(I, II, /*DT=*/nullptr,
869 /*AllowEphemerals=*/true))
870 continue;
871 Value *Result =
872 Builder.CreateBinOp(WO->getBinaryOp(), WO->getLHS(), WO->getRHS());
873 Result->takeName(WO);
874 if (auto *Inst = dyn_cast<Instruction>(Result)) {
875 if (WO->isSigned())
876 Inst->setHasNoSignedWrap();
877 else
878 Inst->setHasNoUnsignedWrap();
879 }
880 return createOverflowTuple(WO, Result,
881 ConstantInt::getFalse(U->getType()));
882 }
883 }
884
885 return nullptr;
886}
887
888static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
889 Ty = Ty->getScalarType();
890 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
891}
892
893static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
894 Ty = Ty->getScalarType();
895 return F.getDenormalMode(Ty->getFltSemantics()).inputsAreZero();
896}
897
898/// \returns the compare predicate type if the test performed by
899/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
900/// floating-point environment assumed for \p F for type \p Ty
901static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask,
902 const Function &F, Type *Ty) {
903 switch (static_cast<unsigned>(Mask)) {
904 case fcZero:
905 if (inputDenormalIsIEEE(F, Ty))
906 return FCmpInst::FCMP_OEQ;
907 break;
908 case fcZero | fcSubnormal:
909 if (inputDenormalIsDAZ(F, Ty))
910 return FCmpInst::FCMP_OEQ;
911 break;
912 case fcPositive | fcNegZero:
913 if (inputDenormalIsIEEE(F, Ty))
914 return FCmpInst::FCMP_OGE;
915 break;
916 case fcPositive | fcNegZero | fcNegSubnormal:
917 if (inputDenormalIsDAZ(F, Ty))
918 return FCmpInst::FCMP_OGE;
919 break;
920 case fcPosSubnormal | fcPosNormal | fcPosInf:
921 if (inputDenormalIsIEEE(F, Ty))
922 return FCmpInst::FCMP_OGT;
923 break;
924 case fcNegative | fcPosZero:
925 if (inputDenormalIsIEEE(F, Ty))
926 return FCmpInst::FCMP_OLE;
927 break;
928 case fcNegative | fcPosZero | fcPosSubnormal:
929 if (inputDenormalIsDAZ(F, Ty))
930 return FCmpInst::FCMP_OLE;
931 break;
932 case fcNegSubnormal | fcNegNormal | fcNegInf:
933 if (inputDenormalIsIEEE(F, Ty))
934 return FCmpInst::FCMP_OLT;
935 break;
936 case fcPosNormal | fcPosInf:
937 if (inputDenormalIsDAZ(F, Ty))
938 return FCmpInst::FCMP_OGT;
939 break;
940 case fcNegNormal | fcNegInf:
941 if (inputDenormalIsDAZ(F, Ty))
942 return FCmpInst::FCMP_OLT;
943 break;
944 case ~fcZero & ~fcNan:
945 if (inputDenormalIsIEEE(F, Ty))
946 return FCmpInst::FCMP_ONE;
947 break;
948 case ~(fcZero | fcSubnormal) & ~fcNan:
949 if (inputDenormalIsDAZ(F, Ty))
950 return FCmpInst::FCMP_ONE;
951 break;
952 default:
953 break;
954 }
955
956 return FCmpInst::BAD_FCMP_PREDICATE;
957}
958
959Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
960 Value *Src0 = II.getArgOperand(0);
961 Value *Src1 = II.getArgOperand(1);
962 const ConstantInt *CMask = cast<ConstantInt>(Src1);
963 FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
964 const bool IsUnordered = (Mask & fcNan) == fcNan;
965 const bool IsOrdered = (Mask & fcNan) == fcNone;
966 const FPClassTest OrderedMask = Mask & ~fcNan;
967 const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;
968
969 const bool IsStrict =
970 II.getFunction()->getAttributes().hasFnAttr(Attribute::StrictFP);
971
972 Value *FNegSrc;
973 if (match(Src0, m_FNeg(m_Value(FNegSrc)))) {
974 // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
975
976 II.setArgOperand(1, ConstantInt::get(Src1->getType(), fneg(Mask)));
977 return replaceOperand(II, 0, FNegSrc);
978 }
979
980 Value *FAbsSrc;
981 if (match(Src0, m_FAbs(m_Value(FAbsSrc)))) {
982 II.setArgOperand(1, ConstantInt::get(Src1->getType(), inverse_fabs(Mask)));
983 return replaceOperand(II, 0, FAbsSrc);
984 }
985
986 if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) &&
987 (IsOrdered || IsUnordered) && !IsStrict) {
988 // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
989 // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
990 // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf
991 // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf
992 Constant *Inf = ConstantFP::getInfinity(Src0->getType());
993 FCmpInst::Predicate Pred =
994 IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
995 if (OrderedInvertedMask == fcInf)
996 Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;
997
998 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Src0);
999 Value *CmpInf = Builder.CreateFCmp(Pred, Fabs, Inf);
1000 CmpInf->takeName(&II);
1001 return replaceInstUsesWith(II, CmpInf);
1002 }
1003
1004 if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
1005 (IsOrdered || IsUnordered) && !IsStrict) {
1006 // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
1007 // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
1008 // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf
1009 // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf
1010 Constant *Inf =
1011 ConstantFP::getInfinity(Src0->getType(), OrderedMask == fcNegInf);
1012 Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(Src0, Inf)
1013 : Builder.CreateFCmpOEQ(Src0, Inf);
1014
1015 EqInf->takeName(&II);
1016 return replaceInstUsesWith(II, EqInf);
1017 }
1018
1019 if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) &&
1020 (IsOrdered || IsUnordered) && !IsStrict) {
1021 // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
1022 // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
1023 // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf
1024 // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf
1025 Constant *Inf = ConstantFP::getInfinity(Src0->getType(),
1026 OrderedInvertedMask == fcNegInf);
1027 Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(Src0, Inf)
1028 : Builder.CreateFCmpONE(Src0, Inf);
1029 NeInf->takeName(&II);
1030 return replaceInstUsesWith(II, NeInf);
1031 }
1032
1033 if (Mask == fcNan && !IsStrict) {
1034 // Equivalent of isnan. Replace with standard fcmp if we don't care about FP
1035 // exceptions.
1036 Value *IsNan =
1037 Builder.CreateFCmpUNO(Src0, ConstantFP::getZero(Src0->getType()));
1038 IsNan->takeName(&II);
1039 return replaceInstUsesWith(II, IsNan);
1040 }
1041
1042 if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
1043 // Equivalent of !isnan. Replace with standard fcmp.
1044 Value *FCmp =
1045 Builder.CreateFCmpORD(Src0, ConstantFP::getZero(Src0->getType()));
1046 FCmp->takeName(&II);
1047 return replaceInstUsesWith(II, FCmp);
1048 }
1049
1050 FCmpInst::Predicate PredType = FCmpInst::BAD_FCMP_PREDICATE;
1051
1052 // Try to replace with an fcmp with 0
1053 //
1054 // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
1055 // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0
1056 // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
1057 // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
1058 //
1059 // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0
1060 // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0
1061 //
1062 // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0
1063 // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0
1064 //
1065 if (!IsStrict && (IsOrdered || IsUnordered) &&
1066 (PredType = fpclassTestIsFCmp0(OrderedMask, *II.getFunction(),
1067 Src0->getType())) !=
1068 FCmpInst::BAD_FCMP_PREDICATE) {
1069 Constant *Zero = ConstantFP::getZero(Src0->getType());
1070 // Equivalent of == 0.
1071 Value *FCmp = Builder.CreateFCmp(
1072 IsUnordered ? FCmpInst::getUnorderedPredicate(PredType) : PredType,
1073 Src0, Zero);
1074
1075 FCmp->takeName(&II);
1076 return replaceInstUsesWith(II, FCmp);
1077 }
1078
1079 KnownFPClass Known = computeKnownFPClass(Src0, Mask, &II);
1080
1081 // Clear test bits we know must be false from the source value.
1082 // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
1083 // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
1084 if ((Mask & Known.KnownFPClasses) != Mask) {
1085 II.setArgOperand(
1086 1, ConstantInt::get(Src1->getType(), Mask & Known.KnownFPClasses));
1087 return &II;
1088 }
1089
1090 // If none of the tests which can return false are possible, fold to true.
1091 // fp_class (nnan x), ~(qnan|snan) -> true
1092 // fp_class (ninf x), ~(ninf|pinf) -> true
1093 if (Mask == Known.KnownFPClasses)
1094 return replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
1095
1096 return nullptr;
1097}
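// Editor's note (illustrative sketch, not from the original source): when the
// class mask is exactly fcNan and FP exceptions do not matter, the test above
// becomes an unordered compare against zero:
//   %r = call i1 @llvm.is.fpclass.f32(float %x, i32 3)   ; fcSNan|fcQNan
//   -->
//   %r = fcmp uno float %x, 0.000000e+00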
1098
1099static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) {
1100 KnownBits Known = computeKnownBits(Op, SQ);
1101 if (Known.isNonNegative())
1102 return false;
1103 if (Known.isNegative())
1104 return true;
1105
1106 Value *X, *Y;
1107 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1108 return isImpliedByDomCondition(ICmpInst::ICMP_SLT, X, Y, SQ.CxtI, SQ.DL);
1109
1110 return std::nullopt;
1111}
1112
1113static std::optional<bool> getKnownSignOrZero(Value *Op,
1114 const SimplifyQuery &SQ) {
1115 if (std::optional<bool> Sign = getKnownSign(Op, SQ))
1116 return Sign;
1117
1118 Value *X, *Y;
1119 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1120 return isImpliedByDomCondition(ICmpInst::ICMP_SLE, X, Y, SQ.CxtI, SQ.DL);
1121
1122 return std::nullopt;
1123}
1124
1125/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
1126static bool signBitMustBeTheSame(Value *Op0, Value *Op1,
1127 const SimplifyQuery &SQ) {
1128 std::optional<bool> Known1 = getKnownSign(Op1, SQ);
1129 if (!Known1)
1130 return false;
1131 std::optional<bool> Known0 = getKnownSign(Op0, SQ);
1132 if (!Known0)
1133 return false;
1134 return *Known0 == *Known1;
1135}
1136
1137/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
1138/// can trigger other combines.
1139static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
1140 InstCombiner::BuilderTy &Builder) {
1141 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1142 assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
1143 MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
1144 "Expected a min or max intrinsic");
1145
1146 // TODO: Match vectors with undef elements, but undef may not propagate.
1147 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
1148 Value *X;
1149 const APInt *C0, *C1;
1150 if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
1151 !match(Op1, m_APInt(C1)))
1152 return nullptr;
1153
1154 // Check for necessary no-wrap and overflow constraints.
1155 bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
1156 auto *Add = cast<BinaryOperator>(Op0);
1157 if ((IsSigned && !Add->hasNoSignedWrap()) ||
1158 (!IsSigned && !Add->hasNoUnsignedWrap()))
1159 return nullptr;
1160
1161 // If the constant difference overflows, then instsimplify should reduce the
1162 // min/max to the add or C1.
1163 bool Overflow;
1164 APInt CDiff =
1165 IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
1166 assert(!Overflow && "Expected simplify of min/max");
1167
1168 // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
1169 // Note: the "mismatched" no-overflow setting does not propagate.
1170 Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
1171 Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
1172 return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
1173 : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
1174}
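// Editor's note (illustrative sketch, not from the original source): with
// matching no-wrap flags the constant is pulled out of the add, e.g.
//   %a = add nsw i32 %x, 16
//   %m = call i32 @llvm.smax.i32(i32 %a, i32 32)
//   -->
//   %m0 = call i32 @llvm.smax.i32(i32 %x, i32 16)   ; 32 - 16
//   %m  = add nsw i32 %m0, 16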
1175/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
1176Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
1177 Type *Ty = MinMax1.getType();
1178
1179 // We are looking for a tree of:
1180 // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
1181 // Where the min and max could be reversed
1182 Instruction *MinMax2;
1183 BinaryOperator *AddSub;
1184 const APInt *MinValue, *MaxValue;
1185 if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
1186 if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
1187 return nullptr;
1188 } else if (match(&MinMax1,
1189 m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
1190 if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
1191 return nullptr;
1192 } else
1193 return nullptr;
1194
1195 // Check that the constants clamp a saturate, and that the new type would be
1196 // sensible to convert to.
1197 if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
1198 return nullptr;
1199 // In what bitwidth can this be treated as saturating arithmetics?
1200 unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
1201 // FIXME: This isn't quite right for vectors, but using the scalar type is a
1202 // good first approximation for what should be done there.
1203 if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
1204 return nullptr;
1205
1206 // Also make sure that the inner min/max and the add/sub have one use.
1207 if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
1208 return nullptr;
1209
1210 // Create the new type (which can be a vector type)
1211 Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
1212
1213 Intrinsic::ID IntrinsicID;
1214 if (AddSub->getOpcode() == Instruction::Add)
1215 IntrinsicID = Intrinsic::sadd_sat;
1216 else if (AddSub->getOpcode() == Instruction::Sub)
1217 IntrinsicID = Intrinsic::ssub_sat;
1218 else
1219 return nullptr;
1220
1221 // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
1222 // is usually achieved via a sext from a smaller type.
1223 if (ComputeMaxSignificantBits(AddSub->getOperand(0), AddSub) > NewBitWidth ||
1224 ComputeMaxSignificantBits(AddSub->getOperand(1), AddSub) > NewBitWidth)
1225 return nullptr;
1226
1227 // Finally create and return the sat intrinsic, truncated to the new type
1228 Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
1229 Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
1230 Value *Sat = Builder.CreateIntrinsic(IntrinsicID, NewTy, {AT, BT});
1231 return CastInst::Create(Instruction::SExt, Sat, Ty);
1232}
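// Editor's note (illustrative sketch, not from the original source): the
// clamp-of-a-widened-add pattern above becomes a saturating add in the narrow
// type, e.g. for i8 operands sign-extended to i32:
//   %m1 = call i32 @llvm.smin.i32(i32 %add, i32 127)
//   %m2 = call i32 @llvm.smax.i32(i32 %m1, i32 -128)
//   -->
//   %s  = call i8 @llvm.sadd.sat.i8(i8 %a, i8 %b)
//   %m2 = sext i8 %s to i32
// where %add is the i32 sum of the sign-extended %a and %b.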
1233
1234
1235/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
1236/// can only be one of two possible constant values -- turn that into a select
1237/// of constants.
1238static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
1239 InstCombiner::BuilderTy &Builder) {
1240 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1241 Value *X;
1242 const APInt *C0, *C1;
1243 if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
1244 return nullptr;
1245
1246 ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
1247 switch (II->getIntrinsicID()) {
1248 case Intrinsic::smax:
1249 if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1250 Pred = ICmpInst::ICMP_SGT;
1251 break;
1252 case Intrinsic::smin:
1253 if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1254 Pred = ICmpInst::ICMP_SLT;
1255 break;
1256 case Intrinsic::umax:
1257 if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1258 Pred = ICmpInst::ICMP_UGT;
1259 break;
1260 case Intrinsic::umin:
1261 if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1262 Pred = ICmpInst::ICMP_ULT;
1263 break;
1264 default:
1265 llvm_unreachable("Expected min/max intrinsic");
1266 }
1267 if (Pred == CmpInst::BAD_ICMP_PREDICATE)
1268 return nullptr;
1269
1270 // max (min X, 42), 41 --> X > 41 ? 42 : 41
1271 // min (max X, 42), 43 --> X < 43 ? 42 : 43
1272 Value *Cmp = Builder.CreateICmp(Pred, X, I1);
1273 return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
1274}
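// Editor's note (illustrative sketch, not from the original source): when the
// clamp can only produce two adjacent constants it becomes a select, e.g.
//   %lo = call i32 @llvm.umin.i32(i32 %x, i32 42)
//   %r  = call i32 @llvm.umax.i32(i32 %lo, i32 41)
//   -->
//   %c = icmp ugt i32 %x, 41
//   %r = select i1 %c, i32 42, i32 41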
1275
1276/// If this min/max has a constant operand and an operand that is a matching
1277/// min/max with a constant operand, constant-fold the 2 constant operands.
1278static Value *reassociateMinMaxWithConstants(IntrinsicInst *II,
1279 IRBuilderBase &Builder,
1280 const SimplifyQuery &SQ) {
1281 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1282 auto *LHS = dyn_cast<MinMaxIntrinsic>(II->getArgOperand(0));
1283 if (!LHS)
1284 return nullptr;
1285
1286 Constant *C0, *C1;
1287 if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
1288 !match(II->getArgOperand(1), m_ImmConstant(C1)))
1289 return nullptr;
1290
1291 // max (max X, C0), C1 --> max X, (max C0, C1)
1292 // min (min X, C0), C1 --> min X, (min C0, C1)
1293 // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
1294 // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1)
1295 Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID();
1296 if (InnerMinMaxID != MinMaxID &&
1297 !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) ||
1298 (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) &&
1299 isKnownNonNegative(C0, SQ) && isKnownNonNegative(C1, SQ)))
1300 return nullptr;
1302 ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(MinMaxID);
1303 Value *CondC = Builder.CreateICmp(Pred, C0, C1);
1304 Value *NewC = Builder.CreateSelect(CondC, C0, C1);
1305 return Builder.CreateIntrinsic(InnerMinMaxID, II->getType(),
1306 {LHS->getArgOperand(0), NewC});
1307}
1308
1309/// If this min/max has a matching min/max operand with a constant, try to push
1310/// the constant operand into this instruction. This can enable more folds.
1311static Instruction *
1312reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
1313 InstCombiner::BuilderTy &Builder) {
1314 // Match and capture a min/max operand candidate.
1315 Value *X, *Y;
1316 Constant *C;
1317 Instruction *Inner;
1319 m_Instruction(Inner),
1321 m_Value(Y))))
1322 return nullptr;
1323
1324 // The inner op must match. Check for constants to avoid infinite loops.
1325 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1326 auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
1327 if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
1328 match(InnerMM->getArgOperand(1), m_ImmConstant()))
1329 return nullptr;
1330
1331 // max (max X, C), Y --> max (max X, Y), C
1332 Function *MinMax = Intrinsic::getOrInsertDeclaration(II->getModule(),
1333 MinMaxID, II->getType());
1334 Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
1335 NewInner->takeName(Inner);
1336 return CallInst::Create(MinMax, {NewInner, C});
1337}
1338
1339/// Reduce a sequence of min/max intrinsics with a common operand.
1340static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
1341 // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
1342 auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
1343 auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
1344 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1345 if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
1346 RHS->getIntrinsicID() != MinMaxID ||
1347 (!LHS->hasOneUse() && !RHS->hasOneUse()))
1348 return nullptr;
1349
1350 Value *A = LHS->getArgOperand(0);
1351 Value *B = LHS->getArgOperand(1);
1352 Value *C = RHS->getArgOperand(0);
1353 Value *D = RHS->getArgOperand(1);
1354
1355 // Look for a common operand.
1356 Value *MinMaxOp = nullptr;
1357 Value *ThirdOp = nullptr;
1358 if (LHS->hasOneUse()) {
1359 // If the LHS is only used in this chain and the RHS is used outside of it,
1360 // reuse the RHS min/max because that will eliminate the LHS.
1361 if (D == A || C == A) {
1362 // min(min(a, b), min(c, a)) --> min(min(c, a), b)
1363 // min(min(a, b), min(a, d)) --> min(min(a, d), b)
1364 MinMaxOp = RHS;
1365 ThirdOp = B;
1366 } else if (D == B || C == B) {
1367 // min(min(a, b), min(c, b)) --> min(min(c, b), a)
1368 // min(min(a, b), min(b, d)) --> min(min(b, d), a)
1369 MinMaxOp = RHS;
1370 ThirdOp = A;
1371 }
1372 } else {
1373 assert(RHS->hasOneUse() && "Expected one-use operand");
1374 // Reuse the LHS. This will eliminate the RHS.
1375 if (D == A || D == B) {
1376 // min(min(a, b), min(c, a)) --> min(min(a, b), c)
1377 // min(min(a, b), min(c, b)) --> min(min(a, b), c)
1378 MinMaxOp = LHS;
1379 ThirdOp = C;
1380 } else if (C == A || C == B) {
1381 // min(min(a, b), min(b, d)) --> min(min(a, b), d)
1382 // min(min(a, b), min(c, b)) --> min(min(a, b), d)
1383 MinMaxOp = LHS;
1384 ThirdOp = D;
1385 }
1386 }
1387
1388 if (!MinMaxOp || !ThirdOp)
1389 return nullptr;
1390
1391 Module *Mod = II->getModule();
1392 Function *MinMax =
1393 Intrinsic::getOrInsertDeclaration(Mod, MinMaxID, II->getType());
1394 return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
1395}
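// Editor's note (illustrative sketch, not from the original source): three
// same-kind min/max calls sharing an operand collapse to two; when the left
// operand has a single use:
//   %l = call i32 @llvm.umin.i32(i32 %a, i32 %b)
//   %r = call i32 @llvm.umin.i32(i32 %c, i32 %a)
//   %m = call i32 @llvm.umin.i32(i32 %l, i32 %r)
//   -->
//   %m = call i32 @llvm.umin.i32(i32 %r, i32 %b)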
1396
1397/// If all arguments of the intrinsic are unary shuffles with the same mask,
1398/// try to shuffle after the intrinsic.
1399static Instruction *foldShuffledIntrinsicOperands(IntrinsicInst *II,
1400 InstCombiner::BuilderTy &Builder) {
1401 if (!isTriviallyVectorizable(II->getIntrinsicID()) ||
1402 !II->getCalledFunction()->isSpeculatable())
1403 return nullptr;
1404
1405 Value *X;
1406 Constant *C;
1407 ArrayRef<int> Mask;
1408 auto *NonConstArg = find_if_not(II->args(), [&II](Use &Arg) {
1409 return isa<Constant>(Arg.get()) ||
1410 isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1411 Arg.getOperandNo(), nullptr);
1412 });
1413 if (!NonConstArg ||
1414 !match(NonConstArg, m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))
1415 return nullptr;
1416
1417 // At least 1 operand must be a shuffle with 1 use because we are creating 2
1418 // instructions.
1419 if (none_of(II->args(), match_fn(m_OneUse(m_Shuffle(m_Value(), m_Value())))))
1420 return nullptr;
1421
1422 // See if all arguments are shuffled with the same mask.
1424 Type *SrcTy = X->getType();
1425 for (Use &Arg : II->args()) {
1426 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1427 Arg.getOperandNo(), nullptr))
1428 NewArgs.push_back(Arg);
1429 else if (match(&Arg,
1430 m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
1431 X->getType() == SrcTy)
1432 NewArgs.push_back(X);
1433 else if (match(&Arg, m_ImmConstant(C))) {
1434 // If it's a constant, try find the constant that would be shuffled to C.
1435 if (Constant *ShuffledC =
1436 unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
1437 NewArgs.push_back(ShuffledC);
1438 else
1439 return nullptr;
1440 } else
1441 return nullptr;
1442 }
1443
1444 // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
1445 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1446 // Result type might be a different vector width.
1447 // TODO: Check that the result type isn't widened?
1448 VectorType *ResTy =
1449 VectorType::get(II->getType()->getScalarType(), cast<VectorType>(SrcTy));
1450 Value *NewIntrinsic =
1451 Builder.CreateIntrinsic(ResTy, II->getIntrinsicID(), NewArgs, FPI);
1452 return new ShuffleVectorInst(NewIntrinsic, Mask);
1453}
1454
1455/// If all arguments of the intrinsic are reverses, try to pull the reverse
1456/// after the intrinsic.
1458 if (!isTriviallyVectorizable(II->getIntrinsicID()))
1459 return nullptr;
1460
1461 // At least 1 operand must be a reverse with 1 use because we are creating 2
1462 // instructions.
1463 if (none_of(II->args(), [](Value *V) {
1464 return match(V, m_OneUse(m_VecReverse(m_Value())));
1465 }))
1466 return nullptr;
1467
1468 Value *X;
1469 Constant *C;
1470 SmallVector<Value *> NewArgs;
1471 for (Use &Arg : II->args()) {
1472 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1473 Arg.getOperandNo(), nullptr))
1474 NewArgs.push_back(Arg);
1475 else if (match(&Arg, m_VecReverse(m_Value(X))))
1476 NewArgs.push_back(X);
1477 else if (isSplatValue(Arg))
1478 NewArgs.push_back(Arg);
1479 else if (match(&Arg, m_ImmConstant(C)))
1480 NewArgs.push_back(Builder.CreateVectorReverse(C));
1481 else
1482 return nullptr;
1483 }
1484
1485 // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
1486 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1487 Instruction *NewIntrinsic = Builder.CreateIntrinsic(
1488 II->getType(), II->getIntrinsicID(), NewArgs, FPI);
1489 return Builder.CreateVectorReverse(NewIntrinsic);
1490}
1491
1492/// Fold the following cases and accepts bswap and bitreverse intrinsics:
1493/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
1494/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
1495template <Intrinsic::ID IntrID>
1497 InstCombiner::BuilderTy &Builder) {
1498 static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
1499 "This helper only supports BSWAP and BITREVERSE intrinsics");
1500
1501 Value *X, *Y;
1502 // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
1503 // don't match ConstantExpr that aren't meaningful for this transform.
1506 Value *OldReorderX, *OldReorderY;
1508
1509 // If both X and Y are bswap/bitreverse, the transform reduces the number
1510 // of instructions even if there's multiuse.
1511 // If only one operand is bswap/bitreverse, we need to ensure the operand
1512 // have only one use.
1513 if (match(X, m_Intrinsic<IntrID>(m_Value(OldReorderX))) &&
1514 match(Y, m_Intrinsic<IntrID>(m_Value(OldReorderY)))) {
1515 return BinaryOperator::Create(Op, OldReorderX, OldReorderY);
1516 }
1517
1518 if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderX))))) {
1519 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, Y);
1520 return BinaryOperator::Create(Op, OldReorderX, NewReorder);
1521 }
1522
1523 if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderY))))) {
1524 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, X);
1525 return BinaryOperator::Create(Op, NewReorder, OldReorderY);
1526 }
1527 }
1528 return nullptr;
1529}
1530
1531/// Helper to match idempotent binary intrinsics, namely, intrinsics where
1532/// `f(f(x, y), y) == f(x, y)` holds.
1534 switch (IID) {
1535 case Intrinsic::smax:
1536 case Intrinsic::smin:
1537 case Intrinsic::umax:
1538 case Intrinsic::umin:
1539 case Intrinsic::maximum:
1540 case Intrinsic::minimum:
1541 case Intrinsic::maximumnum:
1542 case Intrinsic::minimumnum:
1543 case Intrinsic::maxnum:
1544 case Intrinsic::minnum:
1545 return true;
1546 default:
1547 return false;
1548 }
1549}
1550
1551/// Attempt to simplify value-accumulating recurrences of kind:
1552/// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
1553/// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
1554/// And let the idempotent binary intrinsic be hoisted, when the operands are
1555/// known to be loop-invariant.
1556static Value *foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC,
1557 IntrinsicInst *II) {
1558 PHINode *PN;
1559 Value *Init, *OtherOp;
1560
1561 // A binary intrinsic recurrence with loop-invariant operands is equivalent to
1562 // `call @llvm.binary.intrinsic(Init, OtherOp)`.
1563 auto IID = II->getIntrinsicID();
1564 if (!isIdempotentBinaryIntrinsic(IID) ||
1566 !IC.getDominatorTree().dominates(OtherOp, PN))
1567 return nullptr;
1568
1569 auto *InvariantBinaryInst =
1570 IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp);
1571 if (isa<FPMathOperator>(InvariantBinaryInst))
1572 cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II);
1573 return InvariantBinaryInst;
1574}
1575
1576static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
1577 if (!CanReorderLanes)
1578 return nullptr;
1579
1580 Value *V;
1581 if (match(Arg, m_VecReverse(m_Value(V))))
1582 return V;
1583
1584 ArrayRef<int> Mask;
1585 if (!isa<FixedVectorType>(Arg->getType()) ||
1586 !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
1587 !cast<ShuffleVectorInst>(Arg)->isSingleSource())
1588 return nullptr;
1589
1590 int Sz = Mask.size();
1591 SmallBitVector UsedIndices(Sz);
1592 for (int Idx : Mask) {
1593 if (Idx == PoisonMaskElem || UsedIndices.test(Idx))
1594 return nullptr;
1595 UsedIndices.set(Idx);
1596 }
1597
1598  // The shuffle can be removed iff it only reorders lanes: no repeated or
1599  // poison indices, and every lane is used exactly once.
1600 return UsedIndices.all() ? V : nullptr;
1601}
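// Illustrative IR for the lane-reorder elimination above: a reduction does not
// care about lane order, so
//   %rev = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %sum = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %rev)
// can feed %v to the reduction directly; the mask touches every lane exactly once.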
1602
1603/// Fold an unsigned minimum of trailing or leading zero bits counts:
1604/// umin(cttz(CtOp, ZeroUndef), ConstOp) --> cttz(CtOp | (1 << ConstOp))
1605/// umin(ctlz(CtOp, ZeroUndef), ConstOp) --> ctlz(CtOp | (SignedMin
1606/// >> ConstOp))
1607template <Intrinsic::ID IntrID>
1608static Value *
1609foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
1610                                          const DataLayout &DL,
1611 InstCombiner::BuilderTy &Builder) {
1612 static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
1613 "This helper only supports cttz and ctlz intrinsics");
1614
1615 Value *CtOp;
1616 Value *ZeroUndef;
1617 if (!match(I0,
1618 m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp), m_Value(ZeroUndef)))))
1619 return nullptr;
1620
1621 unsigned BitWidth = I1->getType()->getScalarSizeInBits();
1622 auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
1623 if (!match(I1, m_CheckedInt(LessBitWidth)))
1624 // We have a constant >= BitWidth (which can be handled by CVP)
1625 // or a non-splat vector with elements < and >= BitWidth
1626 return nullptr;
1627
1628 Type *Ty = I1->getType();
1629  Constant *NewConst = ConstantFoldBinaryOpOperands(
1630      IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
1631 IntrID == Intrinsic::cttz
1632 ? ConstantInt::get(Ty, 1)
1633 : ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
1634 cast<Constant>(I1), DL);
1635 return Builder.CreateBinaryIntrinsic(
1636 IntrID, Builder.CreateOr(CtOp, NewConst),
1637 ConstantInt::getTrue(ZeroUndef->getType()));
1638}
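// Illustrative example of the fold above, for i32 operands:
//   umin(cttz(%x, i1 false), 6)  -->  cttz(%x | 64, i1 true)
// ORing in (1 << 6) plants a known set bit at position 6, so the trailing-zero
// count can never exceed the former umin bound and zero-is-poison may be set.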
1639
1640/// Return whether "X LOp (Y ROp Z)" is always equal to
1641/// "(X LOp Y) ROp (X LOp Z)".
1642static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW,
1643                                     bool HasNSW, Intrinsic::ID ROp) {
1644 switch (ROp) {
1645 case Intrinsic::umax:
1646 case Intrinsic::umin:
1647 if (HasNUW && LOp == Instruction::Add)
1648 return true;
1649 if (HasNUW && LOp == Instruction::Shl)
1650 return true;
1651 return false;
1652 case Intrinsic::smax:
1653 case Intrinsic::smin:
1654 return HasNSW && LOp == Instruction::Add;
1655 default:
1656 return false;
1657 }
1658}
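// Illustrative instance of the distributive law checked above: unsigned
// addition that cannot wrap is monotone, so
//   add nuw %x, (umax %y, %z)  ==  umax (add nuw %x, %y), (add nuw %x, %z)
// and likewise for nuw shl with umax/umin and for nsw add with smax/smin.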
1659
1660// Attempts to factorise a common term
1661// in an instruction that has the form "(A op' B) op (C op' D)",
1662// where op is an intrinsic and op' is a binop.
1663static Value *
1664foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II,
1665                                   InstCombiner::BuilderTy &Builder) {
1666 Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
1667 Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();
1668
1669  auto *Op0 = dyn_cast<BinaryOperator>(LHS);
1670  auto *Op1 = dyn_cast<BinaryOperator>(RHS);
1671
1672 if (!Op0 || !Op1)
1673 return nullptr;
1674
1675 if (Op0->getOpcode() != Op1->getOpcode())
1676 return nullptr;
1677
1678 if (!Op0->hasOneUse() || !Op1->hasOneUse())
1679 return nullptr;
1680
1681 Instruction::BinaryOps InnerOpcode =
1682 static_cast<Instruction::BinaryOps>(Op0->getOpcode());
1683 bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap();
1684 bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap();
1685
1686 if (!leftDistributesOverRight(InnerOpcode, HasNUW, HasNSW, TopLevelOpcode))
1687 return nullptr;
1688
1689 Value *A = Op0->getOperand(0);
1690 Value *B = Op0->getOperand(1);
1691 Value *C = Op1->getOperand(0);
1692 Value *D = Op1->getOperand(1);
1693
1694 // Attempts to swap variables such that A equals C or B equals D,
1695 // if the inner operation is commutative.
1696 if (Op0->isCommutative() && A != C && B != D) {
1697 if (A == D || B == C)
1698 std::swap(C, D);
1699 else
1700 return nullptr;
1701 }
1702
1703 BinaryOperator *NewBinop;
1704 if (A == C) {
1705 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, B, D);
1706 NewBinop =
1707 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, A, NewIntrinsic));
1708 } else if (B == D) {
1709 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, A, C);
1710 NewBinop =
1711 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, NewIntrinsic, B));
1712 } else {
1713 return nullptr;
1714 }
1715
1716 NewBinop->setHasNoUnsignedWrap(HasNUW);
1717 NewBinop->setHasNoSignedWrap(HasNSW);
1718
1719 return NewBinop;
1720}
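// Illustrative IR for the factorization above:
//   %l = add nuw i32 %a, %b
//   %r = add nuw i32 %a, %d
//   %m = call i32 @llvm.umax.i32(i32 %l, i32 %r)
// becomes
//   %t = call i32 @llvm.umax.i32(i32 %b, i32 %d)
//   %m = add nuw i32 %a, %t
// saving one add when %l and %r have no other uses.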
1721
1722/// CallInst simplification. This mostly only handles folding of intrinsic
1723/// instructions. For normal calls, it allows visitCallBase to do the heavy
1724/// lifting.
1725Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
1726  // Don't try to simplify calls without uses. It will not do anything useful,
1727 // but will result in the following folds being skipped.
1728 if (!CI.use_empty()) {
1729 SmallVector<Value *, 8> Args(CI.args());
1730 if (Value *V = simplifyCall(&CI, CI.getCalledOperand(), Args,
1731 SQ.getWithInstruction(&CI)))
1732 return replaceInstUsesWith(CI, V);
1733 }
1734
1735 if (Value *FreedOp = getFreedOperand(&CI, &TLI))
1736 return visitFree(CI, FreedOp);
1737
1738 // If the caller function (i.e. us, the function that contains this CallInst)
1739 // is nounwind, mark the call as nounwind, even if the callee isn't.
1740 if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1741 CI.setDoesNotThrow();
1742 return &CI;
1743 }
1744
1745  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
1746  if (!II)
1747 return visitCallBase(CI);
1748
1749 // Intrinsics cannot occur in an invoke or a callbr, so handle them here
1750 // instead of in visitCallBase.
1751 if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
1752 if (auto NumBytes = MI->getLengthInBytes()) {
1753 // memmove/cpy/set of zero bytes is a noop.
1754 if (NumBytes->isZero())
1755 return eraseInstFromFunction(CI);
1756
1757      // For atomic unordered mem intrinsics, if the length is not positive or
1758      // not a multiple of the element size, the behavior is undefined.
1759 if (MI->isAtomic() &&
1760 (NumBytes->isNegative() ||
1761 (NumBytes->getZExtValue() % MI->getElementSizeInBytes() != 0))) {
1762        CreateNonTerminatorUnreachable(MI);
1763        assert(MI->getType()->isVoidTy() &&
1764 "non void atomic unordered mem intrinsic");
1765 return eraseInstFromFunction(*MI);
1766 }
1767 }
1768
1769 // No other transformations apply to volatile transfers.
1770 if (MI->isVolatile())
1771 return nullptr;
1772
1773    if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1774      // memmove(x,x,size) -> noop.
1775 if (MTI->getSource() == MTI->getDest())
1776 return eraseInstFromFunction(CI);
1777 }
1778
1779 auto IsPointerUndefined = [MI](Value *Ptr) {
1780 return isa<ConstantPointerNull>(Ptr) &&
1781             !NullPointerIsDefined(
1782                 MI->getFunction(),
1783 cast<PointerType>(Ptr->getType())->getAddressSpace());
1784 };
1785 bool SrcIsUndefined = false;
1786 // If we can determine a pointer alignment that is bigger than currently
1787 // set, update the alignment.
1788 if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1789      if (Instruction *I = SimplifyAnyMemTransfer(MTI))
1790        return I;
1791 SrcIsUndefined = IsPointerUndefined(MTI->getRawSource());
1792 } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
1793 if (Instruction *I = SimplifyAnyMemSet(MSI))
1794 return I;
1795 }
1796
1797 // If src/dest is null, this memory intrinsic must be a noop.
1798 if (SrcIsUndefined || IsPointerUndefined(MI->getRawDest())) {
1799 Builder.CreateAssumption(Builder.CreateIsNull(MI->getLength()));
1800 return eraseInstFromFunction(CI);
1801 }
1802
1803 // If we have a memmove and the source operation is a constant global,
1804 // then the source and dest pointers can't alias, so we can change this
1805 // into a call to memcpy.
1806 if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
1807 if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1808 if (GVSrc->isConstant()) {
1809 Module *M = CI.getModule();
1810 Intrinsic::ID MemCpyID =
1811 MMI->isAtomic()
1812 ? Intrinsic::memcpy_element_unordered_atomic
1813 : Intrinsic::memcpy;
1814 Type *Tys[3] = { CI.getArgOperand(0)->getType(),
1815 CI.getArgOperand(1)->getType(),
1816 CI.getArgOperand(2)->getType() };
1817          CI.setCalledFunction(
1818              Intrinsic::getOrInsertDeclaration(M, MemCpyID, Tys));
1819 return II;
1820 }
1821 }
1822 }
1823
1824 // For fixed width vector result intrinsics, use the generic demanded vector
1825 // support.
1826 if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
1827 auto VWidth = IIFVTy->getNumElements();
1828 APInt PoisonElts(VWidth, 0);
1829 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1830 if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, PoisonElts)) {
1831 if (V != II)
1832 return replaceInstUsesWith(*II, V);
1833 return II;
1834 }
1835 }
1836
1837 if (II->isCommutative()) {
1838 if (auto Pair = matchSymmetricPair(II->getOperand(0), II->getOperand(1))) {
1839 replaceOperand(*II, 0, Pair->first);
1840 replaceOperand(*II, 1, Pair->second);
1841 return II;
1842 }
1843
1844 if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
1845 return NewCall;
1846 }
1847
1848 // Unused constrained FP intrinsic calls may have declared side effect, which
1849 // prevents it from being removed. In some cases however the side effect is
1850 // actually absent. To detect this case, call SimplifyConstrainedFPCall. If it
1851 // returns a replacement, the call may be removed.
1852 if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
1853 if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
1854 return eraseInstFromFunction(CI);
1855 }
1856
1857 Intrinsic::ID IID = II->getIntrinsicID();
1858 switch (IID) {
1859 case Intrinsic::objectsize: {
1860 SmallVector<Instruction *> InsertedInstructions;
1861 if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false,
1862 &InsertedInstructions)) {
1863 for (Instruction *Inserted : InsertedInstructions)
1864 Worklist.add(Inserted);
1865 return replaceInstUsesWith(CI, V);
1866 }
1867 return nullptr;
1868 }
1869 case Intrinsic::abs: {
1870 Value *IIOperand = II->getArgOperand(0);
1871 bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
1872
1873 // abs(-x) -> abs(x)
1874 Value *X;
1875 if (match(IIOperand, m_Neg(m_Value(X)))) {
1876 if (cast<Instruction>(IIOperand)->hasNoSignedWrap() || IntMinIsPoison)
1877 replaceOperand(*II, 1, Builder.getTrue());
1878 return replaceOperand(*II, 0, X);
1879 }
1880 if (match(IIOperand, m_c_Select(m_Neg(m_Value(X)), m_Deferred(X))))
1881 return replaceOperand(*II, 0, X);
1882
1883 Value *Y;
1884 // abs(a * abs(b)) -> abs(a * b)
1885 if (match(IIOperand,
1886              m_OneUse(m_c_Mul(m_Value(X),
1887                               m_Intrinsic<Intrinsic::abs>(m_Value(Y)))))) {
1888      bool NSW =
1889 cast<Instruction>(IIOperand)->hasNoSignedWrap() && IntMinIsPoison;
1890 auto *XY = NSW ? Builder.CreateNSWMul(X, Y) : Builder.CreateMul(X, Y);
1891 return replaceOperand(*II, 0, XY);
1892 }
1893
1894 if (std::optional<bool> Known =
1895 getKnownSignOrZero(IIOperand, SQ.getWithInstruction(II))) {
1896 // abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
1897 // abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
1898 if (!*Known)
1899 return replaceInstUsesWith(*II, IIOperand);
1900
1901 // abs(x) -> -x if x < 0
1902      // abs(x) -> -x if x <= 0 (include abs(x-y) --> y - x where x <= y)
1903 if (IntMinIsPoison)
1904 return BinaryOperator::CreateNSWNeg(IIOperand);
1905 return BinaryOperator::CreateNeg(IIOperand);
1906 }
1907
1908 // abs (sext X) --> zext (abs X*)
1909 // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
1910 if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
1911 Value *NarrowAbs =
1912 Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
1913 return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
1914 }
1915
1916 // Match a complicated way to check if a number is odd/even:
1917 // abs (srem X, 2) --> and X, 1
1918 const APInt *C;
1919 if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
1920 return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));
1921
1922 break;
1923 }
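  // Illustrative examples of the abs folds handled above:
  //   abs(sub nsw 0, %x)             -->  abs(%x)  (and int_min_poison can be set)
  //   abs(sub nsw %x, %y), %x >=s %y -->  sub nsw %x, %y
  //   abs(srem %x, 2)                -->  and %x, 1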
1924 case Intrinsic::umin: {
1925 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1926 // umin(x, 1) == zext(x != 0)
1927 if (match(I1, m_One())) {
1928 assert(II->getType()->getScalarSizeInBits() != 1 &&
1929 "Expected simplify of umin with max constant");
1930 Value *Zero = Constant::getNullValue(I0->getType());
1931 Value *Cmp = Builder.CreateICmpNE(I0, Zero);
1932 return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
1933 }
1934 // umin(cttz(x), const) --> cttz(x | (1 << const))
1935 if (Value *FoldedCttz =
1936            foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::cttz>(
1937                I0, I1, DL, Builder))
1938 return replaceInstUsesWith(*II, FoldedCttz);
1939 // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const))
1940 if (Value *FoldedCtlz =
1941            foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::ctlz>(
1942                I0, I1, DL, Builder))
1943 return replaceInstUsesWith(*II, FoldedCtlz);
1944 [[fallthrough]];
1945 }
1946 case Intrinsic::umax: {
1947 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1948 Value *X, *Y;
1949 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
1950 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
1951 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
1952 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
1953 }
1954 Constant *C;
1955 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
1956 I0->hasOneUse()) {
1957 if (Constant *NarrowC = getLosslessUnsignedTrunc(C, X->getType(), DL)) {
1958 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
1959 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
1960 }
1961 }
1962 // If C is not 0:
1963 // umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C)
1964 // If C is not 0 or 1:
1965 // umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
1966 auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * {
1967 const APInt *C;
1968 Value *X;
1969 if (!match(A, m_NUWShl(m_Value(X), m_APInt(C))) &&
1970 !(match(A, m_NUWMul(m_Value(X), m_APInt(C))) && !C->isOne()))
1971 return nullptr;
1972 if (C->isZero())
1973 return nullptr;
1974 if (!match(B, m_OneUse(m_Add(m_Specific(X), m_One()))))
1975 return nullptr;
1976
1977 Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(X->getType(), 0));
1978 Value *NewSelect =
1979 Builder.CreateSelect(Cmp, ConstantInt::get(X->getType(), 1), A);
1980 return replaceInstUsesWith(*II, NewSelect);
1981 };
1982
1983 if (IID == Intrinsic::umax) {
1984 if (Instruction *I = foldMaxMulShift(I0, I1))
1985 return I;
1986 if (Instruction *I = foldMaxMulShift(I1, I0))
1987 return I;
1988 }
1989
1990 // If both operands of unsigned min/max are sign-extended, it is still ok
1991 // to narrow the operation.
1992 [[fallthrough]];
1993 }
1994 case Intrinsic::smax:
1995 case Intrinsic::smin: {
1996 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1997 Value *X, *Y;
1998 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
1999 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2000 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2001 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2002 }
2003
2004 Constant *C;
2005 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2006 I0->hasOneUse()) {
2007 if (Constant *NarrowC = getLosslessSignedTrunc(C, X->getType(), DL)) {
2008 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2009 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2010 }
2011 }
2012
2013 // smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC) if MinC s>= MaxC
2014 // umax(umin(X, MinC), MaxC) -> umin(umax(X, MaxC), MinC) if MinC u>= MaxC
2015 const APInt *MinC, *MaxC;
2016 auto CreateCanonicalClampForm = [&](bool IsSigned) {
2017 auto MaxIID = IsSigned ? Intrinsic::smax : Intrinsic::umax;
2018 auto MinIID = IsSigned ? Intrinsic::smin : Intrinsic::umin;
2019 Value *NewMax = Builder.CreateBinaryIntrinsic(
2020 MaxIID, X, ConstantInt::get(X->getType(), *MaxC));
2021 return replaceInstUsesWith(
2022 *II, Builder.CreateBinaryIntrinsic(
2023 MinIID, NewMax, ConstantInt::get(X->getType(), *MinC)));
2024 };
2025 if (IID == Intrinsic::smax &&
2027 m_APInt(MinC)))) &&
2028 match(I1, m_APInt(MaxC)) && MinC->sgt(*MaxC))
2029 return CreateCanonicalClampForm(true);
2030 if (IID == Intrinsic::umax &&
2032 m_APInt(MinC)))) &&
2033 match(I1, m_APInt(MaxC)) && MinC->ugt(*MaxC))
2034 return CreateCanonicalClampForm(false);
2035
2036 // umin(i1 X, i1 Y) -> and i1 X, Y
2037 // smax(i1 X, i1 Y) -> and i1 X, Y
2038 if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
2039 II->getType()->isIntOrIntVectorTy(1)) {
2040 return BinaryOperator::CreateAnd(I0, I1);
2041 }
2042
2043 // umax(i1 X, i1 Y) -> or i1 X, Y
2044 // smin(i1 X, i1 Y) -> or i1 X, Y
2045 if ((IID == Intrinsic::umax || IID == Intrinsic::smin) &&
2046 II->getType()->isIntOrIntVectorTy(1)) {
2047 return BinaryOperator::CreateOr(I0, I1);
2048 }
2049
2050 // smin(smax(X, -1), 1) -> scmp(X, 0)
2051 // smax(smin(X, 1), -1) -> scmp(X, 0)
2052    // At this point, smax(smin(X, 1), -1) has been changed to smin(smax(X, -1), 1)
2053    // and the i1 cases have been changed to and/or above,
2054    // so we only need to check for smin.
2055 if (IID == Intrinsic::smin) {
2056 if (match(I0, m_OneUse(m_SMax(m_Value(X), m_AllOnes()))) &&
2057 match(I1, m_One())) {
2058 Value *Zero = ConstantInt::get(X->getType(), 0);
2059 return replaceInstUsesWith(
2060 CI,
2061 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {X, Zero}));
2062 }
2063 }
2064
2065 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2066 // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
2067 // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
2068 // TODO: Canonicalize neg after min/max if I1 is constant.
2069 if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
2070 (I0->hasOneUse() || I1->hasOneUse())) {
2071        Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
2072        Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
2073 return BinaryOperator::CreateNSWNeg(InvMaxMin);
2074 }
2075 }
2076
2077 // (umax X, (xor X, Pow2))
2078 // -> (or X, Pow2)
2079 // (umin X, (xor X, Pow2))
2080 // -> (and X, ~Pow2)
2081 // (smax X, (xor X, Pos_Pow2))
2082 // -> (or X, Pos_Pow2)
2083 // (smin X, (xor X, Pos_Pow2))
2084 // -> (and X, ~Pos_Pow2)
2085 // (smax X, (xor X, Neg_Pow2))
2086 // -> (and X, ~Neg_Pow2)
2087 // (smin X, (xor X, Neg_Pow2))
2088 // -> (or X, Neg_Pow2)
2089 if ((match(I0, m_c_Xor(m_Specific(I1), m_Value(X))) ||
2090 match(I1, m_c_Xor(m_Specific(I0), m_Value(X)))) &&
2091 isKnownToBeAPowerOfTwo(X, /* OrZero */ true)) {
2092 bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax;
2093 bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;
2094
2095 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2096 auto KnownSign = getKnownSign(X, SQ.getWithInstruction(II));
2097 if (KnownSign == std::nullopt) {
2098 UseOr = false;
2099 UseAndN = false;
2100 } else if (*KnownSign /* true is Signed. */) {
2101 UseOr ^= true;
2102 UseAndN ^= true;
2103 Type *Ty = I0->getType();
2104 // Negative power of 2 must be IntMin. It's possible to be able to
2105 // prove negative / power of 2 without actually having known bits, so
2106 // just get the value by hand.
2107          X = ConstantInt::get(
2108              Ty, APInt::getSignedMinValue(Ty->getScalarSizeInBits()));
2109 }
2110 }
2111 if (UseOr)
2112 return BinaryOperator::CreateOr(I0, X);
2113 else if (UseAndN)
2114 return BinaryOperator::CreateAnd(I0, Builder.CreateNot(X));
2115 }
2116
2117 // If we can eliminate ~A and Y is free to invert:
2118 // max ~A, Y --> ~(min A, ~Y)
2119 //
2120 // Examples:
2121 // max ~A, ~Y --> ~(min A, Y)
2122 // max ~A, C --> ~(min A, ~C)
2123 // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
2124 auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
2125 Value *A;
2126 if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
2127 !isFreeToInvert(A, A->hasOneUse())) {
2128 if (Value *NotY = getFreelyInverted(Y, Y->hasOneUse(), &Builder)) {
2129          Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
2130          Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
2131 return BinaryOperator::CreateNot(InvMaxMin);
2132 }
2133 }
2134 return nullptr;
2135 };
2136
2137 if (Instruction *I = moveNotAfterMinMax(I0, I1))
2138 return I;
2139 if (Instruction *I = moveNotAfterMinMax(I1, I0))
2140 return I;
2141
2142    if (Instruction *I = moveAddAfterMinMax(II, Builder))
2143      return I;
2144
2145 // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C
2146 const APInt *RHSC;
2147 if (match(I0, m_OneUse(m_And(m_Value(X), m_NegatedPower2(RHSC)))) &&
2148 match(I1, m_OneUse(m_And(m_Value(Y), m_SpecificInt(*RHSC)))))
2149 return BinaryOperator::CreateAnd(Builder.CreateBinaryIntrinsic(IID, X, Y),
2150 ConstantInt::get(II->getType(), *RHSC));
2151
2152 // smax(X, -X) --> abs(X)
2153 // smin(X, -X) --> -abs(X)
2154 // umax(X, -X) --> -abs(X)
2155 // umin(X, -X) --> abs(X)
2156 if (isKnownNegation(I0, I1)) {
2157 // We can choose either operand as the input to abs(), but if we can
2158 // eliminate the only use of a value, that's better for subsequent
2159 // transforms/analysis.
2160 if (I0->hasOneUse() && !I1->hasOneUse())
2161 std::swap(I0, I1);
2162
2163 // This is some variant of abs(). See if we can propagate 'nsw' to the abs
2164 // operation and potentially its negation.
2165 bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
2166 Value *Abs = Builder.CreateBinaryIntrinsic(
2167 Intrinsic::abs, I0,
2168 ConstantInt::getBool(II->getContext(), IntMinIsPoison));
2169
2170 // We don't have a "nabs" intrinsic, so negate if needed based on the
2171 // max/min operation.
2172 if (IID == Intrinsic::smin || IID == Intrinsic::umax)
2173 Abs = Builder.CreateNeg(Abs, "nabs", IntMinIsPoison);
2174 return replaceInstUsesWith(CI, Abs);
2175 }
2176
2177    if (Instruction *Sel = foldClampRangeOfTwo(II, Builder))
2178      return Sel;
2179
2180 if (Instruction *SAdd = matchSAddSubSat(*II))
2181 return SAdd;
2182
2183 if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ))
2184 return replaceInstUsesWith(*II, NewMinMax);
2185
2186    if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder))
2187      return R;
2188
2189 if (Instruction *NewMinMax = factorizeMinMaxTree(II))
2190 return NewMinMax;
2191
2192 // Try to fold minmax with constant RHS based on range information
2193 if (match(I1, m_APIntAllowPoison(RHSC))) {
2194 ICmpInst::Predicate Pred =
2195          ICmpInst::getNonStrictPredicate(MinMaxIntrinsic::getPredicate(IID));
2196      bool IsSigned = MinMaxIntrinsic::isSigned(IID);
2197      ConstantRange LHS_CR = computeConstantRangeIncludingKnownBits(
2198          I0, IsSigned, SQ.getWithInstruction(II));
2199 if (!LHS_CR.isFullSet()) {
2200 if (LHS_CR.icmp(Pred, *RHSC))
2201 return replaceInstUsesWith(*II, I0);
2202 if (LHS_CR.icmp(ICmpInst::getSwappedPredicate(Pred), *RHSC))
2203 return replaceInstUsesWith(*II,
2204 ConstantInt::get(II->getType(), *RHSC));
2205 }
2206 }
2207
2209 return replaceInstUsesWith(*II, V);
2210
2211 break;
2212 }
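  // Illustrative example of the clamp canonicalization above:
  //   smax(smin(%x, 255), 0)  -->  smin(smax(%x, 0), 255)
  // Both forms clamp %x to [0, 255]; the smin-of-smax order is the canonical
  // one because MinC (255) s>= MaxC (0).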
2213 case Intrinsic::scmp: {
2214 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2215 Value *LHS, *RHS;
2216 if (match(I0, m_NSWSub(m_Value(LHS), m_Value(RHS))) && match(I1, m_Zero()))
2217 return replaceInstUsesWith(
2218 CI,
2219 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {LHS, RHS}));
2220 break;
2221 }
2222 case Intrinsic::bitreverse: {
2223 Value *IIOperand = II->getArgOperand(0);
2224 // bitrev (zext i1 X to ?) --> X ? SignBitC : 0
2225 Value *X;
2226 if (match(IIOperand, m_ZExt(m_Value(X))) &&
2227 X->getType()->isIntOrIntVectorTy(1)) {
2228 Type *Ty = II->getType();
2229 APInt SignBit = APInt::getSignMask(Ty->getScalarSizeInBits());
2230 return SelectInst::Create(X, ConstantInt::get(Ty, SignBit),
2232 }
2233
2234 if (Instruction *crossLogicOpFold =
2235            foldBitOrderCrossLogicOp<Intrinsic::bitreverse>(IIOperand, Builder))
2236      return crossLogicOpFold;
2237
2238 break;
2239 }
2240 case Intrinsic::bswap: {
2241 Value *IIOperand = II->getArgOperand(0);
2242
2243 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
2244 // inverse-shift-of-bswap:
2245 // bswap (shl X, Y) --> lshr (bswap X), Y
2246 // bswap (lshr X, Y) --> shl (bswap X), Y
2247 Value *X, *Y;
2248 if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
2249 unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
2251 Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
2252 BinaryOperator::BinaryOps InverseShift =
2253 cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
2254 ? Instruction::LShr
2255 : Instruction::Shl;
2256 return BinaryOperator::Create(InverseShift, NewSwap, Y);
2257 }
2258 }
2259
2260 KnownBits Known = computeKnownBits(IIOperand, II);
2261 uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
2262 uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
2263 unsigned BW = Known.getBitWidth();
2264
2265 // bswap(x) -> shift(x) if x has exactly one "active byte"
2266 if (BW - LZ - TZ == 8) {
2267 assert(LZ != TZ && "active byte cannot be in the middle");
2268 if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
2269 return BinaryOperator::CreateNUWShl(
2270 IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
2271 // -> lshr(x) if the "active byte" is in the high part of x
2272 return BinaryOperator::CreateExactLShr(
2273 IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
2274 }
2275
2276 // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
2277 if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
2278 unsigned C = X->getType()->getScalarSizeInBits() - BW;
2279 Value *CV = ConstantInt::get(X->getType(), C);
2280 Value *V = Builder.CreateLShr(X, CV);
2281 return new TruncInst(V, IIOperand->getType());
2282 }
2283
2284 if (Instruction *crossLogicOpFold =
2285            foldBitOrderCrossLogicOp<Intrinsic::bswap>(IIOperand, Builder)) {
2286      return crossLogicOpFold;
2287 }
2288
2289 // Try to fold into bitreverse if bswap is the root of the expression tree.
2290 if (Instruction *BitOp = matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ false,
2291 /*MatchBitReversals*/ true))
2292 return BitOp;
2293 break;
2294 }
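  // Illustrative example of the single-active-byte bswap fold above: if only
  // bits 8..15 of a 32-bit %x can be nonzero (BW = 32, LZ = 16, TZ = 8), then
  //   bswap(%x)  -->  shl nuw %x, 8
  // which moves the sole live byte to where bswap would have placed it.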
2295 case Intrinsic::masked_load:
2296 if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
2297 return replaceInstUsesWith(CI, SimplifiedMaskedOp);
2298 break;
2299 case Intrinsic::masked_store:
2300 return simplifyMaskedStore(*II);
2301 case Intrinsic::masked_gather:
2302 return simplifyMaskedGather(*II);
2303 case Intrinsic::masked_scatter:
2304 return simplifyMaskedScatter(*II);
2305 case Intrinsic::launder_invariant_group:
2306 case Intrinsic::strip_invariant_group:
2307 if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
2308 return replaceInstUsesWith(*II, SkippedBarrier);
2309 break;
2310 case Intrinsic::powi:
2311 if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2312 // 0 and 1 are handled in instsimplify
2313 // powi(x, -1) -> 1/x
2314 if (Power->isMinusOne())
2315 return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
2316 II->getArgOperand(0), II);
2317 // powi(x, 2) -> x*x
2318 if (Power->equalsInt(2))
2319 return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
2320 II->getArgOperand(0), II);
2321
2322 if (!Power->getValue()[0]) {
2323 Value *X;
2324 // If power is even:
2325 // powi(-x, p) -> powi(x, p)
2326 // powi(fabs(x), p) -> powi(x, p)
2327 // powi(copysign(x, y), p) -> powi(x, p)
2328 if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
2329 match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
2330 match(II->getArgOperand(0),
2332 return replaceOperand(*II, 0, X);
2333 }
2334 }
2335 break;
2336
2337 case Intrinsic::cttz:
2338 case Intrinsic::ctlz:
2339 if (auto *I = foldCttzCtlz(*II, *this))
2340 return I;
2341 break;
2342
2343 case Intrinsic::ctpop:
2344 if (auto *I = foldCtpop(*II, *this))
2345 return I;
2346 break;
2347
2348 case Intrinsic::fshl:
2349 case Intrinsic::fshr: {
2350 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
2351 Type *Ty = II->getType();
2352 unsigned BitWidth = Ty->getScalarSizeInBits();
2353 Constant *ShAmtC;
2354 if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) {
2355 // Canonicalize a shift amount constant operand to modulo the bit-width.
2356 Constant *WidthC = ConstantInt::get(Ty, BitWidth);
2357 Constant *ModuloC =
2358 ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
2359 if (!ModuloC)
2360 return nullptr;
2361 if (ModuloC != ShAmtC)
2362 return replaceOperand(*II, 2, ModuloC);
2363
2364      assert(match(ConstantFoldCompareInstOperands(ICmpInst::ICMP_UGT, WidthC,
2365                                                   ShAmtC, DL),
2366 m_One()) &&
2367 "Shift amount expected to be modulo bitwidth");
2368
2369 // Canonicalize funnel shift right by constant to funnel shift left. This
2370 // is not entirely arbitrary. For historical reasons, the backend may
2371 // recognize rotate left patterns but miss rotate right patterns.
2372 if (IID == Intrinsic::fshr) {
2373 // fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero.
2374 if (!isKnownNonZero(ShAmtC, SQ.getWithInstruction(II)))
2375 return nullptr;
2376
2377 Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
2378 Module *Mod = II->getModule();
2379 Function *Fshl =
2380 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::fshl, Ty);
2381 return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
2382 }
2383 assert(IID == Intrinsic::fshl &&
2384 "All funnel shifts by simple constants should go left");
2385
2386 // fshl(X, 0, C) --> shl X, C
2387 // fshl(X, undef, C) --> shl X, C
2388 if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
2389 return BinaryOperator::CreateShl(Op0, ShAmtC);
2390
2391 // fshl(0, X, C) --> lshr X, (BW-C)
2392 // fshl(undef, X, C) --> lshr X, (BW-C)
2393 if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
2394 return BinaryOperator::CreateLShr(Op1,
2395 ConstantExpr::getSub(WidthC, ShAmtC));
2396
2397 // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
2398 if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
2399 Module *Mod = II->getModule();
2400 Function *Bswap =
2401 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::bswap, Ty);
2402 return CallInst::Create(Bswap, { Op0 });
2403 }
2404 if (Instruction *BitOp =
2405 matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
2406 /*MatchBitReversals*/ true))
2407 return BitOp;
2408 }
2409
2410 // fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
2411 // fshr(X, X, Neg(Y)) --> fshl(X, X, Y)
2412 // if BitWidth is a power-of-2
2413 Value *Y;
2414 if (Op0 == Op1 && isPowerOf2_32(BitWidth) &&
2415 match(II->getArgOperand(2), m_Neg(m_Value(Y)))) {
2416 Module *Mod = II->getModule();
2417      Function *OppositeShift = Intrinsic::getOrInsertDeclaration(
2418          Mod, IID == Intrinsic::fshl ? Intrinsic::fshr : Intrinsic::fshl, Ty);
2419 return CallInst::Create(OppositeShift, {Op0, Op1, Y});
2420 }
2421
2422 // fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
2423 // power-of-2
2424 if (IID == Intrinsic::fshl && isPowerOf2_32(BitWidth) &&
2425 match(Op1, m_ZeroInt())) {
2426 Value *Op2 = II->getArgOperand(2);
2427 Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
2428 return BinaryOperator::CreateShl(Op0, And);
2429 }
2430
2431 // Left or right might be masked.
2432    if (SimplifyDemandedInstructionBits(*II))
2433      return &CI;
2434
2435 // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
2436 // so only the low bits of the shift amount are demanded if the bitwidth is
2437 // a power-of-2.
2438 if (!isPowerOf2_32(BitWidth))
2439 break;
2440    APInt Op2Demanded = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
2441    KnownBits Op2Known(BitWidth);
2442 if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
2443 return &CI;
2444 break;
2445 }
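  // Illustrative examples of the funnel-shift canonicalizations above:
  //   fshr i32 %x, %y, 8   -->  fshl i32 %x, %y, 24
  //   fshl i16 %x, %x, 8   -->  bswap i16 %x
  //   fshl i32 %x, 0, %y   -->  shl i32 %x, (%y & 31)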
2446 case Intrinsic::ptrmask: {
2447 unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
2448 KnownBits Known(BitWidth);
2449    if (SimplifyDemandedInstructionBits(*II, Known))
2450      return II;
2451
2452 Value *InnerPtr, *InnerMask;
2453 bool Changed = false;
2454 // Combine:
2455 // (ptrmask (ptrmask p, A), B)
2456 // -> (ptrmask p, (and A, B))
2457 if (match(II->getArgOperand(0),
2458              m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(InnerPtr),
2459                                                       m_Value(InnerMask))))) {
2460 assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
2461 "Mask types must match");
2462 // TODO: If InnerMask == Op1, we could copy attributes from inner
2463 // callsite -> outer callsite.
2464 Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
2465 replaceOperand(CI, 0, InnerPtr);
2466 replaceOperand(CI, 1, NewMask);
2467 Changed = true;
2468 }
2469
2470 // See if we can deduce non-null.
2471 if (!CI.hasRetAttr(Attribute::NonNull) &&
2472 (Known.isNonZero() ||
2473 isKnownNonZero(II, getSimplifyQuery().getWithInstruction(II)))) {
2474 CI.addRetAttr(Attribute::NonNull);
2475 Changed = true;
2476 }
2477
2478 unsigned NewAlignmentLog =
2479        std::min(Value::MaxAlignmentExponent,
2480                 std::min(BitWidth - 1, Known.countMinTrailingZeros()));
2481 // Known bits will capture if we had alignment information associated with
2482 // the pointer argument.
2483 if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) {
2484      CI.addRetAttr(Attribute::getWithAlignment(
2485          CI.getContext(), Align(uint64_t(1) << NewAlignmentLog)));
2486 Changed = true;
2487 }
2488 if (Changed)
2489 return &CI;
2490 break;
2491 }
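  // Illustrative example of the ptrmask folds above:
  //   %p1 = call ptr @llvm.ptrmask.p0.i64(ptr %p,  i64 -8)
  //   %p2 = call ptr @llvm.ptrmask.p0.i64(ptr %p1, i64 -16)
  // combines into a single ptrmask with mask (-8 & -16) = -16, and the result
  // can additionally be marked nonnull / align 16 when the known bits allow it.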
2492 case Intrinsic::uadd_with_overflow:
2493 case Intrinsic::sadd_with_overflow: {
2494 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2495 return I;
2496
2497 // Given 2 constant operands whose sum does not overflow:
2498 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
2499 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
2500 Value *X;
2501 const APInt *C0, *C1;
2502 Value *Arg0 = II->getArgOperand(0);
2503 Value *Arg1 = II->getArgOperand(1);
2504 bool IsSigned = IID == Intrinsic::sadd_with_overflow;
2505 bool HasNWAdd = IsSigned
2506 ? match(Arg0, m_NSWAddLike(m_Value(X), m_APInt(C0)))
2507 : match(Arg0, m_NUWAddLike(m_Value(X), m_APInt(C0)));
2508 if (HasNWAdd && match(Arg1, m_APInt(C1))) {
2509 bool Overflow;
2510 APInt NewC =
2511 IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
2512 if (!Overflow)
2513 return replaceInstUsesWith(
2514 *II, Builder.CreateBinaryIntrinsic(
2515 IID, X, ConstantInt::get(Arg1->getType(), NewC)));
2516 }
2517 break;
2518 }
2519
2520 case Intrinsic::umul_with_overflow:
2521 case Intrinsic::smul_with_overflow:
2522 case Intrinsic::usub_with_overflow:
2523 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2524 return I;
2525 break;
2526
2527 case Intrinsic::ssub_with_overflow: {
2528 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2529 return I;
2530
2531 Constant *C;
2532 Value *Arg0 = II->getArgOperand(0);
2533 Value *Arg1 = II->getArgOperand(1);
2534 // Given a constant C that is not the minimum signed value
2535 // for an integer of a given bit width:
2536 //
2537 // ssubo X, C -> saddo X, -C
2538 if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
2539 Value *NegVal = ConstantExpr::getNeg(C);
2540 // Build a saddo call that is equivalent to the discovered
2541 // ssubo call.
2542 return replaceInstUsesWith(
2543 *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
2544 Arg0, NegVal));
2545 }
2546
2547 break;
2548 }
2549
2550 case Intrinsic::uadd_sat:
2551 case Intrinsic::sadd_sat:
2552 case Intrinsic::usub_sat:
2553 case Intrinsic::ssub_sat: {
2554    SaturatingInst *SI = cast<SaturatingInst>(II);
2555    Type *Ty = SI->getType();
2556 Value *Arg0 = SI->getLHS();
2557 Value *Arg1 = SI->getRHS();
2558
2559 // Make use of known overflow information.
2560 OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
2561 Arg0, Arg1, SI);
2562 switch (OR) {
2563    case OverflowResult::MayOverflow:
2564      break;
2565    case OverflowResult::NeverOverflows:
2566      if (SI->isSigned())
2567        return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
2568      else
2569        return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
2570    case OverflowResult::AlwaysOverflowsLow: {
2571      unsigned BitWidth = Ty->getScalarSizeInBits();
2572      APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
2573      return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
2574    }
2575    case OverflowResult::AlwaysOverflowsHigh: {
2576      unsigned BitWidth = Ty->getScalarSizeInBits();
2577 APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
2578 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
2579 }
2580 }
2581
2582 // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
2583    // which then simplifies further to:
2584 // usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
2585 // usub_sat((sub nuw C, A), C1) -> 0 otherwise
2586 Constant *C, *C1;
2587 Value *A;
2588 if (IID == Intrinsic::usub_sat &&
2589 match(Arg0, m_NUWSub(m_ImmConstant(C), m_Value(A))) &&
2590 match(Arg1, m_ImmConstant(C1))) {
2591 auto *NewC = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, C, C1);
2592 auto *NewSub =
2593 Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, NewC, A);
2594 return replaceInstUsesWith(*SI, NewSub);
2595 }
2596
2597 // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
2598 if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
2599 C->isNotMinSignedValue()) {
2600 Value *NegVal = ConstantExpr::getNeg(C);
2601 return replaceInstUsesWith(
2602 *II, Builder.CreateBinaryIntrinsic(
2603 Intrinsic::sadd_sat, Arg0, NegVal));
2604 }
2605
2606 // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
2607 // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
2608 // if Val and Val2 have the same sign
2609 if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
2610 Value *X;
2611 const APInt *Val, *Val2;
2612 APInt NewVal;
2613 bool IsUnsigned =
2614 IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
2615 if (Other->getIntrinsicID() == IID &&
2616 match(Arg1, m_APInt(Val)) &&
2617 match(Other->getArgOperand(0), m_Value(X)) &&
2618 match(Other->getArgOperand(1), m_APInt(Val2))) {
2619 if (IsUnsigned)
2620 NewVal = Val->uadd_sat(*Val2);
2621 else if (Val->isNonNegative() == Val2->isNonNegative()) {
2622 bool Overflow;
2623 NewVal = Val->sadd_ov(*Val2, Overflow);
2624 if (Overflow) {
2625 // Both adds together may add more than SignedMaxValue
2626 // without saturating the final result.
2627 break;
2628 }
2629 } else {
2630 // Cannot fold saturated addition with different signs.
2631 break;
2632 }
2633
2634 return replaceInstUsesWith(
2635 *II, Builder.CreateBinaryIntrinsic(
2636 IID, X, ConstantInt::get(II->getType(), NewVal)));
2637 }
2638 }
2639 break;
2640 }
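  // Illustrative examples of the saturating-arithmetic folds above:
  //   ssub.sat(%x, 7)               -->  sadd.sat(%x, -7)
  //   sadd.sat(sadd.sat(%x, 3), 5)  -->  sadd.sat(%x, 8)   (same-sign constants)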
2641
2642 case Intrinsic::minnum:
2643 case Intrinsic::maxnum:
2644 case Intrinsic::minimum:
2645 case Intrinsic::maximum: {
2646 Value *Arg0 = II->getArgOperand(0);
2647 Value *Arg1 = II->getArgOperand(1);
2648 Value *X, *Y;
2649 if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
2650 (Arg0->hasOneUse() || Arg1->hasOneUse())) {
2651 // If both operands are negated, invert the call and negate the result:
2652 // min(-X, -Y) --> -(max(X, Y))
2653 // max(-X, -Y) --> -(min(X, Y))
2654 Intrinsic::ID NewIID;
2655 switch (IID) {
2656 case Intrinsic::maxnum:
2657 NewIID = Intrinsic::minnum;
2658 break;
2659 case Intrinsic::minnum:
2660 NewIID = Intrinsic::maxnum;
2661 break;
2662 case Intrinsic::maximum:
2663 NewIID = Intrinsic::minimum;
2664 break;
2665 case Intrinsic::minimum:
2666 NewIID = Intrinsic::maximum;
2667 break;
2668 default:
2669 llvm_unreachable("unexpected intrinsic ID");
2670 }
2671 Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
2672 Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
2673 FNeg->copyIRFlags(II);
2674 return FNeg;
2675 }
2676
2677 // m(m(X, C2), C1) -> m(X, C)
2678 const APFloat *C1, *C2;
2679 if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
2680 if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
2681 ((match(M->getArgOperand(0), m_Value(X)) &&
2682 match(M->getArgOperand(1), m_APFloat(C2))) ||
2683 (match(M->getArgOperand(1), m_Value(X)) &&
2684 match(M->getArgOperand(0), m_APFloat(C2))))) {
2685 APFloat Res(0.0);
2686 switch (IID) {
2687 case Intrinsic::maxnum:
2688 Res = maxnum(*C1, *C2);
2689 break;
2690 case Intrinsic::minnum:
2691 Res = minnum(*C1, *C2);
2692 break;
2693 case Intrinsic::maximum:
2694 Res = maximum(*C1, *C2);
2695 break;
2696 case Intrinsic::minimum:
2697 Res = minimum(*C1, *C2);
2698 break;
2699 default:
2700 llvm_unreachable("unexpected intrinsic ID");
2701 }
2702 // TODO: Conservatively intersecting FMF. If Res == C2, the transform
2703 // was a simplification (so Arg0 and its original flags could
2704 // propagate?)
2705 Value *V = Builder.CreateBinaryIntrinsic(
2706 IID, X, ConstantFP::get(Arg0->getType(), Res),
2707            FMFSource::intersect(II, M));
2708        return replaceInstUsesWith(*II, V);
2709 }
2710 }
2711
2712 // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
2713 if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
2714 match(Arg1, m_OneUse(m_FPExt(m_Value(Y)))) &&
2715 X->getType() == Y->getType()) {
2716 Value *NewCall =
2717 Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
2718 return new FPExtInst(NewCall, II->getType());
2719 }
2720
2721 // max X, -X --> fabs X
2722 // min X, -X --> -(fabs X)
2723 // TODO: Remove one-use limitation? That is obviously better for max,
2724 // hence why we don't check for one-use for that. However,
2725 // it would be an extra instruction for min (fnabs), but
2726 // that is still likely better for analysis and codegen.
2727 auto IsMinMaxOrXNegX = [IID, &X](Value *Op0, Value *Op1) {
2728 if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Specific(X)))
2729 return Op0->hasOneUse() ||
2730 (IID != Intrinsic::minimum && IID != Intrinsic::minnum);
2731 return false;
2732 };
2733
2734 if (IsMinMaxOrXNegX(Arg0, Arg1) || IsMinMaxOrXNegX(Arg1, Arg0)) {
2735 Value *R = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
2736 if (IID == Intrinsic::minimum || IID == Intrinsic::minnum)
2737 R = Builder.CreateFNegFMF(R, II);
2738 return replaceInstUsesWith(*II, R);
2739 }
2740
2741 break;
2742 }
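  // Illustrative examples of the FP min/max folds above:
  //   maxnum(fneg %x, fneg %y)  -->  fneg (minnum(%x, %y))
  //   maxnum(%x, fneg %x)       -->  fabs(%x)
  //   minnum(%x, fneg %x)       -->  fneg (fabs(%x))   (given the one-use check)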
2743 case Intrinsic::matrix_multiply: {
2744 // Optimize negation in matrix multiplication.
2745
2746 // -A * -B -> A * B
2747 Value *A, *B;
2748 if (match(II->getArgOperand(0), m_FNeg(m_Value(A))) &&
2749 match(II->getArgOperand(1), m_FNeg(m_Value(B)))) {
2750 replaceOperand(*II, 0, A);
2751 replaceOperand(*II, 1, B);
2752 return II;
2753 }
2754
2755 Value *Op0 = II->getOperand(0);
2756 Value *Op1 = II->getOperand(1);
2757 Value *OpNotNeg, *NegatedOp;
2758 unsigned NegatedOpArg, OtherOpArg;
2759 if (match(Op0, m_FNeg(m_Value(OpNotNeg)))) {
2760 NegatedOp = Op0;
2761 NegatedOpArg = 0;
2762 OtherOpArg = 1;
2763 } else if (match(Op1, m_FNeg(m_Value(OpNotNeg)))) {
2764 NegatedOp = Op1;
2765 NegatedOpArg = 1;
2766 OtherOpArg = 0;
2767 } else
2768 // Multiplication doesn't have a negated operand.
2769 break;
2770
2771 // Only optimize if the negated operand has only one use.
2772 if (!NegatedOp->hasOneUse())
2773 break;
2774
2775 Value *OtherOp = II->getOperand(OtherOpArg);
2776 VectorType *RetTy = cast<VectorType>(II->getType());
2777 VectorType *NegatedOpTy = cast<VectorType>(NegatedOp->getType());
2778 VectorType *OtherOpTy = cast<VectorType>(OtherOp->getType());
2779 ElementCount NegatedCount = NegatedOpTy->getElementCount();
2780 ElementCount OtherCount = OtherOpTy->getElementCount();
2781 ElementCount RetCount = RetTy->getElementCount();
2782 // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
2783 if (ElementCount::isKnownGT(NegatedCount, OtherCount) &&
2784 ElementCount::isKnownLT(OtherCount, RetCount)) {
2785 Value *InverseOtherOp = Builder.CreateFNeg(OtherOp);
2786 replaceOperand(*II, NegatedOpArg, OpNotNeg);
2787 replaceOperand(*II, OtherOpArg, InverseOtherOp);
2788 return II;
2789 }
2790 // (-A) * B -> -(A * B), if it is cheaper to negate the result
2791 if (ElementCount::isKnownGT(NegatedCount, RetCount)) {
2792 SmallVector<Value *, 5> NewArgs(II->args());
2793 NewArgs[NegatedOpArg] = OpNotNeg;
2794 Instruction *NewMul =
2795 Builder.CreateIntrinsic(II->getType(), IID, NewArgs, II);
2796 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(NewMul, II));
2797 }
2798 break;
2799 }
2800 case Intrinsic::fmuladd: {
2801 // Try to simplify the underlying FMul.
2802 if (Value *V =
2803 simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
2804 II->getFastMathFlags(), SQ.getWithInstruction(II)))
2805 return BinaryOperator::CreateFAddFMF(V, II->getArgOperand(2),
2806 II->getFastMathFlags());
2807
2808 [[fallthrough]];
2809 }
2810 case Intrinsic::fma: {
2811 // fma fneg(x), fneg(y), z -> fma x, y, z
2812 Value *Src0 = II->getArgOperand(0);
2813 Value *Src1 = II->getArgOperand(1);
2814 Value *Src2 = II->getArgOperand(2);
2815 Value *X, *Y;
2816 if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
2817 replaceOperand(*II, 0, X);
2818 replaceOperand(*II, 1, Y);
2819 return II;
2820 }
2821
2822 // fma fabs(x), fabs(x), z -> fma x, x, z
2823 if (match(Src0, m_FAbs(m_Value(X))) &&
2824 match(Src1, m_FAbs(m_Specific(X)))) {
2825 replaceOperand(*II, 0, X);
2826 replaceOperand(*II, 1, X);
2827 return II;
2828 }
2829
2830 // Try to simplify the underlying FMul. We can only apply simplifications
2831 // that do not require rounding.
2832 if (Value *V = simplifyFMAFMul(Src0, Src1, II->getFastMathFlags(),
2833 SQ.getWithInstruction(II)))
2834 return BinaryOperator::CreateFAddFMF(V, Src2, II->getFastMathFlags());
2835
2836 // fma x, y, 0 -> fmul x, y
2837 // This is always valid for -0.0, but requires nsz for +0.0 as
2838 // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
2839 if (match(Src2, m_NegZeroFP()) ||
2840 (match(Src2, m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
2841 return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
2842
2843 // fma x, -1.0, y -> fsub y, x
2844 if (match(Src1, m_SpecificFP(-1.0)))
2845 return BinaryOperator::CreateFSubFMF(Src2, Src0, II);
2846
2847 break;
2848 }
2849 case Intrinsic::copysign: {
2850 Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
2851 if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
2852 Sign, getSimplifyQuery().getWithInstruction(II))) {
2853 if (*KnownSignBit) {
2854 // If we know that the sign argument is negative, reduce to FNABS:
2855 // copysign Mag, -Sign --> fneg (fabs Mag)
2856 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
2857 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
2858 }
2859
2860 // If we know that the sign argument is positive, reduce to FABS:
2861 // copysign Mag, +Sign --> fabs Mag
2862 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
2863 return replaceInstUsesWith(*II, Fabs);
2864 }
2865
2866 // Propagate sign argument through nested calls:
2867 // copysign Mag, (copysign ?, X) --> copysign Mag, X
2868 Value *X;
2869    if (match(Sign, m_CopySign(m_Value(), m_Value(X)))) {
2870      Value *CopySign =
2871 Builder.CreateCopySign(Mag, X, FMFSource::intersect(II, Sign));
2872 return replaceInstUsesWith(*II, CopySign);
2873 }
2874
2875 // Clear sign-bit of constant magnitude:
2876 // copysign -MagC, X --> copysign MagC, X
2877 // TODO: Support constant folding for fabs
2878 const APFloat *MagC;
2879 if (match(Mag, m_APFloat(MagC)) && MagC->isNegative()) {
2880 APFloat PosMagC = *MagC;
2881 PosMagC.clearSign();
2882 return replaceOperand(*II, 0, ConstantFP::get(Mag->getType(), PosMagC));
2883 }
2884
2885 // Peek through changes of magnitude's sign-bit. This call rewrites those:
2886 // copysign (fabs X), Sign --> copysign X, Sign
2887 // copysign (fneg X), Sign --> copysign X, Sign
2888 if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
2889 return replaceOperand(*II, 0, X);
2890
2891 break;
2892 }
2893 case Intrinsic::fabs: {
2894 Value *Cond, *TVal, *FVal;
2895 Value *Arg = II->getArgOperand(0);
2896 Value *X;
2897 // fabs (-X) --> fabs (X)
2898 if (match(Arg, m_FNeg(m_Value(X)))) {
2899 CallInst *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
2900 return replaceInstUsesWith(CI, Fabs);
2901 }
2902
2903 if (match(Arg, m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
2904 // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
2905 if (Arg->hasOneUse() ? (isa<Constant>(TVal) || isa<Constant>(FVal))
2906 : (isa<Constant>(TVal) && isa<Constant>(FVal))) {
2907 CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
2908 CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
2909 SelectInst *SI = SelectInst::Create(Cond, AbsT, AbsF);
2910 FastMathFlags FMF1 = II->getFastMathFlags();
2911 FastMathFlags FMF2 = cast<SelectInst>(Arg)->getFastMathFlags();
2912 FMF2.setNoSignedZeros(false);
2913 SI->setFastMathFlags(FMF1 | FMF2);
2914 return SI;
2915 }
2916 // fabs (select Cond, -FVal, FVal) --> fabs FVal
2917 if (match(TVal, m_FNeg(m_Specific(FVal))))
2918 return replaceOperand(*II, 0, FVal);
2919 // fabs (select Cond, TVal, -TVal) --> fabs TVal
2920 if (match(FVal, m_FNeg(m_Specific(TVal))))
2921 return replaceOperand(*II, 0, TVal);
2922 }
2923
2924 Value *Magnitude, *Sign;
2925 if (match(II->getArgOperand(0),
2926 m_CopySign(m_Value(Magnitude), m_Value(Sign)))) {
2927 // fabs (copysign x, y) -> (fabs x)
2928 CallInst *AbsSign =
2929 Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Magnitude, II);
2930 return replaceInstUsesWith(*II, AbsSign);
2931 }
2932
2933 [[fallthrough]];
2934 }
2935 case Intrinsic::ceil:
2936 case Intrinsic::floor:
2937 case Intrinsic::round:
2938 case Intrinsic::roundeven:
2939 case Intrinsic::nearbyint:
2940 case Intrinsic::rint:
2941 case Intrinsic::trunc: {
2942 Value *ExtSrc;
2943 if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
2944 // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
2945 Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
2946 return new FPExtInst(NarrowII, II->getType());
2947 }
2948 break;
2949 }
2950 case Intrinsic::cos:
2951 case Intrinsic::amdgcn_cos: {
2952 Value *X, *Sign;
2953 Value *Src = II->getArgOperand(0);
2954 if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X))) ||
2955 match(Src, m_CopySign(m_Value(X), m_Value(Sign)))) {
2956 // cos(-x) --> cos(x)
2957 // cos(fabs(x)) --> cos(x)
2958 // cos(copysign(x, y)) --> cos(x)
2959 return replaceOperand(*II, 0, X);
2960 }
2961 break;
2962 }
2963 case Intrinsic::sin:
2964 case Intrinsic::amdgcn_sin: {
2965 Value *X;
2966 if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
2967 // sin(-x) --> -sin(x)
2968 Value *NewSin = Builder.CreateUnaryIntrinsic(IID, X, II);
2969 return UnaryOperator::CreateFNegFMF(NewSin, II);
2970 }
2971 break;
2972 }
2973 case Intrinsic::ldexp: {
2974 // ldexp(ldexp(x, a), b) -> ldexp(x, a + b)
2975 //
2976 // The danger is if the first ldexp would overflow to infinity or underflow
2977 // to zero, but the combined exponent avoids it. We ignore this with
2978 // reassoc.
2979 //
2980 // It's also safe to fold if we know both exponents are >= 0 or <= 0 since
2981 // it would just double down on the overflow/underflow which would occur
2982 // anyway.
2983 //
2984 // TODO: Could do better if we had range tracking for the input value
2985 // exponent. Also could broaden sign check to cover == 0 case.
2986 Value *Src = II->getArgOperand(0);
2987 Value *Exp = II->getArgOperand(1);
2988 Value *InnerSrc;
2989 Value *InnerExp;
2990    if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ldexp>(
2991                       m_Value(InnerSrc), m_Value(InnerExp)))) &&
2992 Exp->getType() == InnerExp->getType()) {
2993 FastMathFlags FMF = II->getFastMathFlags();
2994 FastMathFlags InnerFlags = cast<FPMathOperator>(Src)->getFastMathFlags();
2995
2996 if ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
2997 signBitMustBeTheSame(Exp, InnerExp, SQ.getWithInstruction(II))) {
2998 // TODO: Add nsw/nuw probably safe if integer type exceeds exponent
2999 // width.
3000 Value *NewExp = Builder.CreateAdd(InnerExp, Exp);
3001 II->setArgOperand(1, NewExp);
3002 II->setFastMathFlags(InnerFlags); // Or the inner flags.
3003 return replaceOperand(*II, 0, InnerSrc);
3004 }
3005 }
3006
3007 // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0)
3008 // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0)
3009 Value *ExtSrc;
3010 if (match(Exp, m_ZExt(m_Value(ExtSrc))) &&
3011 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3012 Value *Select =
3013 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 2.0),
3014 ConstantFP::get(II->getType(), 1.0));
3015      return BinaryOperator::CreateFMulFMF(Src, Select, II);
3016    }
3017 if (match(Exp, m_SExt(m_Value(ExtSrc))) &&
3018 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3019 Value *Select =
3020 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 0.5),
3021 ConstantFP::get(II->getType(), 1.0));
3022      return BinaryOperator::CreateFMulFMF(Src, Select, II);
3023    }
3024
3025 // ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x
3026 // ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp)
3027    //
3028 // TODO: If we cared, should insert a canonicalize for x
3029 Value *SelectCond, *SelectLHS, *SelectRHS;
3030 if (match(II->getArgOperand(1),
3031 m_OneUse(m_Select(m_Value(SelectCond), m_Value(SelectLHS),
3032 m_Value(SelectRHS))))) {
3033 Value *NewLdexp = nullptr;
3034 Value *Select = nullptr;
3035 if (match(SelectRHS, m_ZeroInt())) {
3036 NewLdexp = Builder.CreateLdexp(Src, SelectLHS, II);
3037 Select = Builder.CreateSelect(SelectCond, NewLdexp, Src);
3038 } else if (match(SelectLHS, m_ZeroInt())) {
3039 NewLdexp = Builder.CreateLdexp(Src, SelectRHS, II);
3040 Select = Builder.CreateSelect(SelectCond, Src, NewLdexp);
3041 }
3042
3043 if (NewLdexp) {
3044 Select->takeName(II);
3045 return replaceInstUsesWith(*II, Select);
3046 }
3047 }
3048
3049 break;
3050 }
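  // Illustrative examples of the ldexp folds above:
  //   ldexp(ldexp(%x, 3), 4)  -->  ldexp(%x, 7)   (reassoc, or exponents of the same sign)
  //   ldexp(%x, zext i1 %b)   -->  fmul %x, (select %b, 2.0, 1.0)
  //   ldexp(%x, sext i1 %b)   -->  fmul %x, (select %b, 0.5, 1.0)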
3051 case Intrinsic::ptrauth_auth:
3052 case Intrinsic::ptrauth_resign: {
3053 // (sign|resign) + (auth|resign) can be folded by omitting the middle
3054 // sign+auth component if the key and discriminator match.
3055 bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
3056 Value *Ptr = II->getArgOperand(0);
3057 Value *Key = II->getArgOperand(1);
3058 Value *Disc = II->getArgOperand(2);
3059
3060 // AuthKey will be the key we need to end up authenticating against in
3061 // whatever we replace this sequence with.
3062 Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr;
3063 if (const auto *CI = dyn_cast<CallBase>(Ptr)) {
3064 BasePtr = CI->getArgOperand(0);
3065 if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
3066 if (CI->getArgOperand(1) != Key || CI->getArgOperand(2) != Disc)
3067 break;
3068 } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
3069 if (CI->getArgOperand(3) != Key || CI->getArgOperand(4) != Disc)
3070 break;
3071 AuthKey = CI->getArgOperand(1);
3072 AuthDisc = CI->getArgOperand(2);
3073 } else
3074 break;
3075 } else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Ptr)) {
3076 // ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for
3077 // our purposes, so check for that too.
3078 const auto *CPA = dyn_cast<ConstantPtrAuth>(PtrToInt->getOperand(0));
3079 if (!CPA || !CPA->isKnownCompatibleWith(Key, Disc, DL))
3080 break;
3081
3082 // resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr)
3083 if (NeedSign && isa<ConstantInt>(II->getArgOperand(4))) {
3084 auto *SignKey = cast<ConstantInt>(II->getArgOperand(3));
3085 auto *SignDisc = cast<ConstantInt>(II->getArgOperand(4));
3086 auto *SignAddrDisc = ConstantPointerNull::get(Builder.getPtrTy());
3087 auto *NewCPA = ConstantPtrAuth::get(CPA->getPointer(), SignKey,
3088 SignDisc, SignAddrDisc);
3089        replaceInstUsesWith(
3090            *II, ConstantExpr::getPointerCast(NewCPA, II->getType()));
3091 return eraseInstFromFunction(*II);
3092 }
3093
3094 // auth(ptrauth(p,k,d),k,d) -> p
3095 BasePtr = Builder.CreatePtrToInt(CPA->getPointer(), II->getType());
3096 } else
3097 break;
3098
3099 unsigned NewIntrin;
3100 if (AuthKey && NeedSign) {
3101 // resign(0,1) + resign(1,2) = resign(0, 2)
3102 NewIntrin = Intrinsic::ptrauth_resign;
3103 } else if (AuthKey) {
3104 // resign(0,1) + auth(1) = auth(0)
3105 NewIntrin = Intrinsic::ptrauth_auth;
3106 } else if (NeedSign) {
3107 // sign(0) + resign(0, 1) = sign(1)
3108 NewIntrin = Intrinsic::ptrauth_sign;
3109 } else {
3110 // sign(0) + auth(0) = nop
3111 replaceInstUsesWith(*II, BasePtr);
3112 return eraseInstFromFunction(*II);
3113 }
3114
3115 SmallVector<Value *, 4> CallArgs;
3116 CallArgs.push_back(BasePtr);
3117 if (AuthKey) {
3118 CallArgs.push_back(AuthKey);
3119 CallArgs.push_back(AuthDisc);
3120 }
3121
3122 if (NeedSign) {
3123 CallArgs.push_back(II->getArgOperand(3));
3124 CallArgs.push_back(II->getArgOperand(4));
3125 }
3126
3127 Function *NewFn =
3128 Intrinsic::getOrInsertDeclaration(II->getModule(), NewIntrin);
3129 return CallInst::Create(NewFn, CallArgs);
3130 }
3131 case Intrinsic::arm_neon_vtbl1:
3132 case Intrinsic::aarch64_neon_tbl1:
3133 if (Value *V = simplifyNeonTbl1(*II, Builder))
3134 return replaceInstUsesWith(*II, V);
3135 break;
3136
3137 case Intrinsic::arm_neon_vmulls:
3138 case Intrinsic::arm_neon_vmullu:
3139 case Intrinsic::aarch64_neon_smull:
3140 case Intrinsic::aarch64_neon_umull: {
3141 Value *Arg0 = II->getArgOperand(0);
3142 Value *Arg1 = II->getArgOperand(1);
3143
3144 // Handle mul by zero first:
3145    if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
3146      return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3147 }
3148
3149 // Check for constant LHS & RHS - in this case we just simplify.
3150 bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
3151 IID == Intrinsic::aarch64_neon_umull);
3152 VectorType *NewVT = cast<VectorType>(II->getType());
3153 if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3154 if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3155 Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
3156 Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
3157 return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1));
3158 }
3159
3160 // Couldn't simplify - canonicalize constant to the RHS.
3161 std::swap(Arg0, Arg1);
3162 }
3163
3164 // Handle mul by one:
3165 if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3166 if (ConstantInt *Splat =
3167 dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3168 if (Splat->isOne())
3169 return CastInst::CreateIntegerCast(Arg0, II->getType(),
3170 /*isSigned=*/!Zext);
3171
3172 break;
3173 }
3174 case Intrinsic::arm_neon_aesd:
3175 case Intrinsic::arm_neon_aese:
3176 case Intrinsic::aarch64_crypto_aesd:
3177 case Intrinsic::aarch64_crypto_aese:
3178 case Intrinsic::aarch64_sve_aesd:
3179 case Intrinsic::aarch64_sve_aese: {
3180 Value *DataArg = II->getArgOperand(0);
3181 Value *KeyArg = II->getArgOperand(1);
3182
3183 // Accept zero on either operand.
3184 if (!match(KeyArg, m_ZeroInt()))
3185 std::swap(KeyArg, DataArg);
3186
3187 // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
3188 Value *Data, *Key;
3189 if (match(KeyArg, m_ZeroInt()) &&
3190 match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
3191 replaceOperand(*II, 0, Data);
3192 replaceOperand(*II, 1, Key);
3193 return II;
3194 }
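    // Illustrative sketch (value names are hypothetical):
    //   %t = xor <16 x i8> %data, %key
    //   %r = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %t,
    //                                                 <16 x i8> zeroinitializer)
    // becomes @llvm.aarch64.crypto.aese(<16 x i8> %data, <16 x i8> %key),
    // reusing the instruction's built-in XOR.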
3195 break;
3196 }
3197 case Intrinsic::hexagon_V6_vandvrt:
3198 case Intrinsic::hexagon_V6_vandvrt_128B: {
3199 // Simplify Q -> V -> Q conversion.
3200 if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3201 Intrinsic::ID ID0 = Op0->getIntrinsicID();
3202 if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
3203 ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
3204 break;
3205 Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
3206 uint64_t Bytes1 = computeKnownBits(Bytes, Op0).One.getZExtValue();
3207 uint64_t Mask1 = computeKnownBits(Mask, II).One.getZExtValue();
3208 // Check if every byte has common bits in Bytes and Mask.
3209 uint64_t C = Bytes1 & Mask1;
3210 if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
3211 return replaceInstUsesWith(*II, Op0->getArgOperand(0));
3212 }
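    // Illustrative sketch (the constants are hypothetical): with
    // Bytes == Mask == 0x01010101, every byte position shares a set bit, so
    // vandvrt(vandqrt(Q, Bytes), Mask) round-trips the predicate and the
    // outer call folds to Q.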
3213 break;
3214 }
3215 case Intrinsic::stackrestore: {
3216 enum class ClassifyResult {
3217 None,
3218 Alloca,
3219 StackRestore,
3220 CallWithSideEffects,
3221 };
3222 auto Classify = [](const Instruction *I) {
3223 if (isa<AllocaInst>(I))
3224 return ClassifyResult::Alloca;
3225
3226 if (auto *CI = dyn_cast<CallInst>(I)) {
3227 if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
3228 if (II->getIntrinsicID() == Intrinsic::stackrestore)
3229 return ClassifyResult::StackRestore;
3230
3231 if (II->mayHaveSideEffects())
3232 return ClassifyResult::CallWithSideEffects;
3233 } else {
3234           // Consider all non-intrinsic calls to have side effects
3235 return ClassifyResult::CallWithSideEffects;
3236 }
3237 }
3238
3239 return ClassifyResult::None;
3240 };
3241
3242 // If the stacksave and the stackrestore are in the same BB, and there is
3243 // no intervening call, alloca, or stackrestore of a different stacksave,
3244 // remove the restore. This can happen when variable allocas are DCE'd.
3245 if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3246 if (SS->getIntrinsicID() == Intrinsic::stacksave &&
3247 SS->getParent() == II->getParent()) {
3248 BasicBlock::iterator BI(SS);
3249 bool CannotRemove = false;
3250 for (++BI; &*BI != II; ++BI) {
3251 switch (Classify(&*BI)) {
3252 case ClassifyResult::None:
3253 // So far so good, look at next instructions.
3254 break;
3255
3256 case ClassifyResult::StackRestore:
3257 // If we found an intervening stackrestore for a different
3258 // stacksave, we can't remove the stackrestore. Otherwise, continue.
3259 if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
3260 CannotRemove = true;
3261 break;
3262
3263 case ClassifyResult::Alloca:
3264 case ClassifyResult::CallWithSideEffects:
3265 // If we found an alloca, a non-intrinsic call, or an intrinsic
3266 // call with side effects, we can't remove the stackrestore.
3267 CannotRemove = true;
3268 break;
3269 }
3270 if (CannotRemove)
3271 break;
3272 }
3273
3274 if (!CannotRemove)
3275 return eraseInstFromFunction(CI);
3276 }
3277 }
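    // Illustrative sketch (value names are hypothetical):
    //   %sp = call ptr @llvm.stacksave.p0()
    //   %x = add i32 %a, %b            ; no allocas, calls, or other restores
    //   call void @llvm.stackrestore.p0(ptr %sp)
    // Here the stackrestore is removed as a no-op.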
3278
3279 // Scan down this block to see if there is another stack restore in the
3280 // same block without an intervening call/alloca.
3282 Instruction *TI = II->getParent()->getTerminator();
3283 bool CannotRemove = false;
3284 for (++BI; &*BI != TI; ++BI) {
3285 switch (Classify(&*BI)) {
3286 case ClassifyResult::None:
3287 // So far so good, look at next instructions.
3288 break;
3289
3290 case ClassifyResult::StackRestore:
3291 // If there is a stackrestore below this one, remove this one.
3292 return eraseInstFromFunction(CI);
3293
3294 case ClassifyResult::Alloca:
3295 case ClassifyResult::CallWithSideEffects:
3296 // If we found an alloca, a non-intrinsic call, or an intrinsic call
3297 // with side effects (such as llvm.stacksave and llvm.read_register),
3298 // we can't remove the stack restore.
3299 CannotRemove = true;
3300 break;
3301 }
3302 if (CannotRemove)
3303 break;
3304 }
3305
3306 // If the stack restore is in a return, resume, or unwind block and if there
3307 // are no allocas or calls between the restore and the return, nuke the
3308 // restore.
3309 if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3310 return eraseInstFromFunction(CI);
3311 break;
3312 }
3313 case Intrinsic::lifetime_end:
3314   // ASan needs to poison memory to detect invalid accesses, which is possible
3315   // even for an empty lifetime range.
3316 if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
3317 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
3318 II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
3319 break;
3320
3321 if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
3322 return I.getIntrinsicID() == Intrinsic::lifetime_start;
3323 }))
3324 return nullptr;
3325 break;
3326 case Intrinsic::assume: {
3327 Value *IIOperand = II->getArgOperand(0);
3329 II->getOperandBundlesAsDefs(OpBundles);
3330
3331 /// This will remove the boolean Condition from the assume given as
3332 /// argument and remove the assume if it becomes useless.
3333     /// It always returns nullptr so it can be used as a return value.
3334 auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * {
3335 assert(isa<AssumeInst>(Assume));
3337 return eraseInstFromFunction(CI);
3338 replaceUse(II->getOperandUse(0), ConstantInt::getTrue(II->getContext()));
3339 return nullptr;
3340 };
3341 // Remove an assume if it is followed by an identical assume.
3342 // TODO: Do we need this? Unless there are conflicting assumptions, the
3343 // computeKnownBits(IIOperand) below here eliminates redundant assumes.
3344 Instruction *Next = II->getNextNode();
3346 return RemoveConditionFromAssume(Next);
3347
3348 // Canonicalize assume(a && b) -> assume(a); assume(b);
3349 // Note: New assumption intrinsics created here are registered by
3350 // the InstCombineIRInserter object.
3351 FunctionType *AssumeIntrinsicTy = II->getFunctionType();
3352 Value *AssumeIntrinsic = II->getCalledOperand();
3353 Value *A, *B;
3354 if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
3355 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles,
3356 II->getName());
3357 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());
3358 return eraseInstFromFunction(*II);
3359 }
3360 // assume(!(a || b)) -> assume(!a); assume(!b);
3361 if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
3362 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
3363 Builder.CreateNot(A), OpBundles, II->getName());
3364 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
3365 Builder.CreateNot(B), II->getName());
3366 return eraseInstFromFunction(*II);
3367 }
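    // Illustrative sketch (value names are hypothetical):
    //   %c = and i1 %a, %b
    //   call void @llvm.assume(i1 %c)
    // is split into
    //   call void @llvm.assume(i1 %a)
    //   call void @llvm.assume(i1 %b)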
3368
3369 // assume( (load addr) != null ) -> add 'nonnull' metadata to load
3370 // (if assume is valid at the load)
3371 Instruction *LHS;
3373 m_Zero())) &&
3374 LHS->getOpcode() == Instruction::Load &&
3375 LHS->getType()->isPointerTy() &&
3376 isValidAssumeForContext(II, LHS, &DT)) {
3377 MDNode *MD = MDNode::get(II->getContext(), {});
3378 LHS->setMetadata(LLVMContext::MD_nonnull, MD);
3379 LHS->setMetadata(LLVMContext::MD_noundef, MD);
3380 return RemoveConditionFromAssume(II);
3381
3382 // TODO: apply nonnull return attributes to calls and invokes
3383 // TODO: apply range metadata for range check patterns?
3384 }
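    // Illustrative sketch (value names are hypothetical):
    //   %p = load ptr, ptr %q
    //   %c = icmp ne ptr %p, null
    //   call void @llvm.assume(i1 %c)
    // tags the load with !nonnull and !noundef metadata and drops the
    // condition from the assume.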
3385
3386 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
3387 OperandBundleUse OBU = II->getOperandBundleAt(Idx);
3388
3389 // Separate storage assumptions apply to the underlying allocations, not
3390 // any particular pointer within them. When evaluating the hints for AA
3391       // purposes we call getUnderlyingObject on them; by precomputing the answers here
3392 // we can avoid having to do so repeatedly there.
3393 if (OBU.getTagName() == "separate_storage") {
3394 assert(OBU.Inputs.size() == 2);
3395 auto MaybeSimplifyHint = [&](const Use &U) {
3396 Value *Hint = U.get();
3397 // Not having a limit is safe because InstCombine removes unreachable
3398 // code.
3399 Value *UnderlyingObject = getUnderlyingObject(Hint, /*MaxLookup*/ 0);
3400 if (Hint != UnderlyingObject)
3401 replaceUse(const_cast<Use &>(U), UnderlyingObject);
3402 };
3403 MaybeSimplifyHint(OBU.Inputs[0]);
3404 MaybeSimplifyHint(OBU.Inputs[1]);
3405 }
3406
3407 // Try to remove redundant alignment assumptions.
3408 if (OBU.getTagName() == "align" && OBU.Inputs.size() == 2) {
3410 *cast<AssumeInst>(II), II->arg_size() + Idx);
3411 if (!RK || RK.AttrKind != Attribute::Alignment ||
3413 continue;
3414
3415 // Don't try to remove align assumptions for pointers derived from
3416       // arguments. We might lose information if the function gets inlined and
3417 // the align argument attribute disappears.
3419 if (!UO || isa<Argument>(UO))
3420 continue;
3421
3422 // Compute known bits for the pointer, passing nullptr as context to
3423 // avoid computeKnownBits using the assumption we are about to remove
3424 // for reasoning.
3425 KnownBits Known = computeKnownBits(RK.WasOn, /*CtxI=*/nullptr);
3426 unsigned TZ = std::min(Known.countMinTrailingZeros(),
3428 if ((1ULL << TZ) < RK.ArgValue)
3429 continue;
3431 }
3432 }
3433
3434 // Convert nonnull assume like:
3435 // %A = icmp ne i32* %PTR, null
3436 // call void @llvm.assume(i1 %A)
3437 // into
3438 // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
3440 match(IIOperand,
3442 A->getType()->isPointerTy()) {
3443 if (auto *Replacement = buildAssumeFromKnowledge(
3444 {RetainedKnowledge{Attribute::NonNull, 0, A}}, Next, &AC, &DT)) {
3445
3446 Replacement->insertBefore(Next->getIterator());
3447 AC.registerAssumption(Replacement);
3448 return RemoveConditionFromAssume(II);
3449 }
3450 }
3451
3452 // Convert alignment assume like:
3453 // %B = ptrtoint i32* %A to i64
3454 // %C = and i64 %B, Constant
3455 // %D = icmp eq i64 %C, 0
3456 // call void @llvm.assume(i1 %D)
3457 // into
3458 // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
3459 uint64_t AlignMask = 1;
3461 (match(IIOperand, m_Not(m_Trunc(m_Value(A)))) ||
3462 match(IIOperand,
3464 m_And(m_Value(A), m_ConstantInt(AlignMask)),
3465 m_Zero())))) {
3466 if (isPowerOf2_64(AlignMask + 1)) {
3467 uint64_t Offset = 0;
3469 if (match(A, m_PtrToInt(m_Value(A)))) {
3470 /// Note: this doesn't preserve the offset information but merges
3471 /// offset and alignment.
3472 /// TODO: we can generate a GEP instead of merging the alignment with
3473 /// the offset.
3474 RetainedKnowledge RK{Attribute::Alignment,
3475 (unsigned)MinAlign(Offset, AlignMask + 1), A};
3476 if (auto *Replacement =
3478
3479 Replacement->insertAfter(II->getIterator());
3480 AC.registerAssumption(Replacement);
3481 }
3482 return RemoveConditionFromAssume(II);
3483 }
3484 }
3485 }
3486
3487 /// Canonicalize Knowledge in operand bundles.
3488 if (EnableKnowledgeRetention && II->hasOperandBundles()) {
3489 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
3490 auto &BOI = II->bundle_op_info_begin()[Idx];
3493 if (BOI.End - BOI.Begin > 2)
3494 continue; // Prevent reducing knowledge in an align with offset since
3495                   // extracting a RetainedKnowledge from them loses offset
3496 // information
3497 RetainedKnowledge CanonRK =
3500 &getDominatorTree());
3501 if (CanonRK == RK)
3502 continue;
3503 if (!CanonRK) {
3504 if (BOI.End - BOI.Begin > 0) {
3505 Worklist.pushValue(II->op_begin()[BOI.Begin]);
3506 Value::dropDroppableUse(II->op_begin()[BOI.Begin]);
3507 }
3508 continue;
3509 }
3510 assert(RK.AttrKind == CanonRK.AttrKind);
3511 if (BOI.End - BOI.Begin > 0)
3512 II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
3513 if (BOI.End - BOI.Begin > 1)
3514 II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
3515 Type::getInt64Ty(II->getContext()), CanonRK.ArgValue));
3516 if (RK.WasOn)
3517 Worklist.pushValue(RK.WasOn);
3518 return II;
3519 }
3520 }
3521
3522 // If there is a dominating assume with the same condition as this one,
3523 // then this one is redundant, and should be removed.
3524 KnownBits Known(1);
3525 computeKnownBits(IIOperand, Known, II);
3527 return eraseInstFromFunction(*II);
3528
3529 // assume(false) is unreachable.
3530 if (match(IIOperand, m_CombineOr(m_Zero(), m_Undef()))) {
3532 return eraseInstFromFunction(*II);
3533 }
3534
3535 // Update the cache of affected values for this assumption (we might be
3536 // here because we just simplified the condition).
3537 AC.updateAffectedValues(cast<AssumeInst>(II));
3538 break;
3539 }
3540 case Intrinsic::experimental_guard: {
3541 // Is this guard followed by another guard? We scan forward over a small
3542 // fixed window of instructions to handle common cases with conditions
3543 // computed between guards.
3544 Instruction *NextInst = II->getNextNode();
3545 for (unsigned i = 0; i < GuardWideningWindow; i++) {
3546 // Note: Using context-free form to avoid compile time blow up
3547 if (!isSafeToSpeculativelyExecute(NextInst))
3548 break;
3549 NextInst = NextInst->getNextNode();
3550 }
3551 Value *NextCond = nullptr;
3552 if (match(NextInst,
3554 Value *CurrCond = II->getArgOperand(0);
3555
3556     // Remove a guard if it is immediately preceded by an identical guard.
3557 // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3558 if (CurrCond != NextCond) {
3559 Instruction *MoveI = II->getNextNode();
3560 while (MoveI != NextInst) {
3561 auto *Temp = MoveI;
3562 MoveI = MoveI->getNextNode();
3563 Temp->moveBefore(II->getIterator());
3564 }
3565 replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
3566 }
3567 eraseInstFromFunction(*NextInst);
3568 return II;
3569 }
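    // Illustrative sketch (value names are hypothetical):
    //   call void (i1, ...) @llvm.experimental.guard(i1 %a) [ "deopt"() ]
    //   call void (i1, ...) @llvm.experimental.guard(i1 %b) [ "deopt"() ]
    // becomes a single guard on (and i1 %a, %b).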
3570 break;
3571 }
3572 case Intrinsic::vector_insert: {
3573 Value *Vec = II->getArgOperand(0);
3574 Value *SubVec = II->getArgOperand(1);
3575 Value *Idx = II->getArgOperand(2);
3576 auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
3577 auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
3578 auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
3579
3580 // Only canonicalize if the destination vector, Vec, and SubVec are all
3581 // fixed vectors.
3582 if (DstTy && VecTy && SubVecTy) {
3583 unsigned DstNumElts = DstTy->getNumElements();
3584 unsigned VecNumElts = VecTy->getNumElements();
3585 unsigned SubVecNumElts = SubVecTy->getNumElements();
3586 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3587
3588 // An insert that entirely overwrites Vec with SubVec is a nop.
3589 if (VecNumElts == SubVecNumElts)
3590 return replaceInstUsesWith(CI, SubVec);
3591
3592 // Widen SubVec into a vector of the same width as Vec, since
3593 // shufflevector requires the two input vectors to be the same width.
3594 // Elements beyond the bounds of SubVec within the widened vector are
3595 // undefined.
3596 SmallVector<int, 8> WidenMask;
3597 unsigned i;
3598 for (i = 0; i != SubVecNumElts; ++i)
3599 WidenMask.push_back(i);
3600 for (; i != VecNumElts; ++i)
3601 WidenMask.push_back(PoisonMaskElem);
3602
3603 Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
3604
3606 for (unsigned i = 0; i != IdxN; ++i)
3607 Mask.push_back(i);
3608 for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
3609 Mask.push_back(i);
3610 for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
3611 Mask.push_back(i);
3612
3613 Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
3614 return replaceInstUsesWith(CI, Shuffle);
3615 }
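    // Illustrative sketch (value names are hypothetical):
    //   call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %v,
    //                                                  <2 x i32> %sv, i64 0)
    // is rewritten as a shufflevector that widens %sv with poison lanes and
    // then blends it into %v.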
3616 break;
3617 }
3618 case Intrinsic::vector_extract: {
3619 Value *Vec = II->getArgOperand(0);
3620 Value *Idx = II->getArgOperand(1);
3621
3622 Type *ReturnType = II->getType();
3623 // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
3624 // ExtractIdx)
3625 unsigned ExtractIdx = cast<ConstantInt>(Idx)->getZExtValue();
3626 Value *InsertTuple, *InsertIdx, *InsertValue;
3628 m_Value(InsertValue),
3629 m_Value(InsertIdx))) &&
3630 InsertValue->getType() == ReturnType) {
3631 unsigned Index = cast<ConstantInt>(InsertIdx)->getZExtValue();
3632 // Case where we get the same index right after setting it.
3633 // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
3634 // InsertValue
3635 if (ExtractIdx == Index)
3636 return replaceInstUsesWith(CI, InsertValue);
3637 // If we are getting a different index than what was set in the
3638     // insert.vector intrinsic, we can just set the input tuple to the one up
3639 // in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
3640 // InsertIndex), ExtractIndex)
3641 // --> extract.vector(InsertTuple, ExtractIndex)
3642 else
3643 return replaceOperand(CI, 0, InsertTuple);
3644 }
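    // Illustrative sketch (value names are hypothetical, intrinsic type
    // suffixes elided):
    //   %t = call <vscale x 16 x i8> @llvm.vector.insert(
    //            <vscale x 16 x i8> %tuple, <16 x i8> %v, i64 16)
    //   %e = call <16 x i8> @llvm.vector.extract(<vscale x 16 x i8> %t, i64 16)
    // simplifies %e to %v; with a different extract index the extract reads
    // from %tuple instead.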
3645
3646 auto *DstTy = dyn_cast<VectorType>(ReturnType);
3647 auto *VecTy = dyn_cast<VectorType>(Vec->getType());
3648
3649 if (DstTy && VecTy) {
3650 auto DstEltCnt = DstTy->getElementCount();
3651 auto VecEltCnt = VecTy->getElementCount();
3652 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3653
3654 // Extracting the entirety of Vec is a nop.
3655 if (DstEltCnt == VecTy->getElementCount()) {
3656 replaceInstUsesWith(CI, Vec);
3657 return eraseInstFromFunction(CI);
3658 }
3659
3660 // Only canonicalize to shufflevector if the destination vector and
3661 // Vec are fixed vectors.
3662 if (VecEltCnt.isScalable() || DstEltCnt.isScalable())
3663 break;
3664
3666 for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i)
3667 Mask.push_back(IdxN + i);
3668
3669 Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
3670 return replaceInstUsesWith(CI, Shuffle);
3671 }
3672 break;
3673 }
3674 case Intrinsic::experimental_vp_reverse: {
3675 Value *X;
3676 Value *Vec = II->getArgOperand(0);
3677 Value *Mask = II->getArgOperand(1);
3678 if (!match(Mask, m_AllOnes()))
3679 break;
3680 Value *EVL = II->getArgOperand(2);
3681 // TODO: Canonicalize experimental.vp.reverse after unop/binops?
3682 // rev(unop rev(X)) --> unop X
3683 if (match(Vec,
3685 m_Value(X), m_AllOnes(), m_Specific(EVL)))))) {
3686 auto *OldUnOp = cast<UnaryOperator>(Vec);
3688 OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(),
3689 II->getIterator());
3690 return replaceInstUsesWith(CI, NewUnOp);
3691 }
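    // Illustrative sketch (value names are hypothetical, intrinsic type
    // suffixes elided):
    //   %r = call <4 x float> @llvm.experimental.vp.reverse(
    //            <4 x float> %x, <4 x i1> splat (i1 true), i32 %evl)
    //   %n = fneg <4 x float> %r
    //   %o = call <4 x float> @llvm.experimental.vp.reverse(
    //            <4 x float> %n, <4 x i1> splat (i1 true), i32 %evl)
    // folds %o to a plain fneg of %x.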
3692 break;
3693 }
3694 case Intrinsic::vector_reduce_or:
3695 case Intrinsic::vector_reduce_and: {
3696 // Canonicalize logical or/and reductions:
3697 // Or reduction for i1 is represented as:
3698 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3699 // %res = cmp ne iReduxWidth %val, 0
3700 // And reduction for i1 is represented as:
3701 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3702     //   %res = cmp eq iReduxWidth %val, -1 (i.e. all ones)
3703 Value *Arg = II->getArgOperand(0);
3704 Value *Vect;
3705
3706 if (Value *NewOp =
3707 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3708 replaceUse(II->getOperandUse(0), NewOp);
3709 return II;
3710 }
3711
3712 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
3713 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
3714 if (FTy->getElementType() == Builder.getInt1Ty()) {
3715 Value *Res = Builder.CreateBitCast(
3716 Vect, Builder.getIntNTy(FTy->getNumElements()));
3717 if (IID == Intrinsic::vector_reduce_and) {
3718 Res = Builder.CreateICmpEQ(
3720 } else {
3721 assert(IID == Intrinsic::vector_reduce_or &&
3722 "Expected or reduction.");
3723 Res = Builder.CreateIsNotNull(Res);
3724 }
3725 if (Arg != Vect)
3726 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
3727 II->getType());
3728 return replaceInstUsesWith(CI, Res);
3729 }
3730 }
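    // Illustrative sketch (value names are hypothetical):
    //   %r = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %v)
    // becomes
    //   %b = bitcast <8 x i1> %v to i8
    //   %r = icmp ne i8 %b, 0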
3731 [[fallthrough]];
3732 }
3733 case Intrinsic::vector_reduce_add: {
3734 if (IID == Intrinsic::vector_reduce_add) {
3735 // Convert vector_reduce_add(ZExt(<n x i1>)) to
3736 // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
3737 // Convert vector_reduce_add(SExt(<n x i1>)) to
3738 // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
3739 // Convert vector_reduce_add(<n x i1>) to
3740 // Trunc(ctpop(bitcast <n x i1> to in)).
3741 Value *Arg = II->getArgOperand(0);
3742 Value *Vect;
3743
3744 if (Value *NewOp =
3745 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3746 replaceUse(II->getOperandUse(0), NewOp);
3747 return II;
3748 }
3749
3750 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
3751 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
3752 if (FTy->getElementType() == Builder.getInt1Ty()) {
3753 Value *V = Builder.CreateBitCast(
3754 Vect, Builder.getIntNTy(FTy->getNumElements()));
3755 Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
3756 if (Res->getType() != II->getType())
3757 Res = Builder.CreateZExtOrTrunc(Res, II->getType());
3758 if (Arg != Vect &&
3759 cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
3760 Res = Builder.CreateNeg(Res);
3761 return replaceInstUsesWith(CI, Res);
3762 }
3763 }
3764 }
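    // Illustrative sketch (value names are hypothetical):
    //   %z = zext <8 x i1> %v to <8 x i32>
    //   %r = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %z)
    // becomes
    //   %b = bitcast <8 x i1> %v to i8
    //   %c = call i8 @llvm.ctpop.i8(i8 %b)
    //   %r = zext i8 %c to i32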
3765 [[fallthrough]];
3766 }
3767 case Intrinsic::vector_reduce_xor: {
3768 if (IID == Intrinsic::vector_reduce_xor) {
3769 // Exclusive disjunction reduction over the vector with
3770 // (potentially-extended) i1 element type is actually a
3771 // (potentially-extended) arithmetic `add` reduction over the original
3772 // non-extended value:
3773 // vector_reduce_xor(?ext(<n x i1>))
3774 // -->
3775 // ?ext(vector_reduce_add(<n x i1>))
3776 Value *Arg = II->getArgOperand(0);
3777 Value *Vect;
3778
3779 if (Value *NewOp =
3780 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3781 replaceUse(II->getOperandUse(0), NewOp);
3782 return II;
3783 }
3784
3785 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
3786 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
3787 if (VTy->getElementType() == Builder.getInt1Ty()) {
3788 Value *Res = Builder.CreateAddReduce(Vect);
3789 if (Arg != Vect)
3790 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
3791 II->getType());
3792 return replaceInstUsesWith(CI, Res);
3793 }
3794 }
3795 }
3796 [[fallthrough]];
3797 }
3798 case Intrinsic::vector_reduce_mul: {
3799 if (IID == Intrinsic::vector_reduce_mul) {
3800 // Multiplicative reduction over the vector with (potentially-extended)
3801 // i1 element type is actually a (potentially zero-extended)
3802 // logical `and` reduction over the original non-extended value:
3803 // vector_reduce_mul(?ext(<n x i1>))
3804 // -->
3805 // zext(vector_reduce_and(<n x i1>))
3806 Value *Arg = II->getArgOperand(0);
3807 Value *Vect;
3808
3809 if (Value *NewOp =
3810 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3811 replaceUse(II->getOperandUse(0), NewOp);
3812 return II;
3813 }
3814
3815 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
3816 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
3817 if (VTy->getElementType() == Builder.getInt1Ty()) {
3818 Value *Res = Builder.CreateAndReduce(Vect);
3819 if (Res->getType() != II->getType())
3820 Res = Builder.CreateZExt(Res, II->getType());
3821 return replaceInstUsesWith(CI, Res);
3822 }
3823 }
3824 }
3825 [[fallthrough]];
3826 }
3827 case Intrinsic::vector_reduce_umin:
3828 case Intrinsic::vector_reduce_umax: {
3829 if (IID == Intrinsic::vector_reduce_umin ||
3830 IID == Intrinsic::vector_reduce_umax) {
3831 // UMin/UMax reduction over the vector with (potentially-extended)
3832 // i1 element type is actually a (potentially-extended)
3833 // logical `and`/`or` reduction over the original non-extended value:
3834 // vector_reduce_u{min,max}(?ext(<n x i1>))
3835 // -->
3836 // ?ext(vector_reduce_{and,or}(<n x i1>))
3837 Value *Arg = II->getArgOperand(0);
3838 Value *Vect;
3839
3840 if (Value *NewOp =
3841 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3842 replaceUse(II->getOperandUse(0), NewOp);
3843 return II;
3844 }
3845
3846 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
3847 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
3848 if (VTy->getElementType() == Builder.getInt1Ty()) {
3849 Value *Res = IID == Intrinsic::vector_reduce_umin
3850 ? Builder.CreateAndReduce(Vect)
3851 : Builder.CreateOrReduce(Vect);
3852 if (Arg != Vect)
3853 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
3854 II->getType());
3855 return replaceInstUsesWith(CI, Res);
3856 }
3857 }
3858 }
3859 [[fallthrough]];
3860 }
3861 case Intrinsic::vector_reduce_smin:
3862 case Intrinsic::vector_reduce_smax: {
3863 if (IID == Intrinsic::vector_reduce_smin ||
3864 IID == Intrinsic::vector_reduce_smax) {
3865 // SMin/SMax reduction over the vector with (potentially-extended)
3866 // i1 element type is actually a (potentially-extended)
3867 // logical `and`/`or` reduction over the original non-extended value:
3868 // vector_reduce_s{min,max}(<n x i1>)
3869 // -->
3870 // vector_reduce_{or,and}(<n x i1>)
3871 // and
3872 // vector_reduce_s{min,max}(sext(<n x i1>))
3873 // -->
3874 // sext(vector_reduce_{or,and}(<n x i1>))
3875 // and
3876 // vector_reduce_s{min,max}(zext(<n x i1>))
3877 // -->
3878 // zext(vector_reduce_{and,or}(<n x i1>))
3879 Value *Arg = II->getArgOperand(0);
3880 Value *Vect;
3881
3882 if (Value *NewOp =
3883 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3884 replaceUse(II->getOperandUse(0), NewOp);
3885 return II;
3886 }
3887
3888 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
3889 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
3890 if (VTy->getElementType() == Builder.getInt1Ty()) {
3891 Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
3892 if (Arg != Vect)
3893 ExtOpc = cast<CastInst>(Arg)->getOpcode();
3894 Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
3895 (ExtOpc == Instruction::CastOps::ZExt))
3896 ? Builder.CreateAndReduce(Vect)
3897 : Builder.CreateOrReduce(Vect);
3898 if (Arg != Vect)
3899 Res = Builder.CreateCast(ExtOpc, Res, II->getType());
3900 return replaceInstUsesWith(CI, Res);
3901 }
3902 }
3903 }
3904 [[fallthrough]];
3905 }
3906 case Intrinsic::vector_reduce_fmax:
3907 case Intrinsic::vector_reduce_fmin:
3908 case Intrinsic::vector_reduce_fadd:
3909 case Intrinsic::vector_reduce_fmul: {
3910 bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd &&
3911 IID != Intrinsic::vector_reduce_fmul) ||
3912 II->hasAllowReassoc();
3913 const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
3914 IID == Intrinsic::vector_reduce_fmul)
3915 ? 1
3916 : 0;
3917 Value *Arg = II->getArgOperand(ArgIdx);
3918 if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) {
3919 replaceUse(II->getOperandUse(ArgIdx), NewOp);
3920 return nullptr;
3921 }
3922 break;
3923 }
3924 case Intrinsic::is_fpclass: {
3925 if (Instruction *I = foldIntrinsicIsFPClass(*II))
3926 return I;
3927 break;
3928 }
3929 case Intrinsic::threadlocal_address: {
3930 Align MinAlign = getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
3931 MaybeAlign Align = II->getRetAlign();
3932 if (MinAlign > Align.valueOrOne()) {
3933 II->addRetAttr(Attribute::getWithAlignment(II->getContext(), MinAlign));
3934 return II;
3935 }
3936 break;
3937 }
3938 case Intrinsic::frexp: {
3939 Value *X;
3940 // The first result is idempotent with the added complication of the struct
3941 // return, and the second result is zero because the value is already
3942 // normalized.
3943 if (match(II->getArgOperand(0), m_ExtractValue<0>(m_Value(X)))) {
3945 X = Builder.CreateInsertValue(
3946 X, Constant::getNullValue(II->getType()->getStructElementType(1)),
3947 1);
3948 return replaceInstUsesWith(*II, X);
3949 }
3950 }
3951 break;
3952 }
3953 case Intrinsic::get_active_lane_mask: {
3954 const APInt *Op0, *Op1;
3955 if (match(II->getOperand(0), m_StrictlyPositive(Op0)) &&
3956 match(II->getOperand(1), m_APInt(Op1))) {
3957 Type *OpTy = II->getOperand(0)->getType();
3958 return replaceInstUsesWith(
3959 *II, Builder.CreateIntrinsic(
3960 II->getType(), Intrinsic::get_active_lane_mask,
3961 {Constant::getNullValue(OpTy),
3962 ConstantInt::get(OpTy, Op1->usub_sat(*Op0))}));
3963 }
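    // Illustrative sketch (the constants are hypothetical):
    //   call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 2, i64 10)
    // becomes
    //   call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 0, i64 8)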
3964 break;
3965 }
3966 default: {
3967 // Handle target specific intrinsics
3968 std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
3969 if (V)
3970 return *V;
3971 break;
3972 }
3973 }
3974
3975 // Try to fold intrinsic into select/phi operands. This is legal if:
3976 // * The intrinsic is speculatable.
3977 // * The select condition is not a vector, or the intrinsic does not
3978 // perform cross-lane operations.
3981 for (Value *Op : II->args()) {
3982 if (auto *Sel = dyn_cast<SelectInst>(Op))
3983 if (Instruction *R = FoldOpIntoSelect(*II, Sel))
3984 return R;
3985 if (auto *Phi = dyn_cast<PHINode>(Op))
3986 if (Instruction *R = foldOpIntoPhi(*II, Phi))
3987 return R;
3988 }
3989
3991 return Shuf;
3992
3994 return replaceInstUsesWith(*II, Reverse);
3995
3997 return replaceInstUsesWith(*II, Res);
3998
3999 // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
4000   // context, so they are handled in visitCallBase, which we delegate to here.
4001 return visitCallBase(*II);
4002}
4003
4004// Fence instruction simplification
4006 auto *NFI = dyn_cast<FenceInst>(FI.getNextNode());
4007 // This check is solely here to handle arbitrary target-dependent syncscopes.
4008   // TODO: Can remove if it does not matter in practice.
4009 if (NFI && FI.isIdenticalTo(NFI))
4010 return eraseInstFromFunction(FI);
4011
4012   // Returns true if FI1 is an identical or stronger fence than FI2.
4013 auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
4014 auto FI1SyncScope = FI1->getSyncScopeID();
4015 // Consider same scope, where scope is global or single-thread.
4016 if (FI1SyncScope != FI2->getSyncScopeID() ||
4017 (FI1SyncScope != SyncScope::System &&
4018 FI1SyncScope != SyncScope::SingleThread))
4019 return false;
4020
4021 return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
4022 };
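  // Illustrative sketch (orderings and scopes are hypothetical): two adjacent
  //   fence seq_cst
  //   fence seq_cst
  // instructions collapse to one, whereas a singlethread-scoped fence next to
  // a system-scoped one is left alone because the scopes differ.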
4023 if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
4024 return eraseInstFromFunction(FI);
4025
4026 if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNode()))
4027 if (isIdenticalOrStrongerFence(PFI, &FI))
4028 return eraseInstFromFunction(FI);
4029 return nullptr;
4030}
4031
4032// InvokeInst simplification
4034 return visitCallBase(II);
4035}
4036
4037// CallBrInst simplification
4039 return visitCallBase(CBI);
4040}
4041
4042Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
4043 if (!CI->getCalledFunction()) return nullptr;
4044
4045 // Skip optimizing notail and musttail calls so
4046 // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
4047 // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
4048 if (CI->isMustTailCall() || CI->isNoTailCall())
4049 return nullptr;
4050
4051 auto InstCombineRAUW = [this](Instruction *From, Value *With) {
4052 replaceInstUsesWith(*From, With);
4053 };
4054 auto InstCombineErase = [this](Instruction *I) {
4056 };
4057 LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
4058 InstCombineRAUW, InstCombineErase);
4059 if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
4060 ++NumSimplified;
4061 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4062 }
4063
4064 return nullptr;
4065}
4066
4068 // Strip off at most one level of pointer casts, looking for an alloca. This
4069 // is good enough in practice and simpler than handling any number of casts.
4070 Value *Underlying = TrampMem->stripPointerCasts();
4071 if (Underlying != TrampMem &&
4072 (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
4073 return nullptr;
4074 if (!isa<AllocaInst>(Underlying))
4075 return nullptr;
4076
4077 IntrinsicInst *InitTrampoline = nullptr;
4078 for (User *U : TrampMem->users()) {
4080 if (!II)
4081 return nullptr;
4082 if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
4083 if (InitTrampoline)
4084 // More than one init_trampoline writes to this value. Give up.
4085 return nullptr;
4086 InitTrampoline = II;
4087 continue;
4088 }
4089 if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
4090 // Allow any number of calls to adjust.trampoline.
4091 continue;
4092 return nullptr;
4093 }
4094
4095 // No call to init.trampoline found.
4096 if (!InitTrampoline)
4097 return nullptr;
4098
4099 // Check that the alloca is being used in the expected way.
4100 if (InitTrampoline->getOperand(0) != TrampMem)
4101 return nullptr;
4102
4103 return InitTrampoline;
4104}
4105
4107 Value *TrampMem) {
4108   // Visit all the previous instructions in the basic block, and try to find an
4109 // init.trampoline which has a direct path to the adjust.trampoline.
4110 for (BasicBlock::iterator I = AdjustTramp->getIterator(),
4111 E = AdjustTramp->getParent()->begin();
4112 I != E;) {
4113 Instruction *Inst = &*--I;
4115 if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
4116 II->getOperand(0) == TrampMem)
4117 return II;
4118 if (Inst->mayWriteToMemory())
4119 return nullptr;
4120 }
4121 return nullptr;
4122}
4123
4124// Given a call to llvm.adjust.trampoline, find and return the corresponding
4125// call to llvm.init.trampoline if the call to the trampoline can be optimized
4126// to a direct call to a function. Otherwise return NULL.
4128 Callee = Callee->stripPointerCasts();
4129 IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
4130 if (!AdjustTramp ||
4131 AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
4132 return nullptr;
4133
4134 Value *TrampMem = AdjustTramp->getOperand(0);
4135
4137 return IT;
4138 if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
4139 return IT;
4140 return nullptr;
4141}
4142
4143Instruction *InstCombinerImpl::foldPtrAuthIntrinsicCallee(CallBase &Call) {
4144 const Value *Callee = Call.getCalledOperand();
4145 const auto *IPC = dyn_cast<IntToPtrInst>(Callee);
4146 if (!IPC || !IPC->isNoopCast(DL))
4147 return nullptr;
4148
4149 const auto *II = dyn_cast<IntrinsicInst>(IPC->getOperand(0));
4150 if (!II)
4151 return nullptr;
4152
4153 Intrinsic::ID IIID = II->getIntrinsicID();
4154 if (IIID != Intrinsic::ptrauth_resign && IIID != Intrinsic::ptrauth_sign)
4155 return nullptr;
4156
4157 // Isolate the ptrauth bundle from the others.
4158 std::optional<OperandBundleUse> PtrAuthBundleOrNone;
4160 for (unsigned BI = 0, BE = Call.getNumOperandBundles(); BI != BE; ++BI) {
4161 OperandBundleUse Bundle = Call.getOperandBundleAt(BI);
4162 if (Bundle.getTagID() == LLVMContext::OB_ptrauth)
4163 PtrAuthBundleOrNone = Bundle;
4164 else
4165 NewBundles.emplace_back(Bundle);
4166 }
4167
4168 if (!PtrAuthBundleOrNone)
4169 return nullptr;
4170
4171 Value *NewCallee = nullptr;
4172 switch (IIID) {
4173 // call(ptrauth.resign(p)), ["ptrauth"()] -> call p, ["ptrauth"()]
4174 // assuming the call bundle and the sign operands match.
4175 case Intrinsic::ptrauth_resign: {
4176 // Resign result key should match bundle.
4177 if (II->getOperand(3) != PtrAuthBundleOrNone->Inputs[0])
4178 return nullptr;
4179 // Resign result discriminator should match bundle.
4180 if (II->getOperand(4) != PtrAuthBundleOrNone->Inputs[1])
4181 return nullptr;
4182
4183 // Resign input (auth) key should also match: we can't change the key on
4184 // the new call we're generating, because we don't know what keys are valid.
4185 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4186 return nullptr;
4187
4188 Value *NewBundleOps[] = {II->getOperand(1), II->getOperand(2)};
4189 NewBundles.emplace_back("ptrauth", NewBundleOps);
4190 NewCallee = II->getOperand(0);
4191 break;
4192 }
4193
4194 // call(ptrauth.sign(p)), ["ptrauth"()] -> call p
4195 // assuming the call bundle and the sign operands match.
4196 // Non-ptrauth indirect calls are undesirable, but so is ptrauth.sign.
4197 case Intrinsic::ptrauth_sign: {
4198 // Sign key should match bundle.
4199 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4200 return nullptr;
4201 // Sign discriminator should match bundle.
4202 if (II->getOperand(2) != PtrAuthBundleOrNone->Inputs[1])
4203 return nullptr;
4204 NewCallee = II->getOperand(0);
4205 break;
4206 }
4207 default:
4208 llvm_unreachable("unexpected intrinsic ID");
4209 }
4210
4211 if (!NewCallee)
4212 return nullptr;
4213
4214 NewCallee = Builder.CreateBitOrPointerCast(NewCallee, Callee->getType());
4215 CallBase *NewCall = CallBase::Create(&Call, NewBundles);
4216 NewCall->setCalledOperand(NewCallee);
4217 return NewCall;
4218}
4219
4220Instruction *InstCombinerImpl::foldPtrAuthConstantCallee(CallBase &Call) {
4222 if (!CPA)
4223 return nullptr;
4224
4225 auto *CalleeF = dyn_cast<Function>(CPA->getPointer());
4226 // If the ptrauth constant isn't based on a function pointer, bail out.
4227 if (!CalleeF)
4228 return nullptr;
4229
4230 // Inspect the call ptrauth bundle to check it matches the ptrauth constant.
4232 if (!PAB)
4233 return nullptr;
4234
4235 auto *Key = cast<ConstantInt>(PAB->Inputs[0]);
4236 Value *Discriminator = PAB->Inputs[1];
4237
4238 // If the bundle doesn't match, this is probably going to fail to auth.
4239 if (!CPA->isKnownCompatibleWith(Key, Discriminator, DL))
4240 return nullptr;
4241
4242 // If the bundle matches the constant, proceed in making this a direct call.
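  // Illustrative sketch (key and discriminator values are hypothetical):
  //   call void ptrauth (ptr @f, i32 0, i64 1234)() [ "ptrauth"(i32 0, i64 1234) ]
  // becomes a direct call to @f.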
4244 NewCall->setCalledOperand(CalleeF);
4245 return NewCall;
4246}
4247
4248bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
4249 const TargetLibraryInfo *TLI) {
4250 // Note: We only handle cases which can't be driven from generic attributes
4251 // here. So, for example, nonnull and noalias (which are common properties
4252 // of some allocation functions) are expected to be handled via annotation
4253 // of the respective allocator declaration with generic attributes.
4254 bool Changed = false;
4255
4256 if (!Call.getType()->isPointerTy())
4257 return Changed;
4258
4259 std::optional<APInt> Size = getAllocSize(&Call, TLI);
4260 if (Size && *Size != 0) {
4261 // TODO: We really should just emit deref_or_null here and then
4262 // let the generic inference code combine that with nonnull.
4263 if (Call.hasRetAttr(Attribute::NonNull)) {
4264 Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
4266 Call.getContext(), Size->getLimitedValue()));
4267 } else {
4268 Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
4270 Call.getContext(), Size->getLimitedValue()));
4271 }
4272 }
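  // Illustrative sketch (value names are hypothetical): for
  //   %p = call ptr @malloc(i64 64)
  // the code above adds dereferenceable_or_null(64) to the return value, or
  // dereferenceable(64) if the return is already known to be nonnull.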
4273
4274 // Add alignment attribute if alignment is a power of two constant.
4275 Value *Alignment = getAllocAlignment(&Call, TLI);
4276 if (!Alignment)
4277 return Changed;
4278
4279 ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
4280 if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
4281 uint64_t AlignmentVal = AlignOpC->getZExtValue();
4282 if (llvm::isPowerOf2_64(AlignmentVal)) {
4283 Align ExistingAlign = Call.getRetAlign().valueOrOne();
4284 Align NewAlign = Align(AlignmentVal);
4285 if (NewAlign > ExistingAlign) {
4288 Changed = true;
4289 }
4290 }
4291 }
4292 return Changed;
4293}
4294
4295/// Improvements for call, callbr and invoke instructions.
4296Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
4297 bool Changed = annotateAnyAllocSite(Call, &TLI);
4298
4299 // Mark any parameters that are known to be non-null with the nonnull
4300 // attribute. This is helpful for inlining calls to functions with null
4301 // checks on their arguments.
4302 SmallVector<unsigned, 4> ArgNos;
4303 unsigned ArgNo = 0;
4304
4305 for (Value *V : Call.args()) {
4306 if (V->getType()->isPointerTy()) {
4307 // Simplify the nonnull operand if the parameter is known to be nonnull.
4308 // Otherwise, try to infer nonnull for it.
4309 bool HasDereferenceable = Call.getParamDereferenceableBytes(ArgNo) > 0;
4310 if (Call.paramHasAttr(ArgNo, Attribute::NonNull) ||
4311 (HasDereferenceable &&
4313 V->getType()->getPointerAddressSpace()))) {
4314 if (Value *Res = simplifyNonNullOperand(V, HasDereferenceable)) {
4315 replaceOperand(Call, ArgNo, Res);
4316 Changed = true;
4317 }
4318 } else if (isKnownNonZero(V,
4319 getSimplifyQuery().getWithInstruction(&Call))) {
4320 ArgNos.push_back(ArgNo);
4321 }
4322 }
4323 ArgNo++;
4324 }
4325
4326 assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
4327
4328 if (!ArgNos.empty()) {
4329 AttributeList AS = Call.getAttributes();
4330 LLVMContext &Ctx = Call.getContext();
4331 AS = AS.addParamAttribute(Ctx, ArgNos,
4332 Attribute::get(Ctx, Attribute::NonNull));
4333 Call.setAttributes(AS);
4334 Changed = true;
4335 }
4336
4337 // If the callee is a pointer to a function, attempt to move any casts to the
4338 // arguments of the call/callbr/invoke.
4340 Function *CalleeF = dyn_cast<Function>(Callee);
4341 if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
4342 transformConstExprCastCall(Call))
4343 return nullptr;
4344
4345 if (CalleeF) {
4346 // Remove the convergent attr on calls when the callee is not convergent.
4347 if (Call.isConvergent() && !CalleeF->isConvergent() &&
4348 !CalleeF->isIntrinsic()) {
4349 LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
4350 << "\n");
4352 return &Call;
4353 }
4354
4355 // If the call and callee calling conventions don't match, and neither one
4356     // of the calling conventions is compatible with the C calling convention,
4357 // this call must be unreachable, as the call is undefined.
4358 if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
4359 !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
4363 // Only do this for calls to a function with a body. A prototype may
4364 // not actually end up matching the implementation's calling conv for a
4365 // variety of reasons (e.g. it may be written in assembly).
4366 !CalleeF->isDeclaration()) {
4367 Instruction *OldCall = &Call;
4369 // If OldCall does not return void then replaceInstUsesWith poison.
4370       // This allows ValueHandles and custom metadata to adjust themselves.
4371 if (!OldCall->getType()->isVoidTy())
4372 replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
4373 if (isa<CallInst>(OldCall))
4374 return eraseInstFromFunction(*OldCall);
4375
4376       // We cannot remove an invoke or a callbr, because it would change the
4377       // CFG; just change the callee to a null pointer.
4378 cast<CallBase>(OldCall)->setCalledFunction(
4379 CalleeF->getFunctionType(),
4380 Constant::getNullValue(CalleeF->getType()));
4381 return nullptr;
4382 }
4383 }
4384
4385 // Calling a null function pointer is undefined if a null address isn't
4386 // dereferenceable.
4387 if ((isa<ConstantPointerNull>(Callee) &&
4389 isa<UndefValue>(Callee)) {
4390 // If Call does not return void then replaceInstUsesWith poison.
4391     // This allows ValueHandles and custom metadata to adjust themselves.
4392 if (!Call.getType()->isVoidTy())
4394
4395 if (Call.isTerminator()) {
4396 // Can't remove an invoke or callbr because we cannot change the CFG.
4397 return nullptr;
4398 }
4399
4400 // This instruction is not reachable, just remove it.
4403 }
4404
4405 if (IntrinsicInst *II = findInitTrampoline(Callee))
4406 return transformCallThroughTrampoline(Call, *II);
4407
4408 // Combine calls involving pointer authentication intrinsics.
4409 if (Instruction *NewCall = foldPtrAuthIntrinsicCallee(Call))
4410 return NewCall;
4411
4412 // Combine calls to ptrauth constants.
4413 if (Instruction *NewCall = foldPtrAuthConstantCallee(Call))
4414 return NewCall;
4415
4416 if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
4417 InlineAsm *IA = cast<InlineAsm>(Callee);
4418 if (!IA->canThrow()) {
4419 // Normal inline asm calls cannot throw - mark them
4420 // 'nounwind'.
4422 Changed = true;
4423 }
4424 }
4425
4426   // Try to optimize the call if possible; we require DataLayout for most of
4427   // this. None of these calls are seen as possibly dead, so go ahead and
4428 // delete the instruction now.
4429 if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
4430 Instruction *I = tryOptimizeCall(CI);
4431     // If we changed something, return the result. Otherwise, fall
4432     // through to the checks below.
4433 if (I) return eraseInstFromFunction(*I);
4434 }
4435
4436 if (!Call.use_empty() && !Call.isMustTailCall())
4437 if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
4438 Type *CallTy = Call.getType();
4439 Type *RetArgTy = ReturnedArg->getType();
4440 if (RetArgTy->canLosslesslyBitCastTo(CallTy))
4441 return replaceInstUsesWith(
4442 Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
4443 }
4444
4445 // Drop unnecessary callee_type metadata from calls that were converted
4446 // into direct calls.
4447 if (Call.getMetadata(LLVMContext::MD_callee_type) && !Call.isIndirectCall()) {
4448 Call.setMetadata(LLVMContext::MD_callee_type, nullptr);
4449 Changed = true;
4450 }
4451
4452 // Drop unnecessary kcfi operand bundles from calls that were converted
4453 // into direct calls.
4455 if (Bundle && !Call.isIndirectCall()) {
4456 DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
4457 if (CalleeF) {
4458 ConstantInt *FunctionType = nullptr;
4459 ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]);
4460
4461 if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
4462 FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0));
4463
4464 if (FunctionType &&
4465 FunctionType->getZExtValue() != ExpectedType->getZExtValue())
4466 dbgs() << Call.getModule()->getName()
4467 << ": warning: kcfi: " << Call.getCaller()->getName()
4468 << ": call to " << CalleeF->getName()
4469 << " using a mismatching function pointer type\n";
4470 }
4471 });
4472
4474 }
4475
4476 if (isRemovableAlloc(&Call, &TLI))
4477 return visitAllocSite(Call);
4478
4479 // Handle intrinsics which can be used in both call and invoke context.
4480 switch (Call.getIntrinsicID()) {
4481 case Intrinsic::experimental_gc_statepoint: {
4482 GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
4483 SmallPtrSet<Value *, 32> LiveGcValues;
4484 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4485 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4486
4487 // Remove the relocation if unused.
4488 if (GCR.use_empty()) {
4490 continue;
4491 }
4492
4493 Value *DerivedPtr = GCR.getDerivedPtr();
4494 Value *BasePtr = GCR.getBasePtr();
4495
4496 // Undef is undef, even after relocation.
4497 if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
4500 continue;
4501 }
4502
4503 if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
4504 // The relocation of null will be null for most any collector.
4505 // TODO: provide a hook for this in GCStrategy. There might be some
4506 // weird collector this property does not hold for.
4507 if (isa<ConstantPointerNull>(DerivedPtr)) {
4508 // Use null-pointer of gc_relocate's type to replace it.
4511 continue;
4512 }
4513
4514 // isKnownNonNull -> nonnull attribute
4515 if (!GCR.hasRetAttr(Attribute::NonNull) &&
4516 isKnownNonZero(DerivedPtr,
4517 getSimplifyQuery().getWithInstruction(&Call))) {
4518 GCR.addRetAttr(Attribute::NonNull);
4519           // We discovered a new fact, so re-check the users.
4520 Worklist.pushUsersToWorkList(GCR);
4521 }
4522 }
4523
4524 // If we have two copies of the same pointer in the statepoint argument
4525 // list, canonicalize to one. This may let us common gc.relocates.
4526 if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
4527 GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
4528 auto *OpIntTy = GCR.getOperand(2)->getType();
4529 GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
4530 }
4531
4532 // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
4533 // Canonicalize on the type from the uses to the defs
4534
4535 // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
4536 LiveGcValues.insert(BasePtr);
4537 LiveGcValues.insert(DerivedPtr);
4538 }
4539 std::optional<OperandBundleUse> Bundle =
4541 unsigned NumOfGCLives = LiveGcValues.size();
4542 if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
4543 break;
4544 // We can reduce the size of gc live bundle.
4545 DenseMap<Value *, unsigned> Val2Idx;
4546 std::vector<Value *> NewLiveGc;
4547 for (Value *V : Bundle->Inputs) {
4548 auto [It, Inserted] = Val2Idx.try_emplace(V);
4549 if (!Inserted)
4550 continue;
4551 if (LiveGcValues.count(V)) {
4552 It->second = NewLiveGc.size();
4553 NewLiveGc.push_back(V);
4554 } else
4555 It->second = NumOfGCLives;
4556 }
4557 // Update all gc.relocates
4558 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4559 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4560 Value *BasePtr = GCR.getBasePtr();
4561 assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
4562 "Missed live gc for base pointer");
4563 auto *OpIntTy1 = GCR.getOperand(1)->getType();
4564 GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
4565 Value *DerivedPtr = GCR.getDerivedPtr();
4566 assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
4567 "Missed live gc for derived pointer");
4568 auto *OpIntTy2 = GCR.getOperand(2)->getType();
4569 GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
4570 }
4571 // Create new statepoint instruction.
4572 OperandBundleDef NewBundle("gc-live", NewLiveGc);
4573 return CallBase::Create(&Call, NewBundle);
4574 }
4575 default: { break; }
4576 }
4577
4578 return Changed ? &Call : nullptr;
4579}
4580
4581/// If the callee is a constexpr cast of a function, attempt to move the cast to
4582/// the arguments of the call/invoke.
4583/// CallBrInst is not supported.
4584bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
4585 auto *Callee =
4587 if (!Callee)
4588 return false;
4589
4591 "CallBr's don't have a single point after a def to insert at");
4592
4593 // Don't perform the transform for declarations, which may not be fully
4594 // accurate. For example, void @foo() is commonly used as a placeholder for
4595 // unknown prototypes.
4596 if (Callee->isDeclaration())
4597 return false;
4598
4599 // If this is a call to a thunk function, don't remove the cast. Thunks are
4600 // used to transparently forward all incoming parameters and outgoing return
4601 // values, so it's important to leave the cast in place.
4602 if (Callee->hasFnAttribute("thunk"))
4603 return false;
4604
4605 // If this is a call to a naked function, the assembly might be
4606   // using an argument or otherwise relying on the frame layout, so
4607   // the function prototype will mismatch.
4608 if (Callee->hasFnAttribute(Attribute::Naked))
4609 return false;
4610
4611 // If this is a musttail call, the callee's prototype must match the caller's
4612 // prototype with the exception of pointee types. The code below doesn't
4613 // implement that, so we can't do this transform.
4614 // TODO: Do the transform if it only requires adding pointer casts.
4615 if (Call.isMustTailCall())
4616 return false;
4617
4619 const AttributeList &CallerPAL = Call.getAttributes();
4620
4621 // Okay, this is a cast from a function to a different type. Unless doing so
4622 // would cause a type conversion of one of our arguments, change this call to
4623 // be a direct call with arguments casted to the appropriate types.
4624 FunctionType *FT = Callee->getFunctionType();
4625 Type *OldRetTy = Caller->getType();
4626 Type *NewRetTy = FT->getReturnType();
4627
4628 // Check to see if we are changing the return type...
4629 if (OldRetTy != NewRetTy) {
4630
4631 if (NewRetTy->isStructTy())
4632 return false; // TODO: Handle multiple return values.
4633
4634 if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
4635 if (!Caller->use_empty())
4636 return false; // Cannot transform this return value.
4637 }
4638
4639 if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
4640 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
4641 if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(
4642 NewRetTy, CallerPAL.getRetAttrs())))
4643 return false; // Attribute not compatible with transformed value.
4644 }
4645
4646 // If the callbase is an invoke instruction, and the return value is
4647 // used by a PHI node in a successor, we cannot change the return type of
4648 // the call because there is no place to put the cast instruction (without
4649 // breaking the critical edge). Bail out in this case.
4650 if (!Caller->use_empty()) {
4651 BasicBlock *PhisNotSupportedBlock = nullptr;
4652 if (auto *II = dyn_cast<InvokeInst>(Caller))
4653 PhisNotSupportedBlock = II->getNormalDest();
4654 if (PhisNotSupportedBlock)
4655 for (User *U : Caller->users())
4656 if (PHINode *PN = dyn_cast<PHINode>(U))
4657 if (PN->getParent() == PhisNotSupportedBlock)
4658 return false;
4659 }
4660 }
4661
4662 unsigned NumActualArgs = Call.arg_size();
4663 unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
4664
4665 // Prevent us turning:
4666 // declare void @takes_i32_inalloca(i32* inalloca)
4667 // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
4668 //
4669 // into:
4670 // call void @takes_i32_inalloca(i32* null)
4671 //
4672 // Similarly, avoid folding away bitcasts of byval calls.
4673 if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
4674 Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
4675 return false;
4676
4677 auto AI = Call.arg_begin();
4678 for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
4679 Type *ParamTy = FT->getParamType(i);
4680 Type *ActTy = (*AI)->getType();
4681
4682 if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
4683 return false; // Cannot transform this parameter value.
4684
4685 // Check if there are any incompatible attributes we cannot drop safely.
4686 if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
4687 .overlaps(AttributeFuncs::typeIncompatible(
4688 ParamTy, CallerPAL.getParamAttrs(i),
4689 AttributeFuncs::ASK_UNSAFE_TO_DROP)))
4690 return false; // Attribute not compatible with transformed value.
4691
4692 if (Call.isInAllocaArgument(i) ||
4693 CallerPAL.hasParamAttr(i, Attribute::Preallocated))
4694 return false; // Cannot transform to and from inalloca/preallocated.
4695
4696 if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
4697 return false;
4698
4699 if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
4700 Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
4701 return false; // Cannot transform to or from byval.
4702 }
4703
4704 if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
4705 !CallerPAL.isEmpty()) {
4706 // In this case we have more arguments than the new function type, but we
4707 // won't be dropping them. Check that these extra arguments have attributes
4708 // that are compatible with being a vararg call argument.
4709 unsigned SRetIdx;
4710 if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
4711 SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
4712 return false;
4713 }
4714
4715 // Okay, we decided that this is a safe thing to do: go ahead and start
4716 // inserting cast instructions as necessary.
4717 SmallVector<Value *, 8> Args;
4719 Args.reserve(NumActualArgs);
4720 ArgAttrs.reserve(NumActualArgs);
4721
4722 // Get any return attributes.
4723 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
4724
4725 // If the return value is not being used, the type may not be compatible
4726 // with the existing attributes. Wipe out any problematic attributes.
4727 RAttrs.remove(
4728 AttributeFuncs::typeIncompatible(NewRetTy, CallerPAL.getRetAttrs()));
4729
4730 LLVMContext &Ctx = Call.getContext();
4731 AI = Call.arg_begin();
4732 for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
4733 Type *ParamTy = FT->getParamType(i);
4734
4735 Value *NewArg = *AI;
4736 if ((*AI)->getType() != ParamTy)
4737 NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
4738 Args.push_back(NewArg);
4739
4740 // Add any parameter attributes except the ones incompatible with the new
4741 // type. Note that we made sure all incompatible ones are safe to drop.
4742 AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
4743 ParamTy, CallerPAL.getParamAttrs(i), AttributeFuncs::ASK_SAFE_TO_DROP);
4744 ArgAttrs.push_back(
4745 CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
4746 }
4747
4748 // If the function takes more arguments than the call was taking, add them
4749 // now.
4750 for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
4751 Args.push_back(Constant::getNullValue(FT->getParamType(i)));
4752 ArgAttrs.push_back(AttributeSet());
4753 }
4754
4755 // If we are removing arguments to the function, emit an obnoxious warning.
4756 if (FT->getNumParams() < NumActualArgs) {
4757 // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
4758 if (FT->isVarArg()) {
4759 // Add all of the arguments in their promoted form to the arg list.
4760 for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
4761 Type *PTy = getPromotedType((*AI)->getType());
4762 Value *NewArg = *AI;
4763 if (PTy != (*AI)->getType()) {
4764 // Must promote to pass through va_arg area!
4765 Instruction::CastOps opcode =
4766 CastInst::getCastOpcode(*AI, false, PTy, false);
4767 NewArg = Builder.CreateCast(opcode, *AI, PTy);
4768 }
4769 Args.push_back(NewArg);
4770
4771 // Add any parameter attributes.
4772 ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
4773 }
4774 }
4775 }
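// Illustrative sketch (not part of the original source; the callee @vf is an
// assumed example). Extra arguments that land in the variadic tail are widened
// before passing through the va_arg area, per getPromotedType and
// CastInst::getCastOpcode above:
//   declare void @vf(i32, ...)
//   call void (i32, ...) @vf(i32 %a, i16 %s)
// becomes
//   %w = zext i16 %s to i32
//   call void (i32, ...) @vf(i32 %a, i32 %w)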
4776
4777 AttributeSet FnAttrs = CallerPAL.getFnAttrs();
4778
4779 if (NewRetTy->isVoidTy())
4780 Caller->setName(""); // Void type should not have a name.
4781
4782 assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
4783 "missing argument attributes");
4784 AttributeList NewCallerPAL = AttributeList::get(
4785 Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
4786
4787 SmallVector<OperandBundleDef, 1> OpBundles;
4788 Call.getOperandBundlesAsDefs(OpBundles);
4789
4790 CallBase *NewCall;
4791 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4792 NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
4793 II->getUnwindDest(), Args, OpBundles);
4794 } else {
4795 NewCall = Builder.CreateCall(Callee, Args, OpBundles);
4796 cast<CallInst>(NewCall)->setTailCallKind(
4797 cast<CallInst>(Caller)->getTailCallKind());
4798 }
4799 NewCall->takeName(Caller);
4800 NewCall->setCallingConv(Call.getCallingConv());
4801 NewCall->setAttributes(NewCallerPAL);
4802
4803 // Preserve prof metadata if any.
4804 NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});
4805
4806 // Insert a cast of the return type as necessary.
4807 Instruction *NC = NewCall;
4808 Value *NV = NC;
4809 if (OldRetTy != NV->getType() && !Caller->use_empty()) {
4810 assert(!NV->getType()->isVoidTy());
4811 NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
4812 NC->setDebugLoc(Caller->getDebugLoc());
4813
4814 auto OptInsertPt = NewCall->getInsertionPointAfterDef();
4815 assert(OptInsertPt && "No place to insert cast");
4816 InsertNewInstBefore(NC, *OptInsertPt);
4817 Worklist.pushUsersToWorkList(*Caller);
4818 }
4819
4820 if (!Caller->use_empty())
4821 replaceInstUsesWith(*Caller, NV);
4822 else if (Caller->hasValueHandle()) {
4823 if (OldRetTy == NV->getType())
4824 ValueHandleBase::ValueIsRAUWd(Caller, NV);
4825 else
4826 // We cannot call ValueIsRAUWd with a different type, and the
4827 // actual tracked value will disappear.
4828 ValueHandleBase::ValueIsDeleted(Caller);
4829 }
4830
4831 eraseInstFromFunction(*Caller);
4832 return true;
4833}
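// Illustrative sketch of the overall rewrite performed above (not part of the
// original source; @callee and a 64-bit DataLayout are assumed):
//   declare ptr @callee(ptr)
//   %r = call i64 @callee(i64 %x)        ; call through a mismatched type
// is re-emitted against the callee's real signature, with bit-or-pointer
// casts inserted for the argument and for the returned value:
//   %a = inttoptr i64 %x to ptr
//   %c = call ptr @callee(ptr %a)
//   %r = ptrtoint ptr %c to i64
// The old call is then erased and its users are rewired to the cast result.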
4834
4835/// Turn a call to a function created by init_trampoline / adjust_trampoline
4836/// intrinsic pair into a direct call to the underlying function.
4837Instruction *
4838InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
4839 IntrinsicInst &Tramp) {
4840 FunctionType *FTy = Call.getFunctionType();
4841 AttributeList Attrs = Call.getAttributes();
4842
4843 // If the call already has the 'nest' attribute somewhere then give up -
4844 // otherwise 'nest' would occur twice after splicing in the chain.
4845 if (Attrs.hasAttrSomewhere(Attribute::Nest))
4846 return nullptr;
4847
4848 Function *NestF = cast<Function>(Tramp.getArgOperand(1)->stripPointerCasts());
4849 FunctionType *NestFTy = NestF->getFunctionType();
4850
4851 AttributeList NestAttrs = NestF->getAttributes();
4852 if (!NestAttrs.isEmpty()) {
4853 unsigned NestArgNo = 0;
4854 Type *NestTy = nullptr;
4855 AttributeSet NestAttr;
4856
4857 // Look for a parameter marked with the 'nest' attribute.
4858 for (FunctionType::param_iterator I = NestFTy->param_begin(),
4859 E = NestFTy->param_end();
4860 I != E; ++NestArgNo, ++I) {
4861 AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
4862 if (AS.hasAttribute(Attribute::Nest)) {
4863 // Record the parameter type and any other attributes.
4864 NestTy = *I;
4865 NestAttr = AS;
4866 break;
4867 }
4868 }
4869
4870 if (NestTy) {
4871 std::vector<Value*> NewArgs;
4872 std::vector<AttributeSet> NewArgAttrs;
4873 NewArgs.reserve(Call.arg_size() + 1);
4874 NewArgAttrs.reserve(Call.arg_size());
4875
4876 // Insert the nest argument into the call argument list, which may
4877 // mean appending it. Likewise for attributes.
4878
4879 {
4880 unsigned ArgNo = 0;
4881 auto I = Call.arg_begin(), E = Call.arg_end();
4882 do {
4883 if (ArgNo == NestArgNo) {
4884 // Add the chain argument and attributes.
4885 Value *NestVal = Tramp.getArgOperand(2);
4886 if (NestVal->getType() != NestTy)
4887 NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
4888 NewArgs.push_back(NestVal);
4889 NewArgAttrs.push_back(NestAttr);
4890 }
4891
4892 if (I == E)
4893 break;
4894
4895 // Add the original argument and attributes.
4896 NewArgs.push_back(*I);
4897 NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
4898
4899 ++ArgNo;
4900 ++I;
4901 } while (true);
4902 }
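// Illustrative note (not part of the original source; the value and argument
// names are assumed). With NestArgNo == 0 and an original call
// `call void %fp(i32 %a, i32 %b)`, the loop above produces the argument list
// (%nest_val, %a, %b): the chain value recorded by init.trampoline is spliced
// in at the callee's 'nest' position and the original arguments follow in
// order, each keeping its original parameter attributes.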
4903
4904 // The trampoline may have been bitcast to a bogus type (FTy).
4905 // Handle this by synthesizing a new function type, equal to FTy
4906 // with the chain parameter inserted.
4907
4908 std::vector<Type*> NewTypes;
4909 NewTypes.reserve(FTy->getNumParams()+1);
4910
4911 // Insert the chain's type into the list of parameter types, which may
4912 // mean appending it.
4913 {
4914 unsigned ArgNo = 0;
4915 FunctionType::param_iterator I = FTy->param_begin(),
4916 E = FTy->param_end();
4917
4918 do {
4919 if (ArgNo == NestArgNo)
4920 // Add the chain's type.
4921 NewTypes.push_back(NestTy);
4922
4923 if (I == E)
4924 break;
4925
4926 // Add the original type.
4927 NewTypes.push_back(*I);
4928
4929 ++ArgNo;
4930 ++I;
4931 } while (true);
4932 }
4933
4934 // Replace the trampoline call with a direct call. Let the generic
4935 // code sort out any function type mismatches.
4936 FunctionType *NewFTy =
4937 FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
4938 AttributeList NewPAL =
4939 AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
4940 Attrs.getRetAttrs(), NewArgAttrs);
4941
4942 SmallVector<OperandBundleDef, 1> OpBundles;
4943 Call.getOperandBundlesAsDefs(OpBundles);
4944
4945 Instruction *NewCaller;
4946 if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
4947 NewCaller = InvokeInst::Create(NewFTy, NestF, II->getNormalDest(),
4948 II->getUnwindDest(), NewArgs, OpBundles);
4949 cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
4950 cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
4951 } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
4952 NewCaller =
4953 CallBrInst::Create(NewFTy, NestF, CBI->getDefaultDest(),
4954 CBI->getIndirectDests(), NewArgs, OpBundles);
4955 cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
4956 cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
4957 } else {
4958 NewCaller = CallInst::Create(NewFTy, NestF, NewArgs, OpBundles);
4959 cast<CallInst>(NewCaller)->setTailCallKind(
4960 cast<CallInst>(Call).getTailCallKind());
4961 cast<CallInst>(NewCaller)->setCallingConv(
4962 cast<CallInst>(Call).getCallingConv());
4963 cast<CallInst>(NewCaller)->setAttributes(NewPAL);
4964 }
4965 NewCaller->setDebugLoc(Call.getDebugLoc());
4966
4967 return NewCaller;
4968 }
4969 }
4970
4971 // Replace the trampoline call with a direct call. Since there is no 'nest'
4972 // parameter, there is no need to adjust the argument list. Let the generic
4973 // code sort out any function type mismatches.
4974 Call.setCalledFunction(FTy, NestF);
4975 return &Call;
4976}
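// Illustrative end-to-end sketch of the trampoline rewrite above (not part of
// the original source; @f, %tramp, %nest_val and %a are assumed names):
//   declare void @f(ptr nest, i32)
//   call void @llvm.init.trampoline(ptr %tramp, ptr @f, ptr %nest_val)
//   %fp = call ptr @llvm.adjust.trampoline(ptr %tramp)
//   call void %fp(i32 %a)
// becomes a direct call with the chain operand spliced into the 'nest' slot:
//   call void @f(ptr nest %nest_val, i32 %a)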
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
BitTracker BT
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
#define DEBUG_TYPE
IRTranslator LLVM IR MI
static Type * getPromotedType(Type *Ty)
Return the specified type promoted as it would be to pass though a va_arg area.
static Instruction * createOverflowTuple(IntrinsicInst *II, Value *Result, Constant *Overflow)
Creates a result tuple for an overflow intrinsic II with a given Result and a constant Overflow value...
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
static bool removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, std::function< bool(const IntrinsicInst &)> IsStart)
static bool inputDenormalIsDAZ(const Function &F, const Type *Ty)
static Instruction * reassociateMinMaxWithConstantInOperand(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If this min/max has a matching min/max operand with a constant, try to push the constant operand into...
static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID)
Helper to match idempotent binary intrinsics, namely, intrinsics where f(f(x, y), y) == f(x,...
static bool signBitMustBeTheSame(Value *Op0, Value *Op1, const SimplifyQuery &SQ)
Return true if two values Op0 and Op1 are known to have the same sign.
static Instruction * moveAddAfterMinMax(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0.
static Instruction * simplifyInvariantGroupIntrinsic(IntrinsicInst &II, InstCombinerImpl &IC)
This function transforms launder.invariant.group and strip.invariant.group like: launder(launder(x)) ...
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
static std::optional< bool > getKnownSign(Value *Op, const SimplifyQuery &SQ)
static cl::opt< unsigned > GuardWideningWindow("instcombine-guard-widening-window", cl::init(3), cl::desc("How wide an instruction window to bypass looking for " "another guard"))
static bool hasUndefSource(AnyMemTransferInst *MI)
Recognize a memcpy/memmove from a trivially otherwise unused alloca.
static Instruction * factorizeMinMaxTree(IntrinsicInst *II)
Reduce a sequence of min/max intrinsics with a common operand.
static Value * simplifyNeonTbl1(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Convert a table lookup to shufflevector if the mask is constant.
static Instruction * foldClampRangeOfTwo(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If we have a clamp pattern like max (min X, 42), 41 – where the output can only be one of two possibl...
static Value * simplifyReductionOperand(Value *Arg, bool CanReorderLanes)
static IntrinsicInst * findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem)
static Value * foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
static std::optional< bool > getKnownSignOrZero(Value *Op, const SimplifyQuery &SQ)
static Value * foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1, const DataLayout &DL, InstCombiner::BuilderTy &Builder)
Fold an unsigned minimum of trailing or leading zero bits counts: umin(cttz(CtOp, ZeroUndef),...
static Value * foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC, IntrinsicInst *II)
Attempt to simplify value-accumulating recurrences of kind: umax.acc = phi i8 [ umax,...
static Instruction * foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC)
static Instruction * foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC)
static IntrinsicInst * findInitTrampoline(Value *Callee)
static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask, const Function &F, Type *Ty)
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
static Value * reassociateMinMaxWithConstants(IntrinsicInst *II, IRBuilderBase &Builder, const SimplifyQuery &SQ)
If this min/max has a constant operand and an operand that is a matching min/max with a constant oper...
static CallInst * canonicalizeConstantArg0ToArg1(CallInst &Call)
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
static bool hasNoSignedWrap(BinaryOperator &I)
static bool inputDenormalIsIEEE(DenormalMode Mode)
Return true if it's possible to assume IEEE treatment of input denormals in F for Val.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
This file contains the declarations for metadata subclasses.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
const SmallVectorImpl< MachineOperand > & Cond
This file implements the SmallBitVector class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
bool isNegative() const
Definition APFloat.h:1449
void clearSign()
Definition APFloat.h:1298
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1201
LLVM_ABI APInt usub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1948
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1111
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1928
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1935
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
LLVM_ABI APInt uadd_sat(const APInt &RHS) const
Definition APInt.cpp:2036
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:334
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1941
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
Definition APSInt.h:312
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
Definition APSInt.h:304
This class represents any memset intrinsic.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
static LLVM_ABI AttributeSet get(LLVMContext &C, const AttrBuilder &B)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
static LLVM_ABI Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context, uint64_t Bytes)
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI bool isSigned() const
Whether the intrinsic is signed or unsigned.
LLVM_ABI Instruction::BinaryOps getBinaryOp() const
Returns the binary operation underlying the intrinsic.
static BinaryOperator * CreateFAddFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:236
static LLVM_ABI BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
static BinaryOperator * CreateNSW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:279
static LLVM_ABI BinaryOperator * CreateNot(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:294
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:244
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:248
static BinaryOperator * CreateFSubFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:240
static LLVM_ABI BinaryOperator * CreateNSWNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
void setDoesNotThrow()
MaybeAlign getRetAlign() const
Extract the alignment of the return value.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
OperandBundleUse getOperandBundleAt(unsigned Index) const
Return the operand bundle at a specific index.
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed in an alloca.
bool hasRetAttr(Attribute::AttrKind Kind) const
Determine whether the return value has the given attribute.
unsigned getNumOperandBundles() const
Return the number of operand bundles associated with this User.
uint64_t getParamDereferenceableBytes(unsigned i) const
Extract the number of dereferenceable bytes for a call or parameter (0=unknown).
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
void setNotConvergent()
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
bool doesNotThrow() const
Determine if the call cannot unwind.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
bool isConvergent() const
Determine if the invoke is convergent.
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
Value * getReturnedArgOperand() const
If one of the arguments has the 'returned' attribute, returns its operand value.
static LLVM_ABI CallBase * Create(CallBase *CB, ArrayRef< OperandBundleDef > Bundles, InsertPosition InsertPt=nullptr)
Create a clone of CB with a different set of operand bundles and insert it before InsertPt.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
static LLVM_ABI CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, InsertPosition InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
CallBr instruction, tracking function calls that may not return control but instead transfer it to a ...
static CallBrInst * Create(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, ArrayRef< BasicBlock * > IndirectDests, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This class represents a function call, abstracting a target machine's calling convention.
bool isNoTailCall() const
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
bool isMustTailCall() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:681
@ ICMP_SLT
signed less than
Definition InstrTypes.h:707
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:708
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:684
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:682
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:683
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:701
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:705
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:686
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:689
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:700
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:694
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:829
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Definition InstrTypes.h:873
Predicate getUnorderedPredicate() const
Definition InstrTypes.h:813
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:264
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI ConstantPtrAuth * get(Constant *Ptr, ConstantInt *Key, ConstantInt *Disc, Constant *AddrDisc)
Return a pointer signed with the specified parameters.
This class represents a range of values.
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI bool icmp(CmpInst::Predicate Pred, const ConstantRange &Other) const
Does the predicate Pred hold between ranges this and Other?
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
Record of a variable value-assignment, aka a non instruction representation of the dbg....
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:229
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:161
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
static FMFSource intersect(Value *A, Value *B)
Intersect the FMF from two instructions.
Definition IRBuilder.h:107
This class represents an extension of floating point types.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
void setNoSignedZeros(bool B=true)
Definition FMF.h:84
bool allowReassoc() const
Flag queries.
Definition FMF.h:64
An instruction for ordering other memory operations.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this fence instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this fence instruction.
Class to represent function types.
Type::subtype_iterator param_iterator
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool isConvergent() const
Determine if the call is convergent.
Definition Function.h:610
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition Function.h:594
bool isIntrinsic() const
isIntrinsic - Returns true if the function's name starts with "llvm.".
Definition Function.h:249
LLVM_ABI Value * getBasePtr() const
unsigned getBasePtrIndex() const
The index into the associate statepoint's argument list which contains the base pointer of the pointe...
LLVM_ABI Value * getDerivedPtr() const
unsigned getDerivedPtrIndex() const
The index into the associate statepoint's argument list which contains the pointer whose relocation t...
std::vector< const GCRelocateInst * > getGCRelocates() const
Get list of all gc reloactes linked to this statepoint May contain several relocations for the same b...
Definition Statepoint.h:206
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition Value.h:576
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:328
PointerType * getType() const
Global values are always pointers.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI Value * CreateLaunderInvariantGroup(Value *Ptr)
Create a launder.invariant.group intrinsic call.
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:502
LLVM_ABI Value * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1420
LLVM_ABI CallInst * CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 1 operand which is mangled on its type.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2082
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:507
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2439
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2209
LLVM_ABI Value * CreateStripInvariantGroup(Value *Ptr)
Create a strip.invariant.group intrinsic call.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false)
Given an instruction with a select as one operand and a constant as the other operand,...
KnownFPClass computeKnownFPClass(Value *Val, FastMathFlags FMF, FPClassTest Interested=fcAllFlags, const Instruction *CtxI=nullptr, unsigned Depth=0) const
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
bool SimplifyDemandedBits(Instruction *I, unsigned Op, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0) override
This form of SimplifyDemandedBits simplifies the specified instruction operand if possible,...
Instruction * SimplifyAnyMemSet(AnyMemSetInst *MI)
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitCallBrInst(CallBrInst &CBI)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Value * foldReversedIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are reverses, try to pull the reverse after the intrinsic.
Value * tryGetLog2(Value *Op, bool AssumeNonZero)
Instruction * visitFenceInst(FenceInst &FI)
Instruction * foldShuffledIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are unary shuffles with the same mask, try to shuffle after the int...
Instruction * visitInvokeInst(InvokeInst &II)
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Instruction * visitVAEndInst(VAEndInst &I)
Instruction * matchBSwapOrBitReverse(Instruction &I, bool MatchBSwaps, bool MatchBitReversals)
Given an initial instruction, check to see if it is the root of a bswap/bitreverse idiom.
Constant * unshuffleConstant(ArrayRef< int > ShMask, Constant *C, VectorType *NewCTy)
Find a constant NewC that has property: shuffle(NewC, ShMask) = C Returns nullptr if such a constant ...
Instruction * visitAllocSite(Instruction &FI)
Instruction * SimplifyAnyMemTransfer(AnyMemTransferInst *MI)
OverflowResult computeOverflow(Instruction::BinaryOps BinaryOp, bool IsSigned, Value *LHS, Value *RHS, Instruction *CxtI) const
Instruction * visitCallInst(CallInst &CI)
CallInst simplification.
SimplifyQuery SQ
unsigned ComputeMaxSignificantBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
DominatorTree & getDominatorTree() const
BlockFrequencyInfo * BFI
TargetLibraryInfo & TLI
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
const DataLayout & DL
DomConditionCache DC
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
AssumptionCache & AC
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const Instruction *CxtI=nullptr, unsigned Depth=0) const
DominatorTree & DT
ProfileSummaryInfo * PSI
BuilderTy & Builder
AssumptionCache & getAssumptionCache() const
OptimizationRemarkEmitter & ORE
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
const SimplifyQuery & getSimplifyQuery() const
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero=false, const Instruction *CxtI=nullptr, unsigned Depth=0)
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI std::optional< InstListType::iterator > getInsertionPointAfterDef()
Get the first insertion point at which the result of this instruction is defined.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
An instruction for reading from memory.
Metadata node.
Definition Metadata.h:1077
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1565
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
bool isSigned() const
Whether the intrinsic is signed or unsigned.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
StringRef getName() const
Get a short "name" for the module.
Definition Module.h:269
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition Operator.h:43
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition Operator.h:78
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:111
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:105
bool isCommutative() const
Return true if the instruction is commutative.
Definition Operator.h:128
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Represents a saturating add/sub intrinsic.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Class to represent struct types.
static LLVM_ABI bool isCallingConvCCompatible(CallBase *CI)
Returns true if call site / callee has cdecl-compatible calling conventions.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:298
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:297
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI bool canLosslesslyBitCastTo(Type *Ty) const
Return true if this type could be converted with a lossless BitCast to type 'Ty'.
Definition Type.cpp:154
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
static UnaryOperator * CreateWithCopiedFlags(UnaryOps Opc, Value *V, Instruction *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:139
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:147
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
void setOperand(unsigned i, Value *Val)
Definition User.h:237
Value * getOperand(unsigned i) const
Definition User.h:232
This represents the llvm.va_end intrinsic.
static LLVM_ABI void ValueIsDeleted(Value *V)
Definition Value.cpp:1228
static LLVM_ABI void ValueIsRAUWd(Value *Old, Value *New)
Definition Value.cpp:1281
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
static constexpr uint64_t MaximumAlignment
Definition Value.h:830
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
static LLVM_ABI void dropDroppableUse(Use &U)
Remove the droppable use U.
Definition Value.cpp:226
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:701
bool use_empty() const
Definition Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1101
static constexpr unsigned MaxAlignmentExponent
The maximum alignment for instructions.
Definition Value.h:829
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:130
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:355
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
OverflowingBinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWNeg(const ValTy &V)
Matches a 'Neg' as 'sub nsw 0, V'.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cstfp_pred_ty< is_neg_zero_fp > m_NegZeroFP()
Match a floating-point negative zero.
specific_fpval m_SpecificFP(double V)
Match a specific floating point value or vector with all elements equal to the value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)
Matches an Xor with LHS and RHS in either order.
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_strictlypositive > m_StrictlyPositive()
Match an integer or vector of strictly positive values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
cst_pred_ty< is_negated_power2 > m_NegatedPower2()
Match a integer or vector negated power-of-2.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
cst_pred_ty< custom_checkfn< APInt > > m_CheckedInt(function_ref< bool(const APInt &)> CheckFn)
Match an integer or vector where CheckFn(ele) for each element is true.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty, true >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty, true > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty, true >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty, true > > > m_c_MaxOrMin(const LHS &L, const RHS &R)
class_match< UnaryOperator > m_UnOp()
Match an arbitrary unary operation and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWSub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap >, DisjointOr_match< LHS, RHS > > m_NSWAddLike(const LHS &L, const RHS &R)
Match either "add nsw" or "or disjoint".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Exact_match< T > m_Exact(const T &SubPattern)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
cstfp_pred_ty< is_pos_zero_fp > m_PosZeroFP()
Match a floating-point positive zero.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
apfloat_match m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > > > m_MaxOrMin(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap >, DisjointOr_match< LHS, RHS > > m_NUWAddLike(const LHS &L, const RHS &R)
Match either "add nuw" or "or disjoint".
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_CopySign(const Opnd0 &Op0, const Opnd1 &Op1)
CastOperator_match< OpTy, Instruction::PtrToInt > m_PtrToInt(const OpTy &Op)
Matches PtrToInt.
MatchFunctor< Val, Pattern > match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:201
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:666
constexpr double e
Definition MathExtras.h:47
DiagnosticInfoOptimizationBase::Argument NV
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
LLVM_ABI Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:355
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
@ NeverOverflows
Never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI Value * simplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for an FMul, fold the result or return null.
LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)
Return true if it is valid to use the assumptions provided by an assume intrinsic,...
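A hedged sketch of the usual guard before acting on an llvm.assume (Assume, CxtI and DT are placeholders for values the caller already has):
  #include "llvm/Analysis/ValueTracking.h"
  using namespace llvm;

  // True if the fact carried by Assume can be used at CxtI (for example
  // because the assume dominates CxtI).
  static bool canUseAssumeAt(const Instruction *Assume, const Instruction *CxtI,
                             const DominatorTree *DT) {
    return isValidAssumeForContext(Assume, CxtI, DT);
  }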
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
LLVM_ABI RetainedKnowledge simplifyRetainedKnowledge(AssumeInst *Assume, RetainedKnowledge RK, AssumptionCache *AC, DominatorTree *DT)
Canonicalize the RetainedKnowledge RK.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
LLVM_ABI bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
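A hedged sketch of the common use, assuming the caller already has the DataLayout and TargetLibraryInfo (names here are placeholders):
  #include "llvm/Analysis/MemoryBuiltins.h"
  using namespace llvm;

  // Fold an @llvm.objectsize call to an integer value when possible.
  // Returns the replacement value, or nullptr if the size cannot be computed.
  static Value *foldObjectSize(IntrinsicInst *ObjSizeCall, const DataLayout &DL,
                               const TargetLibraryInfo &TLI) {
    return lowerObjectSizeCall(ObjSizeCall, DL, &TLI, /*MustSucceed=*/false);
  }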
LLVM_ABI Value * getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI)
Gets the alignment argument for an aligned_alloc-like function, using either built-in knowledge based...
LLVM_ABI RetainedKnowledge getKnowledgeFromOperandInAssume(AssumeInst &Assume, unsigned Idx)
Retrieve the information held by Assume on the operand at index Idx.
LLVM_READONLY APFloat maximum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximum semantics.
Definition APFloat.h:1643
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
Definition MathExtras.h:557
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool isAssumeWithEmptyBundle(const AssumeInst &Assume)
Return true iff the operand bundles of the provided llvm.assume don't contain any valuable informat...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:368
LLVM_ABI RetainedKnowledge getKnowledgeFromBundle(AssumeInst &Assume, const CallBase::BundleOpInfo &BOI)
This extracts the Knowledge from an element of an operand bundle.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
Definition APFloat.h:1598
LLVM_ABI FPClassTest fneg(FPClassTest Mask)
Return the test mask which returns true if the value's sign bit is flipped.
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_ABS
Absolute value.
@ SPF_NABS
Negated absolute value.
LLVM_ABI Constant * getLosslessUnsignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
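A hedged sketch: recognizing a hand-rolled absolute-value select through this helper instead of enumerating the icmp/sub forms by hand (SPF_NABS is the negated variant):
  #include "llvm/Analysis/ValueTracking.h"
  using namespace llvm;

  // True for selects such as `select (icmp slt X, 0), (sub 0, X), X`.
  // On success, LHS and RHS receive the matched operands.
  static bool isAbsIdiom(Value *V) {
    Value *LHS, *RHS;
    return matchSelectPattern(V, LHS, RHS).Flavor == SPF_ABS;
  }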
LLVM_ABI bool matchSimpleBinaryIntrinsicRecurrence(const IntrinsicInst *I, PHINode *&P, Value *&Init, Value *&OtherOp)
Attempt to match a simple value-accumulating recurrence of the form: llvm.intrinsic....
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1743
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1719
bool isAtLeastOrStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
LLVM_ABI Constant * getLosslessSignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
LLVM_ABI AssumeInst * buildAssumeFromKnowledge(ArrayRef< RetainedKnowledge > Knowledge, Instruction *CtxI, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Build and return a new assume created from the provided knowledge if the knowledge in the assume is f...
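A heavily hedged sketch of re-asserting a fact through these assume-bundle helpers; the field names follow the RetainedKnowledge entry further down this list, while the 16-byte alignment and the header paths are assumptions of mine:
  #include "llvm/Analysis/AssumeBundleQueries.h"          // RetainedKnowledge
  #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"  // buildAssumeFromKnowledge
  using namespace llvm;

  // Emit `llvm.assume(true) ["align"(Ptr, 16)]` at CtxI, if it is worthwhile.
  static AssumeInst *reassertAlignment(Value *Ptr, Instruction *CtxI,
                                       AssumptionCache *AC, DominatorTree *DT) {
    RetainedKnowledge RK;
    RK.AttrKind = Attribute::Alignment;
    RK.WasOn = Ptr;    // the value the fact is about
    RK.ArgValue = 16;  // claimed alignment in bytes (illustrative)
    // Per the brief above, a new assume is only built when the knowledge is
    // actually useful; otherwise this may return null.
    return buildAssumeFromKnowledge(RK, CtxI, AC, DT);
  }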
LLVM_ABI FPClassTest inverse_fabs(FPClassTest Mask)
Return the test mask which returns true after fabs is applied to the value.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI bool isNotCrossLaneOperation(const Instruction *I)
Return true if the instruction doesn't potentially cross vector lanes.
LLVM_ABI bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be ...
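A hedged sketch for the masked-intrinsic helpers; the operand index assumes the llvm.masked.load operand order (pointer, alignment, mask, passthru) and should be checked against the intrinsic actually being handled:
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  // If every lane of the mask is known true (or undef), the masked load reads
  // all lanes and behaves like an ordinary vector load.
  static bool maskedLoadActsLikePlainLoad(const IntrinsicInst *MaskedLoad) {
    return maskIsAllOneOrUndef(MaskedLoad->getArgOperand(2));
  }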
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
LLVM_ABI Value * simplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for the multiplication of a FMA, fold the result or return null.
@ Other
Any other memory.
Definition ModRef.h:68
LLVM_ABI Value * simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q)
Given a constrained FP intrinsic call, tries to compute its simplified version.
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 minNum semantics.
Definition APFloat.h:1579
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
@ Add
Sum of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI ConstantRange computeConstantRangeIncludingKnownBits(const WithCache< const Value * > &V, bool ForSigned, const SimplifyQuery &SQ)
Combine constant ranges from computeConstantRange() and computeKnownBits().
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
Like isSafeToSpeculativelyExecute, but does not use information from the instruction's non-constant operands (useful when those operands are going to be replaced).
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
constexpr unsigned BitWidth
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
LLVM_ABI std::optional< APInt > getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, function_ref< const Value *(const Value *)> Mapper=[](const Value *V) { return V;})
Return the size of the requested allocation.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if any of the elements of this predicate mask are known to be ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
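A hedged sketch (it assumes Cond is a scalar i1; the fold target is illustrative):
  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/Constants.h"
  #include <optional>
  using namespace llvm;

  // If dominating branches already decide Cond at instruction I, return the
  // corresponding i1 constant; otherwise return null.
  static Constant *foldDominatedCondition(const Value *Cond,
                                          const Instruction &I,
                                          const DataLayout &DL) {
    if (std::optional<bool> Implied = isImpliedByDomCondition(Cond, &I, DL))
      return ConstantInt::getBool(Cond->getContext(), *Implied);
    return nullptr;
  }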
LLVM_READONLY APFloat minimum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimum semantics.
Definition APFloat.h:1616
LLVM_ABI bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW=false, bool AllowPoison=true)
Return true if the two given values are negation.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI std::optional< bool > computeKnownFPSignBit(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return false if we can prove the specified FP value's sign bit is 0, true if we can prove it is 1, and std::nullopt otherwise.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define NC
Definition regutils.h:42
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:760
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
@ IEEE
IEEE-754 denormal numbers preserved.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:101
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:235
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:267
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:282
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
bool isNonZero() const
Returns true if this value is known to be non-zero.
Definition KnownBits.h:104
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:241
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:98
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:273
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:279
bool isAllOnes() const
Returns true if value is all one bits.
Definition KnownBits.h:83
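A sketch that ties computeKnownBits to the KnownBits accessors listed above; the fixed-width integer type assumption and the "multiple of 4" property are mine:
  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/Support/KnownBits.h"
  using namespace llvm;

  // True if V is provably a non-negative multiple of 4 at CxtI.
  static bool isNonNegativeMultipleOf4(const Value *V, const DataLayout &DL,
                                       AssumptionCache *AC,
                                       const Instruction *CxtI,
                                       const DominatorTree *DT) {
    KnownBits Known(DL.getTypeSizeInBits(V->getType()).getFixedValue());
    computeKnownBits(V, Known, DL, AC, CxtI, DT);
    return Known.isNonNegative() && Known.countMinTrailingZeros() >= 2;
  }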
FPClassTest KnownFPClasses
Floating-point classes the value could be one of.
Matching combinators.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:141
A lightweight accessor for an operand bundle meant to be passed around by value.
StringRef getTagName() const
Return the tag of this operand bundle as a string.
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
ArrayRef< Use > Inputs
Represent one piece of information held inside an operand bundle of an llvm.assume.
Attribute::AttrKind AttrKind
SelectPatternFlavor Flavor
const DataLayout & DL
const Instruction * CxtI
SimplifyQuery getWithInstruction(const Instruction *I) const
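Most of the ValueTracking entries above accept a SimplifyQuery; a small sketch of narrowing one to a context instruction so that assumes and dominating conditions valid at that point can be used (the positivity check is illustrative):
  #include "llvm/Analysis/SimplifyQuery.h"
  #include "llvm/Analysis/ValueTracking.h"
  using namespace llvm;

  // True if Op is known to be a strictly positive signed value at I.
  static bool isStrictlyPositiveAt(const Value *Op, const Instruction &I,
                                   const SimplifyQuery &SQ) {
    const SimplifyQuery Q = SQ.getWithInstruction(&I);
    return isKnownNonZero(Op, Q) && isKnownNonNegative(Op, Q);
  }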