34#include "llvm/IR/IntrinsicsHexagon.h"
57#define DEBUG_TYPE "hexagon-vc"
72class HexagonVectorCombine {
77 :
F(F_),
DL(
F.getDataLayout()), AA(AA_), AC(AC_), DT(DT_),
87 Type *getByteTy(
int ElemCount = 0)
const;
90 Type *getBoolTy(
int ElemCount = 0)
const;
94 std::optional<APInt> getIntValue(
const Value *Val)
const;
100 bool isTrue(
const Value *Val)
const;
102 bool isFalse(
const Value *Val)
const;
111 int getSizeOf(
const Value *Val, SizeKind Kind = Store)
const;
112 int getSizeOf(
const Type *Ty, SizeKind Kind = Store)
const;
113 int getTypeAlignment(
Type *Ty)
const;
114 size_t length(
Value *Val)
const;
115 size_t length(
Type *Ty)
const;
124 int Length,
int Where)
const;
148 unsigned ToWidth)
const;
152 std::optional<int> calculatePointerDifference(
Value *Ptr0,
Value *Ptr1)
const;
154 unsigned getNumSignificantBits(
const Value *V,
161 template <
typename T = std::vector<Instruction *>>
164 const T &IgnoreInsts = {})
const;
167 [[maybe_unused]]
bool isByteVecTy(
Type *Ty)
const;
180 int Start,
int Length)
const;
// Bind this AlignVectors instance to an existing HexagonVectorCombine
// context. Stored by reference (see the `const HexagonVectorCombine &HVC`
// member below), so HVC_ must outlive this object.
199 AlignVectors(
const HexagonVectorCombine &HVC_) : HVC(HVC_) {}
204 using InstList = std::vector<Instruction *>;
// Defaulted copy construction: member-wise copy is sufficient.
208 AddrInfo(
const AddrInfo &) =
default;
211 : Inst(
I),
Addr(
A), ValTy(
T), HaveAlign(
H),
212 NeedAlign(HVC.getTypeAlignment(ValTy)) {}
// Defaulted copy assignment, matching the defaulted copy constructor above.
213 AddrInfo &operator=(
const AddrInfo &) =
default;
224 using AddrList = std::vector<AddrInfo>;
228 return A->comesBefore(
B);
231 using DepList = std::set<Instruction *, InstrLess>;
// Create a move group seeded with a single member instruction (AI.Inst),
// anchored at base instruction B. Hvx/Load classify the group; Clones
// starts empty.
234 MoveGroup(
const AddrInfo &AI,
Instruction *
B,
bool Hvx,
bool Load)
235 :
Base(
B), Main{AI.Inst}, Clones{}, IsHvx(Hvx), IsLoad(Load) {}
// Default-constructed (empty) group; fields take their in-class defaults.
236 MoveGroup() =
default;
244 using MoveList = std::vector<MoveGroup>;
// A Segment is a slice of value Val: Start = Begin, Size = Len.
// (Units are presumably bytes, per the enclosing ByteSpan — confirm
// against the full class definition.)
264 Segment(
Value *Val,
int Begin,
int Len)
265 : Val(Val), Start(Begin), Size(Len) {}
// Defaulted copy construction: member-wise copy is sufficient.
266 Segment(
const Segment &Seg) =
default;
// Defaulted copy assignment, matching the defaulted copy constructor above.
267 Segment &operator=(
const Segment &Seg) =
default;
// Block covering Val from segment offset 0 for Len units, placed at
// position Pos within the span (delegates to the (Val, Off, Len, Pos)
// form with Off = 0).
274 Block(
Value *Val,
int Len,
int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
// Block covering Val from segment offset Off for Len units, placed at
// position Pos within the span.
275 Block(
Value *Val,
int Off,
int Len,
int Pos)
276 : Seg(Val, Off, Len), Pos(Pos) {}
// Defaulted copy construction: member-wise copy is sufficient.
277 Block(
const Block &Blk) =
default;
// Defaulted copy assignment, matching the defaulted copy constructor above.
278 Block &operator=(
const Block &Blk) =
default;
284 ByteSpan section(
int Start,
int Length)
const;
285 ByteSpan &shift(
int Offset);
// Unchecked indexed access to the i-th block of this span.
289 Block &operator[](
int i) {
return Blocks[i]; }
// Const overload of the unchecked indexed access above.
290 const Block &operator[](
int i)
const {
return Blocks[i]; }
292 std::vector<Block>
Blocks;
294 using iterator =
decltype(
Blocks)::iterator;
// Iteration forwards directly to the underlying Blocks vector.
295 iterator begin() {
return Blocks.begin(); }
// Past-the-end iterator of the underlying Blocks vector.
296 iterator end() {
return Blocks.end(); }
302 Align getAlignFromValue(
const Value *V)
const;
303 std::optional<AddrInfo> getAddrInfo(
Instruction &In)
const;
304 bool isHvx(
const AddrInfo &AI)
const;
306 [[maybe_unused]]
bool isSectorTy(
Type *Ty)
const;
314 const InstMap &CloneMap = InstMap())
const;
317 const InstMap &CloneMap = InstMap())
const;
341 bool createAddressGroups();
342 MoveList createLoadGroups(
const AddrList &Group)
const;
343 MoveList createStoreGroups(
const AddrList &Group)
const;
344 bool moveTogether(MoveGroup &Move)
const;
345 template <
typename T>
348 void realignLoadGroup(
IRBuilderBase &Builder,
const ByteSpan &VSpan,
349 int ScLen,
Value *AlignVal,
Value *AlignAddr)
const;
350 void realignStoreGroup(
IRBuilderBase &Builder,
const ByteSpan &VSpan,
351 int ScLen,
Value *AlignVal,
Value *AlignAddr)
const;
352 bool realignGroup(
const MoveGroup &Move)
const;
355 int Alignment)
const;
362 std::map<Instruction *, AddrList> AddrGroups;
363 const HexagonVectorCombine &HVC;
368 OS <<
"Inst: " << AI.Inst <<
" " << *AI.Inst <<
'\n';
369 OS <<
"Addr: " << *AI.Addr <<
'\n';
370 OS <<
"Type: " << *AI.ValTy <<
'\n';
371 OS <<
"HaveAlign: " << AI.HaveAlign.value() <<
'\n';
372 OS <<
"NeedAlign: " << AI.NeedAlign.value() <<
'\n';
373 OS <<
"Offset: " << AI.Offset;
379 OS <<
"IsLoad:" << (MG.IsLoad ?
"yes" :
"no");
380 OS <<
", IsHvx:" << (MG.IsHvx ?
"yes" :
"no") <<
'\n';
383 OS <<
" " << *
I <<
'\n';
386 OS <<
" " << *
I <<
'\n';
388 for (
auto [K, V] : MG.Clones) {
390 K->printAsOperand(
OS,
false);
391 OS <<
"\t-> " << *V <<
'\n';
398 const AlignVectors::ByteSpan::Block &
B) {
399 OS <<
" @" <<
B.Pos <<
" [" <<
B.Seg.Start <<
',' <<
B.Seg.Size <<
"] ";
400 if (
B.Seg.Val ==
reinterpret_cast<const Value *
>(&
B)) {
401 OS <<
"(self:" <<
B.Seg.Val <<
')';
402 }
else if (
B.Seg.Val !=
nullptr) {
412 OS <<
"ByteSpan[size=" << BS.size() <<
", extent=" << BS.extent() <<
'\n';
413 for (
const AlignVectors::ByteSpan::Block &
B : BS)
421 HvxIdioms(
const HexagonVectorCombine &HVC_) : HVC(HVC_) {
422 auto *Int32Ty = HVC.getIntTy(32);
423 HvxI32Ty = HVC.getHvxTy(Int32Ty,
false);
424 HvxP32Ty = HVC.getHvxTy(Int32Ty,
true);
430 enum Signedness { Positive, Signed, Unsigned };
445 std::optional<unsigned> RoundAt;
450 -> std::pair<unsigned, Signedness>;
451 auto canonSgn(SValue
X, SValue
Y)
const -> std::pair<SValue, SValue>;
453 auto matchFxpMul(
Instruction &In)
const -> std::optional<FxpOp>;
457 const FxpOp &
Op)
const ->
Value *;
459 bool Rounding)
const ->
Value *;
461 bool Rounding)
const ->
Value *;
464 Value *CarryIn =
nullptr)
const
465 -> std::pair<Value *, Value *>;
470 -> std::pair<Value *, Value *>;
479 const HexagonVectorCombine &HVC;
485 const HvxIdioms::FxpOp &
Op) {
486 static const char *SgnNames[] = {
"Positive",
"Signed",
"Unsigned"};
488 if (
Op.RoundAt.has_value()) {
489 if (
Op.Frac != 0 && *
Op.RoundAt ==
Op.Frac - 1) {
492 OS <<
" + 1<<" << *
Op.RoundAt;
495 OS <<
"\n X:(" << SgnNames[
Op.X.Sgn] <<
") " << *
Op.X.Val <<
"\n"
496 <<
" Y:(" << SgnNames[
Op.Y.Sgn] <<
") " << *
Op.Y.Val;
504template <
typename T>
T *getIfUnordered(
T *MaybeT) {
505 return MaybeT && MaybeT->isUnordered() ? MaybeT :
nullptr;
508 return dyn_cast<T>(In);
511 return getIfUnordered(dyn_cast<LoadInst>(In));
514 return getIfUnordered(dyn_cast<StoreInst>(In));
517#if !defined(_MSC_VER) || _MSC_VER >= 1926
521template <
typename Pred,
typename... Ts>
522void erase_if(std::map<Ts...> &map, Pred p)
524template <
typename Pred,
typename T,
typename U>
525void erase_if(std::map<T, U> &map, Pred p)
528 for (
auto i = map.begin(), e = map.end(); i != e;) {
537template <
typename Pred,
typename T>
void erase_if(
T &&container, Pred p) {
575auto AlignVectors::ByteSpan::extent()
const ->
int {
580 for (
int i = 1, e =
size(); i !=
e; ++i) {
581 Min = std::min(Min,
Blocks[i].Pos);
587auto AlignVectors::ByteSpan::section(
int Start,
int Length)
const -> ByteSpan {
589 for (
const ByteSpan::Block &
B :
Blocks) {
590 int L = std::max(
B.Pos, Start);
591 int R = std::min(
B.Pos +
B.Seg.Size, Start +
Length);
594 int Off =
L >
B.Pos ?
L -
B.Pos : 0;
595 Section.Blocks.emplace_back(
B.Seg.Val,
B.Seg.Start + Off, R - L, L);
601auto AlignVectors::ByteSpan::shift(
int Offset) -> ByteSpan & {
609 for (
int i = 0, e =
Blocks.size(); i != e; ++i)
610 Values[i] =
Blocks[i].Seg.Val;
614auto AlignVectors::getAlignFromValue(
const Value *V)
const ->
Align {
615 const auto *
C = dyn_cast<ConstantInt>(V);
616 assert(
C &&
"Alignment must be a compile-time constant integer");
617 return C->getAlignValue();
620auto AlignVectors::getAddrInfo(
Instruction &In)
const
621 -> std::optional<AddrInfo> {
622 if (
auto *L = isCandidate<LoadInst>(&In))
623 return AddrInfo(HVC, L,
L->getPointerOperand(),
L->getType(),
625 if (
auto *S = isCandidate<StoreInst>(&In))
626 return AddrInfo(HVC, S, S->getPointerOperand(),
627 S->getValueOperand()->getType(), S->getAlign());
628 if (
auto *
II = isCandidate<IntrinsicInst>(&In)) {
631 case Intrinsic::masked_load:
632 return AddrInfo(HVC,
II,
II->getArgOperand(0),
II->getType(),
633 getAlignFromValue(
II->getArgOperand(1)));
634 case Intrinsic::masked_store:
635 return AddrInfo(HVC,
II,
II->getArgOperand(1),
636 II->getArgOperand(0)->getType(),
637 getAlignFromValue(
II->getArgOperand(2)));
643auto AlignVectors::isHvx(
const AddrInfo &AI)
const ->
bool {
644 return HVC.HST.isTypeForHVX(AI.ValTy);
647auto AlignVectors::getPayload(
Value *Val)
const ->
Value * {
648 if (
auto *In = dyn_cast<Instruction>(Val)) {
650 if (
auto *
II = dyn_cast<IntrinsicInst>(In))
651 ID =
II->getIntrinsicID();
652 if (isa<StoreInst>(In) ||
ID == Intrinsic::masked_store)
653 return In->getOperand(0);
658auto AlignVectors::getMask(
Value *Val)
const ->
Value * {
659 if (
auto *
II = dyn_cast<IntrinsicInst>(Val)) {
660 switch (
II->getIntrinsicID()) {
661 case Intrinsic::masked_load:
662 return II->getArgOperand(2);
663 case Intrinsic::masked_store:
664 return II->getArgOperand(3);
668 Type *ValTy = getPayload(Val)->getType();
669 if (
auto *VecTy = dyn_cast<VectorType>(ValTy))
670 return HVC.getFullValue(HVC.getBoolTy(HVC.length(VecTy)));
671 return HVC.getFullValue(HVC.getBoolTy());
674auto AlignVectors::getPassThrough(
Value *Val)
const ->
Value * {
675 if (
auto *
II = dyn_cast<IntrinsicInst>(Val)) {
676 if (
II->getIntrinsicID() == Intrinsic::masked_load)
677 return II->getArgOperand(3);
683 Type *ValTy,
int Adjust,
684 const InstMap &CloneMap)
const
686 if (
auto *
I = dyn_cast<Instruction>(
Ptr))
689 return Builder.CreatePtrAdd(
Ptr, HVC.getConstInt(Adjust),
"gep");
693 Type *ValTy,
int Alignment,
694 const InstMap &CloneMap)
const
697 if (
auto *
I = dyn_cast<Instruction>(V)) {
698 for (
auto [Old, New] : CloneMap)
699 I->replaceUsesOfWith(Old, New);
704 Value *AsInt = Builder.CreatePtrToInt(
Ptr, HVC.getIntTy(),
"pti");
705 Value *
Mask = HVC.getConstInt(-Alignment);
706 Value *
And = Builder.CreateAnd(remap(AsInt),
Mask,
"and");
707 return Builder.CreateIntToPtr(
715 bool HvxHasPredLoad = HVC.HST.useHVXV62Ops();
719 "Expectning scalar predicate");
722 if (!HVC.isTrue(
Predicate) && HvxHasPredLoad) {
724 Alignment, MDSources);
725 return Builder.CreateSelect(Mask, Load, PassThru);
729 assert(!HVC.isUndef(Mask));
730 if (HVC.isZero(Mask))
732 if (HVC.isTrue(Mask))
733 return createSimpleLoad(Builder, ValTy,
Ptr, Alignment, MDSources);
736 Mask, PassThru,
"mld");
746 Builder.CreateAlignedLoad(ValTy,
Ptr,
Align(Alignment),
"ald");
756 assert(HVC.HST.isTypeForHVX(ValTy) &&
757 "Predicates 'scalar' vector loads not yet supported");
759 assert(!
Predicate->getType()->isVectorTy() &&
"Expectning scalar predicate");
760 assert(HVC.getSizeOf(ValTy, HVC.Alloc) % Alignment == 0);
764 return createSimpleLoad(Builder, ValTy,
Ptr, Alignment, MDSources);
766 auto V6_vL32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vL32b_pred_ai);
768 return HVC.createHvxIntrinsic(Builder, V6_vL32b_pred_ai, ValTy,
776 if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
779 "Expectning scalar predicate"));
788 if (HVC.isTrue(Mask)) {
790 return createPredicatedStore(Builder, Val,
Ptr,
Predicate, Alignment,
794 return createSimpleStore(Builder, Val,
Ptr, Alignment, MDSources);
800 Builder.CreateMaskedStore(Val,
Ptr,
Align(Alignment), Mask);
807 Value *PredLoad = createPredicatedLoad(Builder, Val->getType(),
Ptr,
809 Value *Mux = Builder.CreateSelect(Mask, Val, PredLoad);
810 return createPredicatedStore(Builder, Mux,
Ptr,
Predicate, Alignment,
828 assert(HVC.HST.isTypeForHVX(Val->getType()) &&
829 "Predicates 'scalar' vector stores not yet supported");
834 return createSimpleStore(Builder, Val,
Ptr, Alignment, MDSources);
836 assert(HVC.getSizeOf(Val, HVC.Alloc) % Alignment == 0);
837 auto V6_vS32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vS32b_pred_ai);
839 return HVC.createHvxIntrinsic(Builder, V6_vS32b_pred_ai,
nullptr,
848 "Base and In should be in the same block");
849 assert(
Base->comesBefore(In) &&
"Base should come before In");
852 std::deque<Instruction *> WorkQ = {
In};
853 while (!WorkQ.empty()) {
859 if (
auto *
I = dyn_cast<Instruction>(
Op)) {
860 if (
I->getParent() == Parent &&
Base->comesBefore(
I))
868auto AlignVectors::createAddressGroups() ->
bool {
873 auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
874 for (AddrInfo &W : WorkStack) {
875 if (
auto D = HVC.calculatePointerDifference(AI.Addr,
W.Addr))
876 return std::make_pair(
W.Inst, *
D);
878 return std::make_pair(
nullptr, 0);
881 auto traverseBlock = [&](
DomTreeNode *DomN,
auto Visit) ->
void {
884 auto AI = this->getAddrInfo(
I);
887 auto F = findBaseAndOffset(*AI);
890 AI->Offset =
F.second;
893 WorkStack.push_back(*AI);
894 GroupInst = AI->Inst;
896 AddrGroups[GroupInst].push_back(*AI);
902 while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &
Block)
903 WorkStack.pop_back();
906 traverseBlock(HVC.DT.getRootNode(), traverseBlock);
907 assert(WorkStack.empty());
912 erase_if(AddrGroups, [](
auto &
G) {
return G.second.size() == 1; });
916 G.second, [&](
auto &
I) { return HVC.HST.isTypeForHVX(I.ValTy); });
919 return !AddrGroups.empty();
922auto AlignVectors::createLoadGroups(
const AddrList &Group)
const -> MoveList {
930 auto tryAddTo = [&](
const AddrInfo &
Info, MoveGroup &Move) {
931 assert(!Move.Main.empty() &&
"Move group should have non-empty Main");
935 if (Move.IsHvx != isHvx(Info))
939 if (
Base->getParent() !=
Info.Inst->getParent())
942 if (!HVC.isSafeToMoveBeforeInBB(*
Info.Inst,
Base->getIterator()))
946 return HVC.isSafeToMoveBeforeInBB(*
I,
Base->getIterator()) &&
947 HVC.isSafeToClone(*
I);
949 DepList Deps = getUpwardDeps(
Info.Inst,
Base);
953 Move.Main.push_back(
Info.Inst);
960 for (
const AddrInfo &Info : Group) {
961 if (!
Info.Inst->mayReadFromMemory())
963 if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
964 LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info),
true);
968 erase_if(LoadGroups, [](
const MoveGroup &
G) {
return G.Main.size() <= 1; });
971 if (!HVC.HST.useHVXV62Ops())
972 erase_if(LoadGroups, [](
const MoveGroup &
G) {
return G.IsHvx; });
977auto AlignVectors::createStoreGroups(
const AddrList &Group)
const -> MoveList {
985 auto tryAddTo = [&](
const AddrInfo &
Info, MoveGroup &Move) {
986 assert(!Move.Main.empty() &&
"Move group should have non-empty Main");
992 "Not handling stores with return values");
994 if (Move.IsHvx != isHvx(Info))
1000 if (
Base->getParent() !=
Info.Inst->getParent())
1002 if (!HVC.isSafeToMoveBeforeInBB(*
Info.Inst,
Base->getIterator(), Move.Main))
1004 Move.Main.push_back(
Info.Inst);
1008 MoveList StoreGroups;
1010 for (
auto I = Group.rbegin(), E = Group.rend();
I != E; ++
I) {
1011 const AddrInfo &
Info = *
I;
1012 if (!
Info.Inst->mayWriteToMemory())
1014 if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
1015 StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info),
false);
1019 erase_if(StoreGroups, [](
const MoveGroup &
G) {
return G.Main.size() <= 1; });
1022 if (!HVC.HST.useHVXV62Ops())
1023 erase_if(StoreGroups, [](
const MoveGroup &
G) {
return G.IsHvx; });
1028 if (!VADoFullStores) {
1029 erase_if(StoreGroups, [
this](
const MoveGroup &
G) {
1031 auto MaybeInfo = this->getAddrInfo(*S);
1032 assert(MaybeInfo.has_value());
1033 return HVC.HST.isHVXVectorType(
1034 EVT::getEVT(MaybeInfo->ValTy, false));
1042auto AlignVectors::moveTogether(MoveGroup &Move)
const ->
bool {
1044 assert(!Move.Main.empty() &&
"Move group should have non-empty Main");
1050 Move.Clones = cloneBefore(Where->
getIterator(), Move.Deps);
1055 M->moveAfter(Where);
1056 for (
auto [Old, New] : Move.Clones)
1057 M->replaceUsesOfWith(Old, New);
1061 for (
int i = 0, e = Move.Deps.size(); i != e; ++i)
1062 Move.Deps[i] = Move.Clones[Move.Deps[i]];
1067 assert(Move.Deps.empty());
1076 return Move.Main.size() + Move.Deps.size() > 1;
1079template <
typename T>
1085 assert(HVC.isSafeToClone(*
I));
1087 C->setName(
Twine(
"c.") +
I->getName() +
".");
1088 C->insertBefore(To);
1090 for (
auto [Old, New] : Map)
1091 C->replaceUsesOfWith(Old, New);
1092 Map.insert(std::make_pair(
I,
C));
1098 const ByteSpan &VSpan,
int ScLen,
1103 Type *SecTy = HVC.getByteTy(ScLen);
1104 int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
1105 bool DoAlign = !HVC.isZero(AlignVal);
1107 BasicBlock *BaseBlock = Builder.GetInsertBlock();
1110 auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));
1137 for (
int Index = 0;
Index != NumSectors; ++
Index)
1138 ASpan.Blocks.emplace_back(
nullptr, ScLen, Index * ScLen);
1139 for (
int Index = 0;
Index != NumSectors; ++
Index) {
1140 ASpan.Blocks[
Index].Seg.Val =
1141 reinterpret_cast<Value *
>(&ASpan.Blocks[
Index]);
1153 assert(
A->getParent() ==
B->getParent());
1154 return A->comesBefore(
B);
1156 auto earliestUser = [&](
const auto &
Uses) {
1159 auto *
I = dyn_cast<Instruction>(
U.getUser());
1160 assert(
I !=
nullptr &&
"Load used in a non-instruction?");
1164 if (
I->getParent() == BaseBlock) {
1165 if (!isa<PHINode>(
I))
1174 for (
const ByteSpan::Block &
B : VSpan) {
1175 ByteSpan ASection = ASpan.section(
B.Pos,
B.Seg.Size);
1176 for (
const ByteSpan::Block &S : ASection) {
1177 EarliestUser[S.Seg.Val] = std::min(
1178 EarliestUser[S.Seg.Val], earliestUser(
B.Seg.Val->uses()), isEarlier);
1183 dbgs() <<
"ASpan:\n" << ASpan <<
'\n';
1184 dbgs() <<
"Earliest users of ASpan:\n";
1185 for (
auto &[Val,
User] : EarliestUser) {
1186 dbgs() << Val <<
"\n ->" << *
User <<
'\n';
1190 auto createLoad = [&](
IRBuilderBase &Builder,
const ByteSpan &VSpan,
1191 int Index,
bool MakePred) {
1193 createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
1195 MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
1199 int Start = (
Index - DoAlign) * ScLen;
1200 int Width = (1 + DoAlign) * ScLen;
1201 return this->createLoad(Builder, SecTy,
Ptr,
Predicate, ScLen, True, Undef,
1202 VSpan.section(Start, Width).values());
1207 assert(
In->getParent() == To->getParent());
1208 DepList Deps = getUpwardDeps(&*In, &*To);
1211 InstMap
Map = cloneBefore(In, Deps);
1212 for (
auto [Old, New] : Map)
1213 In->replaceUsesOfWith(Old, New);
1218 for (
int Index = 0;
Index != NumSectors + 1; ++
Index) {
1226 DoAlign &&
Index > 0 ? EarliestUser[&ASpan[
Index - 1]] :
nullptr;
1228 Index < NumSectors ? EarliestUser[&ASpan[
Index]] :
nullptr;
1229 if (
auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {
1232 createLoad(Builder, VSpan, Index, DoAlign && Index == NumSectors);
1239 if (
auto *Load = dyn_cast<Instruction>(Loads[Index])) {
1240 if (!HVC.isSafeToMoveBeforeInBB(*Load, BasePos))
1241 moveBefore(
Load->getIterator(), BasePos);
1243 LLVM_DEBUG(
dbgs() <<
"Loads[" << Index <<
"]:" << *Loads[Index] <<
'\n');
1249 for (
int Index = 0;
Index != NumSectors; ++
Index) {
1250 ASpan[
Index].Seg.Val =
nullptr;
1251 if (
auto *Where = EarliestUser[&ASpan[Index]]) {
1257 assert(NextLoad !=
nullptr);
1258 Val = HVC.vralignb(Builder, Val, NextLoad, AlignVal);
1260 ASpan[
Index].Seg.Val = Val;
1265 for (
const ByteSpan::Block &
B : VSpan) {
1266 ByteSpan ASection = ASpan.section(
B.Pos,
B.Seg.Size).shift(-
B.Pos);
1273 std::vector<ByteSpan::Block *> ABlocks;
1274 for (ByteSpan::Block &S : ASection) {
1275 if (S.Seg.Val !=
nullptr)
1276 ABlocks.push_back(&S);
1279 [&](
const ByteSpan::Block *
A,
const ByteSpan::Block *
B) {
1280 return isEarlier(cast<Instruction>(
A->Seg.Val),
1281 cast<Instruction>(
B->Seg.Val));
1283 for (ByteSpan::Block *S : ABlocks) {
1286 Instruction *SegI = cast<Instruction>(S->Seg.Val);
1288 Value *Pay = HVC.vbytes(Builder, getPayload(S->Seg.Val));
1290 HVC.insertb(Builder, Accum, Pay, S->Seg.Start, S->Seg.Size, S->Pos);
1298 Type *ValTy = getPayload(
B.Seg.Val)->getType();
1301 getPassThrough(
B.Seg.Val),
"sel");
1302 B.Seg.Val->replaceAllUsesWith(Sel);
1307 const ByteSpan &VSpan,
int ScLen,
1312 Type *SecTy = HVC.getByteTy(ScLen);
1313 int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
1314 bool DoAlign = !HVC.isZero(AlignVal);
1317 ByteSpan ASpanV, ASpanM;
1325 auto *VecTy = VectorType::get(Ty, 1,
false);
1331 for (
int Index = (DoAlign ? -1 : 0);
Index != NumSectors + DoAlign; ++
Index) {
1335 VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen);
1340 for (ByteSpan::Block &S : VSection) {
1341 Value *Pay = getPayload(S.Seg.Val);
1342 Value *
Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)),
1343 Pay->
getType(), HVC.getByteTy());
1344 Value *PartM = HVC.insertb(Builder, Zero, HVC.vbytes(Builder, Mask),
1345 S.Seg.Start, S.Seg.Size, S.Pos);
1346 AccumM = Builder.
CreateOr(AccumM, PartM);
1348 Value *PartV = HVC.insertb(Builder, Undef, HVC.vbytes(Builder, Pay),
1349 S.Seg.Start, S.Seg.Size, S.Pos);
1354 ASpanV.Blocks.emplace_back(AccumV, ScLen, Index * ScLen);
1355 ASpanM.Blocks.emplace_back(AccumM, ScLen, Index * ScLen);
1359 dbgs() <<
"ASpanV before vlalign:\n" << ASpanV <<
'\n';
1360 dbgs() <<
"ASpanM before vlalign:\n" << ASpanM <<
'\n';
1365 for (
int Index = 1;
Index != NumSectors + 2; ++
Index) {
1366 Value *PrevV = ASpanV[
Index - 1].Seg.Val, *ThisV = ASpanV[
Index].Seg.Val;
1367 Value *PrevM = ASpanM[
Index - 1].Seg.Val, *ThisM = ASpanM[
Index].Seg.Val;
1369 ASpanV[
Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
1370 ASpanM[
Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);
1375 dbgs() <<
"ASpanV after vlalign:\n" << ASpanV <<
'\n';
1376 dbgs() <<
"ASpanM after vlalign:\n" << ASpanM <<
'\n';
1379 auto createStore = [&](
IRBuilderBase &Builder,
const ByteSpan &ASpanV,
1380 const ByteSpan &ASpanM,
int Index,
bool MakePred) {
1383 if (HVC.isUndef(Val) || HVC.isZero(Mask))
1386 createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
1388 MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
1392 int Start = (
Index - DoAlign) * ScLen;
1393 int Width = (1 + DoAlign) * ScLen;
1395 HVC.vlsb(Builder, Mask),
1396 VSpan.section(Start, Width).values());
1399 for (
int Index = 0;
Index != NumSectors + DoAlign; ++
Index) {
1400 createStore(Builder, ASpanV, ASpanM, Index, DoAlign && Index == NumSectors);
1404auto AlignVectors::realignGroup(
const MoveGroup &Move)
const ->
bool {
1413 auto getMaxOf = [](
auto Range,
auto GetValue) {
1415 return GetValue(
A) < GetValue(
B);
1419 const AddrList &BaseInfos = AddrGroups.at(Move.Base);
1434 std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
1437 BaseInfos, std::back_inserter(MoveInfos),
1438 [&TestSet](
const AddrInfo &AI) {
return TestSet.count(AI.Inst); });
1441 const AddrInfo &WithMaxAlign =
1442 getMaxOf(MoveInfos, [](
const AddrInfo &AI) {
return AI.HaveAlign; });
1443 Align MaxGiven = WithMaxAlign.HaveAlign;
1446 const AddrInfo &WithMinOffset =
1447 getMaxOf(MoveInfos, [](
const AddrInfo &AI) {
return -AI.Offset; });
1449 const AddrInfo &WithMaxNeeded =
1450 getMaxOf(MoveInfos, [](
const AddrInfo &AI) {
return AI.NeedAlign; });
1451 Align MinNeeded = WithMaxNeeded.NeedAlign;
1465 Value *AlignAddr =
nullptr;
1466 Value *AlignVal =
nullptr;
1468 if (MinNeeded <= MaxGiven) {
1469 int Start = WithMinOffset.Offset;
1470 int OffAtMax = WithMaxAlign.Offset;
1477 int Adjust = -
alignTo(OffAtMax - Start, MinNeeded.value());
1478 AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
1479 WithMaxAlign.ValTy, Adjust, Move.Clones);
1480 int Diff = Start - (OffAtMax + Adjust);
1481 AlignVal = HVC.getConstInt(Diff);
1483 assert(
static_cast<decltype(MinNeeded.
value())
>(Diff) < MinNeeded.value());
1493 createAlignedPointer(Builder, WithMinOffset.Addr, WithMinOffset.ValTy,
1494 MinNeeded.value(), Move.Clones);
1496 Builder.
CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy(),
"pti");
1497 if (
auto *
I = dyn_cast<Instruction>(AlignVal)) {
1498 for (
auto [Old, New] : Move.Clones)
1499 I->replaceUsesOfWith(Old, New);
1504 for (
const AddrInfo &AI : MoveInfos) {
1505 VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
1506 AI.Offset - WithMinOffset.Offset);
1512 int ScLen = Move.IsHvx ? HVC.HST.getVectorLength()
1513 : std::max<int>(MinNeeded.value(), 4);
1514 assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
1515 assert(Move.IsHvx || ScLen == 4 || ScLen == 8);
1518 dbgs() <<
"ScLen: " << ScLen <<
"\n";
1519 dbgs() <<
"AlignVal:" << *AlignVal <<
"\n";
1520 dbgs() <<
"AlignAddr:" << *AlignAddr <<
"\n";
1521 dbgs() <<
"VSpan:\n" << VSpan <<
'\n';
1525 realignLoadGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
1527 realignStoreGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
1529 for (
auto *Inst : Move.Main)
1530 Inst->eraseFromParent();
1536 int Alignment)
const ->
Value * {
1537 auto *AlignTy = AlignVal->
getType();
1539 AlignVal, ConstantInt::get(AlignTy, Alignment - 1),
"and");
1540 Value *
Zero = ConstantInt::get(AlignTy, 0);
1544auto AlignVectors::isSectorTy(
Type *Ty)
const ->
bool {
1545 if (!HVC.isByteVecTy(Ty))
1547 int Size = HVC.getSizeOf(Ty);
1548 if (HVC.HST.isTypeForHVX(Ty))
1549 return Size ==
static_cast<int>(HVC.HST.getVectorLength());
1553auto AlignVectors::run() ->
bool {
1554 LLVM_DEBUG(
dbgs() <<
"Running HVC::AlignVectors on " << HVC.F.getName()
1556 if (!createAddressGroups())
1560 dbgs() <<
"Address groups(" << AddrGroups.size() <<
"):\n";
1561 for (
auto &[In, AL] : AddrGroups) {
1562 for (
const AddrInfo &AI : AL)
1563 dbgs() <<
"---\n" << AI <<
'\n';
1567 bool Changed =
false;
1568 MoveList LoadGroups, StoreGroups;
1570 for (
auto &
G : AddrGroups) {
1576 dbgs() <<
"\nLoad groups(" << LoadGroups.size() <<
"):\n";
1577 for (
const MoveGroup &
G : LoadGroups)
1578 dbgs() <<
G <<
"\n";
1579 dbgs() <<
"Store groups(" << StoreGroups.size() <<
"):\n";
1580 for (
const MoveGroup &
G : StoreGroups)
1581 dbgs() <<
G <<
"\n";
1585 unsigned CountLimit = VAGroupCountLimit;
1586 if (CountLimit == 0)
1589 if (LoadGroups.size() > CountLimit) {
1590 LoadGroups.resize(CountLimit);
1591 StoreGroups.clear();
1593 unsigned StoreLimit = CountLimit - LoadGroups.size();
1594 if (StoreGroups.size() > StoreLimit)
1595 StoreGroups.resize(StoreLimit);
1598 for (
auto &M : LoadGroups)
1599 Changed |= moveTogether(M);
1600 for (
auto &M : StoreGroups)
1601 Changed |= moveTogether(M);
1605 for (
auto &M : LoadGroups)
1606 Changed |= realignGroup(M);
1607 for (
auto &M : StoreGroups)
1608 Changed |= realignGroup(M);
1618 -> std::pair<unsigned, Signedness> {
1619 unsigned Bits = HVC.getNumSignificantBits(V, In);
1625 KnownBits Known = HVC.getKnownBits(V, In);
1626 Signedness Sign =
Signed;
1627 unsigned NumToTest = 0;
1631 NumToTest =
Bits - 1;
1644 return {
Bits, Sign};
1647auto HvxIdioms::canonSgn(SValue
X, SValue
Y)
const
1648 -> std::pair<SValue, SValue> {
1661auto HvxIdioms::matchFxpMul(
Instruction &In)
const -> std::optional<FxpOp> {
1662 using namespace PatternMatch;
1663 auto *Ty =
In.getType();
1666 return std::nullopt;
1668 unsigned Width = cast<IntegerType>(Ty->
getScalarType())->getBitWidth();
1675 auto m_Shr = [](
auto &&
V,
auto &&S) {
1679 const APInt *Qn =
nullptr;
1687 if (
Op.Frac > Width)
1688 return std::nullopt;
1691 const APInt *
C =
nullptr;
1695 return std::nullopt;
1703 Op.Opcode = Instruction::Mul;
1705 Op.X.Sgn = getNumSignificantBits(
Op.X.Val, &In).second;
1706 Op.Y.Sgn = getNumSignificantBits(
Op.Y.Val, &In).second;
1707 Op.ResTy = cast<VectorType>(Ty);
1711 return std::nullopt;
1714auto HvxIdioms::processFxpMul(
Instruction &In,
const FxpOp &
Op)
const
1716 assert(
Op.X.Val->getType() ==
Op.Y.Val->getType());
1718 auto *VecTy = dyn_cast<VectorType>(
Op.X.Val->getType());
1719 if (VecTy ==
nullptr)
1721 auto *ElemTy = cast<IntegerType>(VecTy->getElementType());
1722 unsigned ElemWidth = ElemTy->getBitWidth();
1725 if ((HVC.length(VecTy) * ElemWidth) % (8 * HVC.HST.getVectorLength()) != 0)
1735 if (ElemWidth <= 32 &&
Op.Frac == 0)
1738 auto [BitsX, SignX] = getNumSignificantBits(
Op.X.Val, &In);
1739 auto [BitsY, SignY] = getNumSignificantBits(
Op.Y.Val, &In);
1747 auto roundUpWidth = [](
unsigned Width) ->
unsigned {
1753 if (Width > 32 && Width % 32 != 0) {
1760 BitsX = roundUpWidth(BitsX);
1761 BitsY = roundUpWidth(BitsY);
1766 unsigned Width = std::max(BitsX, BitsY);
1768 auto *ResizeTy = VectorType::get(HVC.getIntTy(Width), VecTy);
1769 if (Width < ElemWidth) {
1772 }
else if (Width > ElemWidth) {
1779 assert(
X->getType() ==
Y->getType() &&
X->getType() == ResizeTy);
1781 unsigned VecLen = HVC.length(ResizeTy);
1782 unsigned ChopLen = (8 * HVC.HST.getVectorLength()) / std::min(Width, 32u);
1786 ChopOp.ResTy = VectorType::get(
Op.ResTy->getElementType(), ChopLen,
false);
1788 for (
unsigned V = 0;
V != VecLen / ChopLen; ++
V) {
1789 ChopOp.X.Val = HVC.subvector(Builder,
X, V * ChopLen, ChopLen);
1790 ChopOp.Y.Val = HVC.subvector(Builder,
Y, V * ChopLen, ChopLen);
1791 Results.push_back(processFxpMulChopped(Builder, In, ChopOp));
1807 const FxpOp &
Op)
const ->
Value * {
1808 assert(
Op.X.Val->getType() ==
Op.Y.Val->getType());
1809 auto *InpTy = cast<VectorType>(
Op.X.Val->getType());
1810 unsigned Width = InpTy->getScalarSizeInBits();
1813 if (!
Op.RoundAt || *
Op.RoundAt ==
Op.Frac - 1) {
1816 Value *QMul =
nullptr;
1818 QMul = createMulQ15(Builder,
Op.X,
Op.Y, Rounding);
1819 }
else if (Width == 32) {
1820 QMul = createMulQ31(Builder,
Op.X,
Op.Y, Rounding);
1822 if (QMul !=
nullptr)
1828 assert(Width < 32 || Width % 32 == 0);
1838 assert(
Op.Frac != 0 &&
"Unshifted mul should have been skipped");
1839 if (
Op.Frac == 16) {
1841 if (
Value *MulH = createMulH16(Builder,
Op.X,
Op.Y))
1845 Value *Prod32 = createMul16(Builder,
Op.X,
Op.Y);
1847 Value *RoundVal = HVC.getConstSplat(Prod32->
getType(), 1 << *
Op.RoundAt);
1848 Prod32 = Builder.
CreateAdd(Prod32, RoundVal,
"add");
1853 ? Builder.
CreateAShr(Prod32, ShiftAmt,
"asr")
1854 : Builder.
CreateLShr(Prod32, ShiftAmt,
"lsr");
1855 return Builder.
CreateTrunc(Shifted, InpTy,
"trn");
1862 auto WordX = HVC.splitVectorElements(Builder,
Op.X.Val, 32);
1863 auto WordY = HVC.splitVectorElements(Builder,
Op.Y.Val, 32);
1864 auto WordP = createMulLong(Builder, WordX,
Op.X.Sgn, WordY,
Op.Y.Sgn);
1866 auto *HvxWordTy = cast<VectorType>(WordP.front()->getType());
1869 if (
Op.RoundAt.has_value()) {
1872 RoundV[*
Op.RoundAt / 32] =
1873 HVC.getConstSplat(HvxWordTy, 1 << (*
Op.RoundAt % 32));
1874 WordP = createAddLong(Builder, WordP, RoundV);
1880 unsigned SkipWords =
Op.Frac / 32;
1881 Constant *ShiftAmt = HVC.getConstSplat(HvxWordTy,
Op.Frac % 32);
1883 for (
int Dst = 0,
End = WordP.size() - SkipWords; Dst !=
End; ++Dst) {
1884 int Src = Dst + SkipWords;
1886 if (Src + 1 <
End) {
1897 WordP.resize(WordP.size() - SkipWords);
1899 return HVC.joinVectorElements(Builder, WordP,
Op.ResTy);
1902auto HvxIdioms::createMulQ15(
IRBuilderBase &Builder, SValue
X, SValue
Y,
1903 bool Rounding)
const ->
Value * {
1904 assert(
X.Val->getType() ==
Y.Val->getType());
1905 assert(
X.Val->getType()->getScalarType() == HVC.getIntTy(16));
1912 auto V6_vmpyhvsrs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhvsrs);
1913 return HVC.createHvxIntrinsic(Builder, V6_vmpyhvsrs,
X.Val->getType(),
1917auto HvxIdioms::createMulQ31(
IRBuilderBase &Builder, SValue
X, SValue
Y,
1918 bool Rounding)
const ->
Value * {
1919 Type *InpTy =
X.Val->getType();
1920 assert(InpTy ==
Y.Val->getType());
1927 auto V6_vmpyewuh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyewuh);
1929 ? HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_rnd_sacc)
1930 : HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_sacc);
1932 HVC.createHvxIntrinsic(Builder, V6_vmpyewuh, InpTy, {
X.Val,
Y.Val});
1933 return HVC.createHvxIntrinsic(Builder, V6_vmpyo_acc, InpTy,
1934 {V1,
X.Val,
Y.Val});
1938 Value *CarryIn)
const
1939 -> std::pair<Value *, Value *> {
1940 assert(
X->getType() ==
Y->getType());
1941 auto VecTy = cast<VectorType>(
X->getType());
1942 if (VecTy == HvxI32Ty && HVC.HST.useHVXV62Ops()) {
1945 if (CarryIn ==
nullptr && HVC.HST.useHVXV66Ops()) {
1946 AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarryo);
1948 AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarry);
1949 if (CarryIn ==
nullptr)
1950 CarryIn = HVC.getNullValue(HVC.getBoolTy(HVC.length(VecTy)));
1951 Args.push_back(CarryIn);
1953 Value *
Ret = HVC.createHvxIntrinsic(Builder, AddCarry,
1957 return {
Result, CarryOut};
1964 if (CarryIn !=
nullptr) {
1965 unsigned Width = VecTy->getScalarSizeInBits();
1968 for (
unsigned i = 0, e = 32 / Width; i !=
e; ++i)
1969 Mask = (Mask << Width) | 1;
1971 auto V6_vandqrt = HVC.HST.getIntrinsicId(Hexagon::V6_vandqrt);
1973 HVC.createHvxIntrinsic(Builder, V6_vandqrt,
nullptr,
1974 {CarryIn, HVC.getConstInt(Mask)});
1975 Result1 = Builder.
CreateAdd(
X, ValueIn,
"add");
1981 return {Result2, Builder.
CreateOr(CarryOut1, CarryOut2,
"orb")};
1984auto HvxIdioms::createMul16(
IRBuilderBase &Builder, SValue
X, SValue
Y)
const
1987 std::tie(
X,
Y) = canonSgn(
X,
Y);
1990 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhv);
1993 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhus);
1995 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhv);
2000 HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {
Y.Val,
X.Val});
2002 return HVC.vshuff(Builder, HVC.sublo(Builder,
P), HVC.subhi(Builder,
P));
2005auto HvxIdioms::createMulH16(
IRBuilderBase &Builder, SValue
X, SValue
Y)
const
2007 Type *HvxI16Ty = HVC.getHvxTy(HVC.getIntTy(16),
false);
2009 if (HVC.HST.useHVXV69Ops()) {
2011 auto V6_vmpyuhvs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhvs);
2012 return HVC.createHvxIntrinsic(Builder, V6_vmpyuhvs, HvxI16Ty,
2017 Type *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16),
true);
2020 unsigned Len = HVC.length(HvxP16Ty) / 2;
2023 for (
int i = 0; i !=
static_cast<int>(
Len); ++i)
2024 PickOdd[i] = 2 * i + 1;
2027 HVC.sublo(Builder, Pair16), HVC.subhi(Builder, Pair16), PickOdd,
"shf");
2030auto HvxIdioms::createMul32(
IRBuilderBase &Builder, SValue
X, SValue
Y)
const
2031 -> std::pair<Value *, Value *> {
2032 assert(
X.Val->getType() ==
Y.Val->getType());
2033 assert(
X.Val->getType() == HvxI32Ty);
2036 std::tie(
X,
Y) = canonSgn(
X,
Y);
2039 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyss_parts;
2041 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyus_parts;
2043 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyuu_parts;
2046 Value *Parts = HVC.createHvxIntrinsic(Builder, V6_vmpy_parts,
nullptr,
2047 {
X.Val,
Y.Val}, {HvxI32Ty});
2056 assert(WordX.size() == WordY.size());
2057 unsigned Idx = 0,
Length = WordX.size();
2061 if (HVC.isZero(WordX[
Idx]))
2063 else if (HVC.isZero(WordY[
Idx]))
2070 Value *Carry =
nullptr;
2072 std::tie(Sum[
Idx], Carry) =
2073 createAddCarry(Builder, WordX[
Idx], WordY[
Idx], Carry);
2087 for (
int i = 0, e = WordX.size(); i != e; ++i) {
2088 for (
int j = 0, f = WordY.size(); j != f; ++j) {
2090 Signedness SX = (i + 1 ==
e) ? SgnX :
Unsigned;
2092 auto [
Lo,
Hi] = createMul32(Builder, {WordX[i], SX}, {WordY[
j],
SY});
2093 Products[i +
j + 0].push_back(
Lo);
2094 Products[i +
j + 1].push_back(
Hi);
2108 for (
int i = 0, e = Products.size(); i !=
e; ++i) {
2109 while (Products[i].
size() > 1) {
2110 Value *Carry =
nullptr;
2111 for (
int j = i;
j !=
e; ++
j) {
2112 auto &ProdJ = Products[
j];
2113 auto [Sum, CarryOut] = createAddCarry(Builder, pop_back_or_zero(ProdJ),
2114 pop_back_or_zero(ProdJ), Carry);
2115 ProdJ.insert(ProdJ.begin(), Sum);
2122 for (
auto &
P : Products) {
2123 assert(
P.size() == 1 &&
"Should have been added together");
2130auto HvxIdioms::run() ->
bool {
2131 bool Changed =
false;
2134 for (
auto It =
B.rbegin(); It !=
B.rend(); ++It) {
2135 if (
auto Fxm = matchFxpMul(*It)) {
2136 Value *
New = processFxpMul(*It, *Fxm);
2141 bool StartOver = !isa<Instruction>(New);
2142 It->replaceAllUsesWith(New);
2144 It = StartOver ?
B.rbegin()
2145 : cast<Instruction>(New)->getReverseIterator();
2156auto HexagonVectorCombine::run() ->
bool {
2158 dbgs() <<
"Module before HexagonVectorCombine\n" << *
F.getParent();
2160 bool Changed =
false;
2161 if (HST.useHVXOps()) {
2163 Changed |= AlignVectors(*this).run();
2165 Changed |= HvxIdioms(*this).run();
2169 dbgs() <<
"Module " << (Changed ?
"(modified)" :
"(unchanged)")
2170 <<
" after HexagonVectorCombine\n"
2176auto HexagonVectorCombine::getIntTy(
unsigned Width)
const ->
IntegerType * {
2180auto HexagonVectorCombine::getByteTy(
int ElemCount)
const ->
Type * {
2185 return VectorType::get(ByteTy, ElemCount,
false);
2188auto HexagonVectorCombine::getBoolTy(
int ElemCount)
const ->
Type * {
2193 return VectorType::get(BoolTy, ElemCount,
false);
2196auto HexagonVectorCombine::getConstInt(
int Val,
unsigned Width)
const
2201auto HexagonVectorCombine::isZero(
const Value *Val)
const ->
bool {
2202 if (
auto *
C = dyn_cast<Constant>(Val))
2203 return C->isZeroValue();
2207auto HexagonVectorCombine::getIntValue(
const Value *Val)
const
2208 -> std::optional<APInt> {
2209 if (
auto *CI = dyn_cast<ConstantInt>(Val))
2210 return CI->getValue();
2211 return std::nullopt;
2214auto HexagonVectorCombine::isUndef(
const Value *Val)
const ->
bool {
2215 return isa<UndefValue>(Val);
2218auto HexagonVectorCombine::isTrue(
const Value *Val)
const ->
bool {
2222auto HexagonVectorCombine::isFalse(
const Value *Val)
const ->
bool {
2226auto HexagonVectorCombine::getHvxTy(
Type *ElemTy,
bool Pair)
const
2232 "Invalid HVX element type");
2233 unsigned HwLen = HST.getVectorLength();
2235 return VectorType::get(ElemTy, Pair ? 2 * NumElems : NumElems,
2239auto HexagonVectorCombine::getSizeOf(
const Value *Val, SizeKind Kind)
const
2241 return getSizeOf(Val->
getType(), Kind);
2244auto HexagonVectorCombine::getSizeOf(
const Type *Ty, SizeKind Kind)
const
2246 auto *NcTy =
const_cast<Type *
>(Ty);
2249 return DL.getTypeStoreSize(NcTy).getFixedValue();
2251 return DL.getTypeAllocSize(NcTy).getFixedValue();
2256auto HexagonVectorCombine::getTypeAlignment(
Type *Ty)
const ->
int {
2259 if (HST.isTypeForHVX(Ty))
2260 return HST.getVectorLength();
2261 return DL.getABITypeAlign(Ty).value();
2264auto HexagonVectorCombine::length(
Value *Val)
const ->
size_t {
2265 return length(Val->
getType());
2268auto HexagonVectorCombine::length(
Type *Ty)
const ->
size_t {
2269 auto *VecTy = dyn_cast<VectorType>(Ty);
2270 assert(VecTy &&
"Must be a vector type");
2271 return VecTy->getElementCount().getFixedValue();
2274auto HexagonVectorCombine::getNullValue(
Type *Ty)
const ->
Constant * {
2277 if (
auto *VecTy = dyn_cast<VectorType>(Ty))
2282auto HexagonVectorCombine::getFullValue(
Type *Ty)
const ->
Constant * {
2285 if (
auto *VecTy = dyn_cast<VectorType>(Ty))
2290auto HexagonVectorCombine::getConstSplat(
Type *Ty,
int Val)
const
2293 auto VecTy = cast<VectorType>(Ty);
2294 Type *ElemTy = VecTy->getElementType();
2297 ConstantInt::get(ElemTy, Val));
2301auto HexagonVectorCombine::simplify(
Value *V)
const ->
Value * {
2302 if (
auto *In = dyn_cast<Instruction>(V)) {
2312 int Where)
const ->
Value * {
2313 assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
2314 int SrcLen = getSizeOf(Src);
2315 int DstLen = getSizeOf(Dst);
2321 Value *P2Src = vresize(Builder, Src, P2Len, Undef);
2322 Value *P2Dst = vresize(Builder, Dst, P2Len, Undef);
2325 for (
int i = 0; i != P2Len; ++i) {
2329 (Where <= i && i < Where +
Length) ? P2Len + Start + (i - Where) : i;
2333 return vresize(Builder, P2Insert, DstLen, Undef);
2338 assert(
Lo->getType() ==
Hi->getType() &&
"Argument type mismatch");
2341 int VecLen = getSizeOf(
Hi);
2342 if (
auto IntAmt = getIntValue(Amt))
2343 return getElementRange(Builder,
Lo,
Hi, VecLen - IntAmt->getSExtValue(),
2346 if (HST.isTypeForHVX(
Hi->getType())) {
2347 assert(
static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
2348 "Expecting an exact HVX type");
2349 return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_vlalignb),
2350 Hi->getType(), {Hi, Lo, Amt});
2363 return vralignb(Builder,
Lo,
Hi, Sub);
2370 assert(
Lo->getType() ==
Hi->getType() &&
"Argument type mismatch");
2373 int VecLen = getSizeOf(
Lo);
2374 if (
auto IntAmt = getIntValue(Amt))
2375 return getElementRange(Builder,
Lo,
Hi, IntAmt->getSExtValue(), VecLen);
2377 if (HST.isTypeForHVX(
Lo->getType())) {
2378 assert(
static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
2379 "Expecting an exact HVX type");
2380 return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_valignb),
2381 Lo->getType(), {Hi, Lo, Amt});
2408 std::vector<Value *> Work[2];
2409 int ThisW = 0, OtherW = 1;
2411 Work[ThisW].
assign(Vecs.begin(), Vecs.end());
2412 while (Work[ThisW].
size() > 1) {
2413 auto *Ty = cast<VectorType>(Work[ThisW].front()->
getType());
2414 SMask.
resize(length(Ty) * 2);
2415 std::iota(SMask.
begin(), SMask.
end(), 0);
2417 Work[OtherW].clear();
2418 if (Work[ThisW].
size() % 2 != 0)
2420 for (
int i = 0, e = Work[ThisW].
size(); i <
e; i += 2) {
2422 Work[ThisW][i], Work[ThisW][i + 1], SMask,
"shf");
2423 Work[OtherW].push_back(Joined);
2431 SMask.
resize(Vecs.size() * length(Vecs.front()->getType()));
2432 std::iota(SMask.
begin(), SMask.
end(), 0);
2440 auto *ValTy = cast<VectorType>(Val->
getType());
2441 assert(ValTy->getElementType() == Pad->getType());
2443 int CurSize = length(ValTy);
2444 if (CurSize == NewSize)
2447 if (CurSize > NewSize)
2448 return getElementRange(Builder, Val, Val, 0, NewSize);
2451 std::iota(SMask.
begin(), SMask.
begin() + CurSize, 0);
2452 std::fill(SMask.
begin() + CurSize, SMask.
end(), CurSize);
2466 if (FromSTy == ToSTy)
2469 int FromSize = getSizeOf(FromSTy);
2470 int ToSize = getSizeOf(ToSTy);
2471 assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);
2473 auto *MaskTy = cast<VectorType>(
Mask->getType());
2474 int FromCount = length(MaskTy);
2475 int ToCount = (FromCount * FromSize) / ToSize;
2476 assert((FromCount * FromSize) % ToSize == 0);
2478 auto *FromITy =
getIntTy(FromSize * 8);
2479 auto *ToITy =
getIntTy(ToSize * 8);
2484 Mask, VectorType::get(FromITy, FromCount,
false),
"sxt");
2486 Ext, VectorType::get(ToITy, ToCount,
false),
"cst");
2488 Cast, VectorType::get(getBoolTy(), ToCount,
false),
"trn");
2495 if (ScalarTy == getBoolTy())
2498 Value *Bytes = vbytes(Builder, Val);
2499 if (
auto *VecTy = dyn_cast<VectorType>(Bytes->
getType()))
2500 return Builder.
CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)),
"trn");
2503 return Builder.
CreateTrunc(Bytes, getBoolTy(),
"trn");
2510 if (ScalarTy == getByteTy())
2513 if (ScalarTy != getBoolTy())
2514 return Builder.
CreateBitCast(Val, getByteTy(getSizeOf(Val)),
"cst");
2516 if (
auto *VecTy = dyn_cast<VectorType>(Val->
getType()))
2517 return Builder.
CreateSExt(Val, VectorType::get(getByteTy(), VecTy),
"sxt");
2518 return Builder.
CreateSExt(Val, getByteTy(),
"sxt");
2522 unsigned Start,
unsigned Length)
const
2525 return getElementRange(Builder, Val, Val, Start,
Length);
2530 size_t Len = length(Val);
2531 assert(Len % 2 == 0 &&
"Length should be even");
2532 return subvector(Builder, Val, 0, Len / 2);
2537 size_t Len = length(Val);
2538 assert(Len % 2 == 0 &&
"Length should be even");
2539 return subvector(Builder, Val, Len / 2, Len / 2);
2544 assert(Val0->getType() == Val1->getType());
2545 int Len = length(Val0);
2548 for (
int i = 0; i !=
Len; ++i) {
2557 assert(Val0->getType() == Val1->getType());
2558 int Len = length(Val0);
2561 for (
int i = 0; i !=
Len; ++i) {
2562 Mask[2 * i + 0] = i;
2568auto HexagonVectorCombine::createHvxIntrinsic(
IRBuilderBase &Builder,
2576 Type *SrcTy = Val->getType();
2577 if (SrcTy == DestTy)
2582 assert(HST.isTypeForHVX(SrcTy,
true));
2585 if (cast<VectorType>(SrcTy)->getElementType() != BoolTy)
2589 unsigned HwLen = HST.getVectorLength();
2590 Intrinsic::ID TC = HwLen == 64 ? Intrinsic::hexagon_V6_pred_typecast
2591 : Intrinsic::hexagon_V6_pred_typecast_128B;
2601 for (
int i = 0, e =
Args.size(); i != e; ++i) {
2603 Type *
T = IntrTy->getParamType(i);
2604 if (
A->getType() !=
T) {
2610 StringRef MaybeName = !IntrTy->getReturnType()->isVoidTy() ?
"cup" :
"";
2621 assert(HST.isTypeForHVX(CallTy,
true));
2622 return getCast(Builder, Call,
RetTy);
2625auto HexagonVectorCombine::splitVectorElements(
IRBuilderBase &Builder,
2627 unsigned ToWidth)
const
2641 auto *VecTy = cast<VectorType>(Vec->getType());
2642 assert(VecTy->getElementType()->isIntegerTy());
2643 unsigned FromWidth = VecTy->getScalarSizeInBits();
2645 assert(ToWidth <= FromWidth &&
"Breaking up into wider elements?");
2646 unsigned NumResults = FromWidth / ToWidth;
2650 unsigned Length = length(VecTy);
2654 auto splitInHalf = [&](
unsigned Begin,
unsigned End,
auto splitFunc) ->
void {
2658 if (Begin + 1 ==
End)
2664 auto *VTy = VectorType::get(
getIntTy(Width / 2), 2 *
Length,
false);
2667 Value *Res =
vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));
2669 unsigned Half = (Begin +
End) / 2;
2670 Results[Begin] = sublo(Builder, Res);
2671 Results[Half] = subhi(Builder, Res);
2673 splitFunc(Begin, Half, splitFunc);
2674 splitFunc(Half,
End, splitFunc);
2677 splitInHalf(0, NumResults, splitInHalf);
2681auto HexagonVectorCombine::joinVectorElements(
IRBuilderBase &Builder,
2685 assert(ToType->getElementType()->isIntegerTy());
2696 unsigned ToWidth = ToType->getScalarSizeInBits();
2697 unsigned Width = Inputs.front()->getType()->getScalarSizeInBits();
2698 assert(Width <= ToWidth);
2700 unsigned Length = length(Inputs.front()->getType());
2702 unsigned NeedInputs = ToWidth / Width;
2703 if (Inputs.size() != NeedInputs) {
2708 Last, getConstSplat(
Last->getType(), Width - 1),
"asr");
2709 Inputs.resize(NeedInputs, Sign);
2712 while (Inputs.size() > 1) {
2715 for (
int i = 0, e = Inputs.size(); i < e; i += 2) {
2716 Value *Res =
vshuff(Builder, Inputs[i], Inputs[i + 1]);
2719 Inputs.resize(Inputs.size() / 2);
2722 assert(Inputs.front()->getType() == ToType);
2723 return Inputs.front();
2726auto HexagonVectorCombine::calculatePointerDifference(
Value *Ptr0,
2728 -> std::optional<int> {
2730 const SCEV *Scev0 = SE.getSCEV(Ptr0);
2731 const SCEV *Scev1 = SE.getSCEV(Ptr1);
2732 const SCEV *ScevDiff = SE.getMinusSCEV(Scev0, Scev1);
2733 if (
auto *Const = dyn_cast<SCEVConstant>(ScevDiff)) {
2735 if (
V.isSignedIntN(8 *
sizeof(
int)))
2736 return static_cast<int>(
V.getSExtValue());
2743 I->eraseFromParent();
2748#define CallBuilder(B, F) \
2751 if (auto *I = dyn_cast<Instruction>(V)) \
2752 B_.ToErase.push_back(I); \
2756 auto Simplify = [
this](
Value *
V) {
2762 auto StripBitCast = [](
Value *
V) {
2763 while (
auto *
C = dyn_cast<BitCastInst>(V))
2764 V =
C->getOperand(0);
2768 Ptr0 = StripBitCast(Ptr0);
2769 Ptr1 = StripBitCast(Ptr1);
2770 if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
2771 return std::nullopt;
2773 auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
2774 auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
2775 if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
2776 return std::nullopt;
2777 if (Gep0->getSourceElementType() != Gep1->getSourceElementType())
2778 return std::nullopt;
2780 Builder
B(Gep0->getParent());
2781 int Scale = getSizeOf(Gep0->getSourceElementType(), Alloc);
2784 if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
2785 return std::nullopt;
2787 Value *Idx0 = Gep0->getOperand(1);
2788 Value *Idx1 = Gep1->getOperand(1);
2791 if (
auto *Diff = dyn_cast<ConstantInt>(
2793 return Diff->getSExtValue() * Scale;
2795 KnownBits Known0 = getKnownBits(Idx0, Gep0);
2796 KnownBits Known1 = getKnownBits(Idx1, Gep1);
2799 return std::nullopt;
2806 if (
auto *
C = dyn_cast<ConstantInt>(SubU)) {
2807 Diff0 =
C->getSExtValue();
2809 return std::nullopt;
2817 if (
auto *
C = dyn_cast<ConstantInt>(SubK)) {
2818 Diff1 =
C->getSExtValue();
2820 return std::nullopt;
2823 return (Diff0 + Diff1) * Scale;
2828auto HexagonVectorCombine::getNumSignificantBits(
const Value *V,
2834auto HexagonVectorCombine::getKnownBits(
const Value *V,
2840auto HexagonVectorCombine::isSafeToClone(
const Instruction &In)
const ->
bool {
2841 if (
In.mayHaveSideEffects() ||
In.isAtomic() ||
In.isVolatile() ||
2842 In.isFenceLike() ||
In.mayReadOrWriteMemory()) {
2845 if (isa<CallBase>(In) || isa<AllocaInst>(In))
2850template <
typename T>
2851auto HexagonVectorCombine::isSafeToMoveBeforeInBB(
const Instruction &In,
2853 const T &IgnoreInsts)
const
2856 [
this](
const Instruction &
I) -> std::optional<MemoryLocation> {
2857 if (
const auto *
II = dyn_cast<IntrinsicInst>(&
I)) {
2858 switch (
II->getIntrinsicID()) {
2859 case Intrinsic::masked_load:
2861 case Intrinsic::masked_store:
2872 if (isa<PHINode>(In) || (To !=
Block.end() && isa<PHINode>(*To)))
2877 bool MayWrite =
In.mayWriteToMemory();
2878 auto MaybeLoc = getLocOrNone(In);
2880 auto From =
In.getIterator();
2883 bool MoveUp = (To !=
Block.end() && To->comesBefore(&In));
2885 MoveUp ? std::make_pair(To,
From) :
std::make_pair(
std::next(
From), To);
2886 for (
auto It =
Range.first; It !=
Range.second; ++It) {
2891 if (
auto *
II = dyn_cast<IntrinsicInst>(&
I)) {
2892 if (
II->getIntrinsicID() == Intrinsic::assume)
2898 if (
auto *CB = dyn_cast<CallBase>(&
I)) {
2899 if (!CB->hasFnAttr(Attribute::WillReturn))
2901 if (!CB->hasFnAttr(Attribute::NoSync))
2904 if (
I.mayReadOrWriteMemory()) {
2905 auto MaybeLocI = getLocOrNone(
I);
2906 if (MayWrite ||
I.mayWriteToMemory()) {
2907 if (!MaybeLoc || !MaybeLocI)
2909 if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
2917auto HexagonVectorCombine::isByteVecTy(
Type *Ty)
const ->
bool {
2918 if (
auto *VecTy = dyn_cast<VectorType>(Ty))
2919 return VecTy->getElementType() == getByteTy();
2928 std::iota(SMask.
begin(), SMask.
end(), Start);
2940class HexagonVectorCombineLegacy :
public FunctionPass {
2962 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2964 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
2965 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2966 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2968 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
F);
2970 HexagonVectorCombine HVC(
F, AA, AC, DT, SE, TLI, TM);
2976char HexagonVectorCombineLegacy::ID = 0;
2979 "Hexagon Vector Combine",
false,
false)
2990 return new HexagonVectorCombineLegacy();
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
BlockVerifier::State From
static IntegerType * getIntTy(IRBuilderBase &B, const TargetLibraryInfo *TLI)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
#define LLVM_ATTRIBUTE_UNUSED
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Given that RA is a live value
Mark the given Function as meaning that it cannot be changed in any way mark any values that are used as this function s parameters or by its return values(according to Uses) live as well. void DeadArgumentEliminationPass
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
#define CallBuilder(B, F)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static bool isCandidate(const MachineInstr *MI, Register &DefedReg, Register FrameReg)
static bool isUndef(const MachineInstr &MI)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static ConstantInt * getConstInt(MDNode *MD, unsigned NumOp)
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Target-Independent Code Generator Pass Configuration Options pass.
support::ulittle16_t & Lo
support::ulittle16_t & Hi
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
InstListType::const_iterator const_iterator
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
iterator_range< iterator > children()
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
const BasicBlock & back() const
Common base class shared among various IRBuilders.
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
const char * getOpcodeName() const
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
An instruction for reading from memory.
bool doesNotAccessMemory() const
Whether this function accesses no memory.
bool onlyAccessesInaccessibleMem() const
Whether this function only (at most) accesses inaccessible memory.
static std::optional< MemoryLocation > getOrNone(const Instruction *Inst)
static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
void assign(size_type NumElts, ValueParamT Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Provides information about what library functions are available for the current target.
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Rounding
Possible values of current rounding mode, which is specified in bits 23:22 of FPCR.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createHexagonVectorCombineLegacyPass()
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
auto reverse(ContainerTy &&C)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
void initializeHexagonVectorCombineLegacyPass(PassRegistry &)
@ And
Bitwise or logical AND of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Get the upper bound on bit size for this Value Op as a signed integer.
bool mayHaveNonDefUseDependency(const Instruction &I)
Returns true if the result or effects of the given instructions I depend values not reachable through...
MaskT vshuff(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
MaskT vdeal(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
Implement std::hash so that hash_code can be used in STL containers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.