LLVM 22.0.0git
HexagonISelDAGToDAGHVX.cpp
Go to the documentation of this file.
1//===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "HexagonISelLowering.h"
11#include "llvm/ADT/BitVector.h"
12#include "llvm/ADT/SetVector.h"
14#include "llvm/IR/Intrinsics.h"
15#include "llvm/IR/IntrinsicsHexagon.h"
16#include "llvm/Support/Debug.h"
18
19#include <algorithm>
20#include <cmath>
21#include <deque>
22#include <functional>
23#include <map>
24#include <optional>
25#include <set>
26#include <unordered_map>
27#include <utility>
28#include <vector>
29
30#define DEBUG_TYPE "hexagon-isel"
31using namespace llvm;
32
33namespace {
34
35// --------------------------------------------------------------------
36// Implementation of permutation networks.
37
38// Implementation of the node routing through butterfly networks:
39// - Forward delta.
40// - Reverse delta.
41// - Benes.
42//
43//
44// Forward delta network consists of log(N) steps, where N is the number
45// of inputs. In each step, an input can stay in place, or it can get
46// routed to another position[1]. The step after that consists of two
47// networks, each half in size in terms of the number of nodes. In those
48// terms, in the given step, an input can go to either the upper or the
49// lower network in the next step.
50//
51// [1] Hexagon's vdelta/vrdelta allow an element to be routed to both
52// positions as long as there is no conflict.
53
54// Here's a delta network for 8 inputs, only the switching routes are
55// shown:
56//
57// Steps:
58// |- 1 ---------------|- 2 -----|- 3 -|
59//
60// Inp[0] *** *** *** *** Out[0]
61// \ / \ / \ /
62// \ / \ / X
63// \ / \ / / \
64// Inp[1] *** \ / *** X *** *** Out[1]
65// \ \ / / \ / \ /
66// \ \ / / X X
67// \ \ / / / \ / \
68// Inp[2] *** \ \ / / *** X *** *** Out[2]
69// \ \ X / / / \ \ /
70// \ \ / \ / / / \ X
71// \ X X / / \ / \
72// Inp[3] *** \ / \ / \ / *** *** *** Out[3]
73// \ X X X /
74// \ / \ / \ / \ /
75// X X X X
76// / \ / \ / \ / \
77// / X X X \
78// Inp[4] *** / \ / \ / \ *** *** *** Out[4]
79// / X X \ \ / \ /
80// / / \ / \ \ \ / X
81// / / X \ \ \ / / \
82// Inp[5] *** / / \ \ *** X *** *** Out[5]
83// / / \ \ \ / \ /
84// / / \ \ X X
85// / / \ \ / \ / \
86// Inp[6] *** / \ *** X *** *** Out[6]
87// / \ / \ \ /
88// / \ / \ X
89// / \ / \ / \
90// Inp[7] *** *** *** *** Out[7]
91//
92//
93// Reverse delta network is same as delta network, with the steps in
94// the opposite order.
95//
96//
97// Benes network is a forward delta network immediately followed by
98// a reverse delta network.
99
100enum class ColorKind { None, Red, Black };
101
102// Graph coloring utility used to partition nodes into two groups:
103// they will correspond to nodes routed to the upper and lower networks.
104struct Coloring {
105 using Node = int;
106 using MapType = std::map<Node, ColorKind>;
107 static constexpr Node Ignore = Node(-1);
108
109 Coloring(ArrayRef<Node> Ord) : Order(Ord) {
110 build();
111 if (!color())
112 Colors.clear();
113 }
114
115 const MapType &colors() const {
116 return Colors;
117 }
118
119 ColorKind other(ColorKind Color) {
120 if (Color == ColorKind::None)
121 return ColorKind::Red;
122 return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red;
123 }
124
125 LLVM_DUMP_METHOD void dump() const;
126
127private:
128 ArrayRef<Node> Order;
129 MapType Colors;
130 std::set<Node> Needed;
131
132 using NodeSet = std::set<Node>;
133 std::map<Node,NodeSet> Edges;
134
135 Node conj(Node Pos) {
136 Node Num = Order.size();
137 return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2;
138 }
139
140 ColorKind getColor(Node N) {
141 auto F = Colors.find(N);
142 return F != Colors.end() ? F->second : ColorKind::None;
143 }
144
145 std::pair<bool, ColorKind> getUniqueColor(const NodeSet &Nodes);
146
147 void build();
148 bool color();
149};
150} // namespace
151
152std::pair<bool, ColorKind> Coloring::getUniqueColor(const NodeSet &Nodes) {
153 auto Color = ColorKind::None;
154 for (Node N : Nodes) {
155 ColorKind ColorN = getColor(N);
156 if (ColorN == ColorKind::None)
157 continue;
158 if (Color == ColorKind::None)
159 Color = ColorN;
160 else if (Color != ColorKind::None && Color != ColorN)
161 return { false, ColorKind::None };
162 }
163 return { true, Color };
164}
165
166void Coloring::build() {
167 // Add Order[P] and Order[conj(P)] to Edges.
168 for (unsigned P = 0; P != Order.size(); ++P) {
169 Node I = Order[P];
170 if (I != Ignore) {
171 Needed.insert(I);
172 Node PC = Order[conj(P)];
173 if (PC != Ignore && PC != I)
174 Edges[I].insert(PC);
175 }
176 }
177 // Add I and conj(I) to Edges.
178 for (unsigned I = 0; I != Order.size(); ++I) {
179 if (!Needed.count(I))
180 continue;
181 Node C = conj(I);
182 // This will create an entry in the edge table, even if I is not
183 // connected to any other node. This is necessary, because it still
184 // needs to be colored.
185 NodeSet &Is = Edges[I];
186 if (Needed.count(C))
187 Is.insert(C);
188 }
189}
190
191bool Coloring::color() {
192 SetVector<Node> FirstQ;
193 auto Enqueue = [this,&FirstQ] (Node N) {
195 Q.insert(N);
196 for (unsigned I = 0; I != Q.size(); ++I) {
197 NodeSet &Ns = Edges[Q[I]];
198 Q.insert_range(Ns);
199 }
200 FirstQ.insert_range(Q);
201 };
202 for (Node N : Needed)
203 Enqueue(N);
204
205 for (Node N : FirstQ) {
206 if (Colors.count(N))
207 continue;
208 NodeSet &Ns = Edges[N];
209 auto P = getUniqueColor(Ns);
210 if (!P.first)
211 return false;
212 Colors[N] = other(P.second);
213 }
214
215 // First, color nodes that don't have any dups.
216 for (auto E : Edges) {
217 Node N = E.first;
218 if (!Needed.count(conj(N)) || Colors.count(N))
219 continue;
220 auto P = getUniqueColor(E.second);
221 if (!P.first)
222 return false;
223 Colors[N] = other(P.second);
224 }
225
226 // Now, nodes that are still uncolored. Since the graph can be modified
227 // in this step, create a work queue.
228 std::vector<Node> WorkQ;
229 for (auto E : Edges) {
230 Node N = E.first;
231 if (!Colors.count(N))
232 WorkQ.push_back(N);
233 }
234
235 for (Node N : WorkQ) {
236 NodeSet &Ns = Edges[N];
237 auto P = getUniqueColor(Ns);
238 if (P.first) {
239 Colors[N] = other(P.second);
240 continue;
241 }
242
243 // Coloring failed. Split this node.
244 Node C = conj(N);
245 ColorKind ColorN = other(ColorKind::None);
246 ColorKind ColorC = other(ColorN);
247 NodeSet &Cs = Edges[C];
248 NodeSet CopyNs = Ns;
249 for (Node M : CopyNs) {
250 ColorKind ColorM = getColor(M);
251 if (ColorM == ColorC) {
252 // Connect M with C, disconnect M from N.
253 Cs.insert(M);
254 Edges[M].insert(C);
255 Ns.erase(M);
256 Edges[M].erase(N);
257 }
258 }
259 Colors[N] = ColorN;
260 Colors[C] = ColorC;
261 }
262
263 // Explicitly assign "None" to all uncolored nodes.
264 for (unsigned I = 0; I != Order.size(); ++I)
265 Colors.try_emplace(I, ColorKind::None);
266
267 return true;
268}
269
270#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
271void Coloring::dump() const {
272 dbgs() << "{ Order: {";
273 for (Node P : Order) {
274 if (P != Ignore)
275 dbgs() << ' ' << P;
276 else
277 dbgs() << " -";
278 }
279 dbgs() << " }\n";
280 dbgs() << " Needed: {";
281 for (Node N : Needed)
282 dbgs() << ' ' << N;
283 dbgs() << " }\n";
284
285 dbgs() << " Edges: {\n";
286 for (auto E : Edges) {
287 dbgs() << " " << E.first << " -> {";
288 for (auto N : E.second)
289 dbgs() << ' ' << N;
290 dbgs() << " }\n";
291 }
292 dbgs() << " }\n";
293
294 auto ColorKindToName = [](ColorKind C) {
295 switch (C) {
296 case ColorKind::None:
297 return "None";
298 case ColorKind::Red:
299 return "Red";
300 case ColorKind::Black:
301 return "Black";
302 }
303 llvm_unreachable("all ColorKinds should be handled by the switch above");
304 };
305
306 dbgs() << " Colors: {\n";
307 for (auto C : Colors)
308 dbgs() << " " << C.first << " -> " << ColorKindToName(C.second) << "\n";
309 dbgs() << " }\n}\n";
310}
311#endif
312
313namespace {
314// Base class of for reordering networks. They don't strictly need to be
315// permutations, as outputs with repeated occurrences of an input element
316// are allowed.
317struct PermNetwork {
318 using Controls = std::vector<uint8_t>;
319 using ElemType = int;
320 static constexpr ElemType Ignore = ElemType(-1);
321
322 enum : uint8_t {
323 None,
324 Pass,
325 Switch
326 };
327 enum : uint8_t {
328 Forward,
329 Reverse
330 };
331
332 PermNetwork(ArrayRef<ElemType> Ord, unsigned Mult = 1) {
333 Order.assign(Ord.data(), Ord.data()+Ord.size());
334 Log = 0;
335
336 unsigned S = Order.size();
337 while (S >>= 1)
338 ++Log;
339
340 Table.resize(Order.size());
341 for (RowType &Row : Table)
342 Row.resize(Mult*Log, None);
343 }
344
345 void getControls(Controls &V, unsigned StartAt, uint8_t Dir) const {
346 unsigned Size = Order.size();
347 V.resize(Size);
348 for (unsigned I = 0; I != Size; ++I) {
349 unsigned W = 0;
350 for (unsigned L = 0; L != Log; ++L) {
351 unsigned C = ctl(I, StartAt+L) == Switch;
352 if (Dir == Forward)
353 W |= C << (Log-1-L);
354 else
355 W |= C << L;
356 }
357 assert(isUInt<8>(W));
358 V[I] = uint8_t(W);
359 }
360 }
361
362 uint8_t ctl(ElemType Pos, unsigned Step) const {
363 return Table[Pos][Step];
364 }
365 unsigned size() const {
366 return Order.size();
367 }
368 unsigned steps() const {
369 return Log;
370 }
371
372protected:
373 unsigned Log;
374 std::vector<ElemType> Order;
375 using RowType = std::vector<uint8_t>;
376 std::vector<RowType> Table;
377};
378
379struct ForwardDeltaNetwork : public PermNetwork {
380 ForwardDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
381
382 bool run(Controls &V) {
383 if (!route(Order.data(), Table.data(), size(), 0))
384 return false;
385 getControls(V, 0, Forward);
386 return true;
387 }
388
389private:
390 bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
391};
392
393struct ReverseDeltaNetwork : public PermNetwork {
394 ReverseDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
395
396 bool run(Controls &V) {
397 if (!route(Order.data(), Table.data(), size(), 0))
398 return false;
399 getControls(V, 0, Reverse);
400 return true;
401 }
402
403private:
404 bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
405};
406
407struct BenesNetwork : public PermNetwork {
408 BenesNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord, 2) {}
409
410 bool run(Controls &F, Controls &R) {
411 if (!route(Order.data(), Table.data(), size(), 0))
412 return false;
413
414 getControls(F, 0, Forward);
415 getControls(R, Log, Reverse);
416 return true;
417 }
418
419private:
420 bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
421};
422} // namespace
423
424bool ForwardDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
425 unsigned Step) {
426 bool UseUp = false, UseDown = false;
427 ElemType Num = Size;
428
429 // Cannot use coloring here, because coloring is used to determine
430 // the "big" switch, i.e. the one that changes halves, and in a forward
431 // network, a color can be simultaneously routed to both halves in the
432 // step we're working on.
433 for (ElemType J = 0; J != Num; ++J) {
434 ElemType I = P[J];
435 // I is the position in the input,
436 // J is the position in the output.
437 if (I == Ignore)
438 continue;
439 uint8_t S;
440 if (I < Num/2)
441 S = (J < Num/2) ? Pass : Switch;
442 else
443 S = (J < Num/2) ? Switch : Pass;
444
445 // U is the element in the table that needs to be updated.
446 ElemType U = (S == Pass) ? I : (I < Num/2 ? I+Num/2 : I-Num/2);
447 if (U < Num/2)
448 UseUp = true;
449 else
450 UseDown = true;
451 if (T[U][Step] != S && T[U][Step] != None)
452 return false;
453 T[U][Step] = S;
454 }
455
456 for (ElemType J = 0; J != Num; ++J)
457 if (P[J] != Ignore && P[J] >= Num/2)
458 P[J] -= Num/2;
459
460 if (Step+1 < Log) {
461 if (UseUp && !route(P, T, Size/2, Step+1))
462 return false;
463 if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
464 return false;
465 }
466 return true;
467}
468
469bool ReverseDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
470 unsigned Step) {
471 unsigned Pets = Log-1 - Step;
472 bool UseUp = false, UseDown = false;
473 ElemType Num = Size;
474
475 // In this step half-switching occurs, so coloring can be used.
476 Coloring G({P,Size});
477 const Coloring::MapType &M = G.colors();
478 if (M.empty())
479 return false;
480
481 ColorKind ColorUp = ColorKind::None;
482 for (ElemType J = 0; J != Num; ++J) {
483 ElemType I = P[J];
484 // I is the position in the input,
485 // J is the position in the output.
486 if (I == Ignore)
487 continue;
488 ColorKind C = M.at(I);
489 if (C == ColorKind::None)
490 continue;
491 // During "Step", inputs cannot switch halves, so if the "up" color
492 // is still unknown, make sure that it is selected in such a way that
493 // "I" will stay in the same half.
494 bool InpUp = I < Num/2;
495 if (ColorUp == ColorKind::None)
496 ColorUp = InpUp ? C : G.other(C);
497 if ((C == ColorUp) != InpUp) {
498 // If I should go to a different half than where is it now, give up.
499 return false;
500 }
501
502 uint8_t S;
503 if (InpUp) {
504 S = (J < Num/2) ? Pass : Switch;
505 UseUp = true;
506 } else {
507 S = (J < Num/2) ? Switch : Pass;
508 UseDown = true;
509 }
510 T[J][Pets] = S;
511 }
512
513 // Reorder the working permutation according to the computed switch table
514 // for the last step (i.e. Pets).
515 for (ElemType J = 0, E = Size / 2; J != E; ++J) {
516 ElemType PJ = P[J]; // Current values of P[J]
517 ElemType PC = P[J+Size/2]; // and P[conj(J)]
518 ElemType QJ = PJ; // New values of P[J]
519 ElemType QC = PC; // and P[conj(J)]
520 if (T[J][Pets] == Switch)
521 QC = PJ;
522 if (T[J+Size/2][Pets] == Switch)
523 QJ = PC;
524 P[J] = QJ;
525 P[J+Size/2] = QC;
526 }
527
528 for (ElemType J = 0; J != Num; ++J)
529 if (P[J] != Ignore && P[J] >= Num/2)
530 P[J] -= Num/2;
531
532 if (Step+1 < Log) {
533 if (UseUp && !route(P, T, Size/2, Step+1))
534 return false;
535 if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
536 return false;
537 }
538 return true;
539}
540
541bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size,
542 unsigned Step) {
543 Coloring G({P,Size});
544 const Coloring::MapType &M = G.colors();
545 if (M.empty())
546 return false;
547 ElemType Num = Size;
548
549 unsigned Pets = 2*Log-1 - Step;
550 bool UseUp = false, UseDown = false;
551
552 // Both assignments, i.e. Red->Up and Red->Down are valid, but they will
553 // result in different controls. Let's pick the one where the first
554 // control will be "Pass".
555 ColorKind ColorUp = ColorKind::None;
556 for (ElemType J = 0; J != Num; ++J) {
557 ElemType I = P[J];
558 if (I == Ignore)
559 continue;
560 ColorKind C = M.at(I);
561 if (C == ColorKind::None)
562 continue;
563 if (ColorUp == ColorKind::None) {
564 ColorUp = (I < Num / 2) ? ColorKind::Red : ColorKind::Black;
565 }
566 unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2;
567 if (C == ColorUp) {
568 if (I < Num/2)
569 T[I][Step] = Pass;
570 else
571 T[CI][Step] = Switch;
572 T[J][Pets] = (J < Num/2) ? Pass : Switch;
573 UseUp = true;
574 } else { // Down
575 if (I < Num/2)
576 T[CI][Step] = Switch;
577 else
578 T[I][Step] = Pass;
579 T[J][Pets] = (J < Num/2) ? Switch : Pass;
580 UseDown = true;
581 }
582 }
583
584 // Reorder the working permutation according to the computed switch table
585 // for the last step (i.e. Pets).
586 for (ElemType J = 0; J != Num/2; ++J) {
587 ElemType PJ = P[J]; // Current values of P[J]
588 ElemType PC = P[J+Num/2]; // and P[conj(J)]
589 ElemType QJ = PJ; // New values of P[J]
590 ElemType QC = PC; // and P[conj(J)]
591 if (T[J][Pets] == Switch)
592 QC = PJ;
593 if (T[J+Num/2][Pets] == Switch)
594 QJ = PC;
595 P[J] = QJ;
596 P[J+Num/2] = QC;
597 }
598
599 for (ElemType J = 0; J != Num; ++J)
600 if (P[J] != Ignore && P[J] >= Num/2)
601 P[J] -= Num/2;
602
603 if (Step+1 < Log) {
604 if (UseUp && !route(P, T, Size/2, Step+1))
605 return false;
606 if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
607 return false;
608 }
609 return true;
610}
611
612// --------------------------------------------------------------------
613// Support for building selection results (output instructions that are
614// parts of the final selection).
615
616namespace {
617struct OpRef {
618 OpRef(SDValue V) : OpV(V) {}
619 bool isValue() const { return OpV.getNode() != nullptr; }
620 bool isValid() const { return isValue() || !(OpN & Invalid); }
621 bool isUndef() const { return OpN & Undef; }
622 static OpRef res(int N) { return OpRef(Whole | (N & Index)); }
623 static OpRef fail() { return OpRef(Invalid); }
624
625 static OpRef lo(const OpRef &R) {
626 assert(!R.isValue());
627 return OpRef(R.OpN & (Undef | Index | LoHalf));
628 }
629 static OpRef hi(const OpRef &R) {
630 assert(!R.isValue());
631 return OpRef(R.OpN & (Undef | Index | HiHalf));
632 }
633 static OpRef undef(MVT Ty) { return OpRef(Undef | Ty.SimpleTy); }
634
635 // Direct value.
636 SDValue OpV = SDValue();
637
638 // Reference to the operand of the input node:
639 // If the 31st bit is 1, it's undef, otherwise, bits 28..0 are the
640 // operand index:
641 // If bit 30 is set, it's the high half of the operand.
642 // If bit 29 is set, it's the low half of the operand.
643 unsigned OpN = 0;
644
645 enum : unsigned {
646 Invalid = 0x10000000,
647 LoHalf = 0x20000000,
648 HiHalf = 0x40000000,
649 Whole = LoHalf | HiHalf,
650 Undef = 0x80000000,
651 Index = 0x0FFFFFFF, // Mask of the index value.
652 IndexBits = 28,
653 };
654
656 void print(raw_ostream &OS, const SelectionDAG &G) const;
657
658private:
659 OpRef(unsigned N) : OpN(N) {}
660};
661
662struct NodeTemplate {
663 NodeTemplate() = default;
664 unsigned Opc = 0;
665 MVT Ty = MVT::Other;
666 std::vector<OpRef> Ops;
667
668 LLVM_DUMP_METHOD void print(raw_ostream &OS, const SelectionDAG &G) const;
669};
670
671struct ResultStack {
672 ResultStack(SDNode *Inp)
673 : InpNode(Inp), InpTy(Inp->getValueType(0).getSimpleVT()) {}
674 SDNode *InpNode;
675 MVT InpTy;
676 unsigned push(const NodeTemplate &Res) {
677 List.push_back(Res);
678 return List.size()-1;
679 }
680 unsigned push(unsigned Opc, MVT Ty, std::vector<OpRef> &&Ops) {
681 NodeTemplate Res;
682 Res.Opc = Opc;
683 Res.Ty = Ty;
684 Res.Ops = Ops;
685 return push(Res);
686 }
687 bool empty() const { return List.empty(); }
688 unsigned size() const { return List.size(); }
689 unsigned top() const { return size()-1; }
690 const NodeTemplate &operator[](unsigned I) const { return List[I]; }
691 unsigned reset(unsigned NewTop) {
692 List.resize(NewTop+1);
693 return NewTop;
694 }
695
696 using BaseType = std::vector<NodeTemplate>;
697 BaseType::iterator begin() { return List.begin(); }
698 BaseType::iterator end() { return List.end(); }
699 BaseType::const_iterator begin() const { return List.begin(); }
700 BaseType::const_iterator end() const { return List.end(); }
701
703
705 void print(raw_ostream &OS, const SelectionDAG &G) const;
706};
707} // namespace
708
709#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
710void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const {
711 if (isValue()) {
712 OpV.getNode()->print(OS, &G);
713 return;
714 }
715 if (OpN & Invalid) {
716 OS << "invalid";
717 return;
718 }
719 if (OpN & Undef) {
720 OS << "undef";
721 return;
722 }
723 if ((OpN & Whole) != Whole) {
724 assert((OpN & Whole) == LoHalf || (OpN & Whole) == HiHalf);
725 if (OpN & LoHalf)
726 OS << "lo ";
727 else
728 OS << "hi ";
729 }
730 OS << '#' << SignExtend32(OpN & Index, IndexBits);
731}
732
733void NodeTemplate::print(raw_ostream &OS, const SelectionDAG &G) const {
734 const TargetInstrInfo &TII = *G.getSubtarget().getInstrInfo();
735 OS << format("%8s", EVT(Ty).getEVTString().c_str()) << " "
736 << TII.getName(Opc);
737 bool Comma = false;
738 for (const auto &R : Ops) {
739 if (Comma)
740 OS << ',';
741 Comma = true;
742 OS << ' ';
743 R.print(OS, G);
744 }
745}
746
747void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const {
748 OS << "Input node:\n";
749#ifndef NDEBUG
750 InpNode->dumpr(&G);
751#endif
752 OS << "Result templates:\n";
753 for (unsigned I = 0, E = List.size(); I != E; ++I) {
754 OS << '[' << I << "] ";
755 List[I].print(OS, G);
756 OS << '\n';
757 }
758}
759#endif
760
761namespace {
762struct ShuffleMask {
763 ShuffleMask(ArrayRef<int> M) : Mask(M) {
764 for (int M : Mask) {
765 if (M == -1)
766 continue;
767 MinSrc = (MinSrc == -1) ? M : std::min(MinSrc, M);
768 MaxSrc = (MaxSrc == -1) ? M : std::max(MaxSrc, M);
769 }
770 }
771
773 int MinSrc = -1, MaxSrc = -1;
774
775 ShuffleMask lo() const {
776 size_t H = Mask.size()/2;
777 return ShuffleMask(Mask.take_front(H));
778 }
779 ShuffleMask hi() const {
780 size_t H = Mask.size()/2;
781 return ShuffleMask(Mask.take_back(H));
782 }
783
784 void print(raw_ostream &OS) const {
785 OS << "MinSrc:" << MinSrc << ", MaxSrc:" << MaxSrc << " {";
786 for (int M : Mask)
787 OS << ' ' << M;
788 OS << " }";
789 }
790};
791
793raw_ostream &operator<<(raw_ostream &OS, const ShuffleMask &SM) {
794 SM.print(OS);
795 return OS;
796}
797} // namespace
798
799namespace shuffles {
801// Vdd = vshuffvdd(Vu, Vv, Rt)
802// Vdd = vdealvdd(Vu, Vv, Rt)
803// Vd = vpack(Vu, Vv, Size, TakeOdd)
804// Vd = vshuff(Vu, Vv, Size, TakeOdd)
805// Vd = vdeal(Vu, Vv, Size, TakeOdd)
806// Vd = vdealb4w(Vu, Vv)
807
808ArrayRef<int> lo(ArrayRef<int> Vuu) { return Vuu.take_front(Vuu.size() / 2); }
809ArrayRef<int> hi(ArrayRef<int> Vuu) { return Vuu.take_back(Vuu.size() / 2); }
810
812 int Len = Vu.size();
813 MaskT Vdd(2 * Len);
814 std::copy(Vv.begin(), Vv.end(), Vdd.begin());
815 std::copy(Vu.begin(), Vu.end(), Vdd.begin() + Len);
816
817 auto Vd0 = MutableArrayRef<int>(Vdd).take_front(Len);
818 auto Vd1 = MutableArrayRef<int>(Vdd).take_back(Len);
819
820 for (int Offset = 1; Offset < Len; Offset *= 2) {
821 if ((Rt & Offset) == 0)
822 continue;
823 for (int i = 0; i != Len; ++i) {
824 if ((i & Offset) == 0)
825 std::swap(Vd1[i], Vd0[i + Offset]);
826 }
827 }
828 return Vdd;
829}
830
832 int Len = Vu.size();
833 MaskT Vdd(2 * Len);
834 std::copy(Vv.begin(), Vv.end(), Vdd.begin());
835 std::copy(Vu.begin(), Vu.end(), Vdd.begin() + Len);
836
837 auto Vd0 = MutableArrayRef<int>(Vdd).take_front(Len);
838 auto Vd1 = MutableArrayRef<int>(Vdd).take_back(Len);
839
840 for (int Offset = Len / 2; Offset > 0; Offset /= 2) {
841 if ((Rt & Offset) == 0)
842 continue;
843 for (int i = 0; i != Len; ++i) {
844 if ((i & Offset) == 0)
845 std::swap(Vd1[i], Vd0[i + Offset]);
846 }
847 }
848 return Vdd;
849}
850
851MaskT vpack(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Size, bool TakeOdd) {
852 int Len = Vu.size();
853 MaskT Vd(Len);
854 auto Odd = static_cast<int>(TakeOdd);
855 for (int i = 0, e = Len / (2 * Size); i != e; ++i) {
856 for (int b = 0; b != static_cast<int>(Size); ++b) {
857 // clang-format off
858 Vd[i * Size + b] = Vv[(2 * i + Odd) * Size + b];
859 Vd[i * Size + b + Len / 2] = Vu[(2 * i + Odd) * Size + b];
860 // clang-format on
861 }
862 }
863 return Vd;
864}
865
866MaskT vshuff(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Size, bool TakeOdd) {
867 int Len = Vu.size();
868 MaskT Vd(Len);
869 auto Odd = static_cast<int>(TakeOdd);
870 for (int i = 0, e = Len / (2 * Size); i != e; ++i) {
871 for (int b = 0; b != static_cast<int>(Size); ++b) {
872 Vd[(2 * i + 0) * Size + b] = Vv[(2 * i + Odd) * Size + b];
873 Vd[(2 * i + 1) * Size + b] = Vu[(2 * i + Odd) * Size + b];
874 }
875 }
876 return Vd;
877}
878
879MaskT vdeal(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Size, bool TakeOdd) {
880 int Len = Vu.size();
881 MaskT T = vdealvdd(Vu, Vv, Len - 2 * Size);
882 return vpack(hi(T), lo(T), Size, TakeOdd);
883}
884
886 int Len = Vu.size();
887 MaskT Vd(Len);
888 for (int i = 0, e = Len / 4; i != e; ++i) {
889 Vd[0 * (Len / 4) + i] = Vv[4 * i + 0];
890 Vd[1 * (Len / 4) + i] = Vv[4 * i + 2];
891 Vd[2 * (Len / 4) + i] = Vu[4 * i + 0];
892 Vd[3 * (Len / 4) + i] = Vu[4 * i + 2];
893 }
894 return Vd;
895}
896
897template <typename ShuffFunc, typename... OptArgs>
898auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT {
899 MaskT Vu(Length), Vv(Length);
900 std::iota(Vu.begin(), Vu.end(), Length); // High
901 std::iota(Vv.begin(), Vv.end(), 0); // Low
902 return S(Vu, Vv, args...);
903}
904
905} // namespace shuffles
906
907// --------------------------------------------------------------------
908// The HvxSelector class.
909
911 return static_cast<const HexagonTargetLowering&>(G.getTargetLoweringInfo());
912}
914 return G.getSubtarget<HexagonSubtarget>();
915}
916
917namespace llvm {
918 struct HvxSelector {
923 const unsigned HwLen;
924
926 : Lower(getHexagonLowering(G)), ISel(HS), DAG(G),
927 HST(getHexagonSubtarget(G)), HwLen(HST.getVectorLength()) {}
928
929 MVT getSingleVT(MVT ElemTy) const {
930 assert(ElemTy != MVT::i1 && "Use getBoolVT for predicates");
931 unsigned NumElems = HwLen / (ElemTy.getSizeInBits() / 8);
932 return MVT::getVectorVT(ElemTy, NumElems);
933 }
934
935 MVT getPairVT(MVT ElemTy) const {
936 assert(ElemTy != MVT::i1); // Suspicious: there are no predicate pairs.
937 unsigned NumElems = (2 * HwLen) / (ElemTy.getSizeInBits() / 8);
938 return MVT::getVectorVT(ElemTy, NumElems);
939 }
940
941 MVT getBoolVT() const {
942 // Return HwLen x i1.
943 return MVT::getVectorVT(MVT::i1, HwLen);
944 }
945
947 void selectShuffle(SDNode *N);
948 void selectRor(SDNode *N);
949 void selectVAlign(SDNode *N);
950
952 unsigned Width);
954 ArrayRef<uint32_t> Completions, unsigned Width);
955 static std::optional<int> rotationDistance(ShuffleMask SM, unsigned WrapAt);
956
957 private:
958 void select(SDNode *ISelN);
959 void materialize(const ResultStack &Results);
960
961 SDValue getConst32(int Val, const SDLoc &dl);
962 SDValue getVectorConstant(ArrayRef<uint8_t> Data, const SDLoc &dl);
963
964 enum : unsigned {
965 None,
966 PackMux,
967 };
968 OpRef concats(OpRef Va, OpRef Vb, ResultStack &Results);
969 OpRef funnels(OpRef Va, OpRef Vb, int Amount, ResultStack &Results);
970
971 OpRef packs(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
972 MutableArrayRef<int> NewMask, unsigned Options = None);
973 OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
974 MutableArrayRef<int> NewMask);
975 OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
976 ResultStack &Results);
977 OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
978 ResultStack &Results);
979
980 OpRef shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results);
981 OpRef shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
982 OpRef shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results);
983 OpRef shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
984
985 OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results);
986 OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
987 OpRef expanding(ShuffleMask SM, OpRef Va, ResultStack &Results);
988 OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &Results);
989
990 bool selectVectorConstants(SDNode *N);
991 bool scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl, MVT ResTy,
992 SDValue Va, SDValue Vb, SDNode *N);
993 };
994} // namespace llvm
995
997 MutableArrayRef<int> MaskR) {
998 unsigned VecLen = Mask.size();
999 assert(MaskL.size() == VecLen && MaskR.size() == VecLen);
1000 for (unsigned I = 0; I != VecLen; ++I) {
1001 int M = Mask[I];
1002 if (M < 0) {
1003 MaskL[I] = MaskR[I] = -1;
1004 } else if (unsigned(M) < VecLen) {
1005 MaskL[I] = M;
1006 MaskR[I] = -1;
1007 } else {
1008 MaskL[I] = -1;
1009 MaskR[I] = M-VecLen;
1010 }
1011 }
1012}
1013
1014static std::pair<int,unsigned> findStrip(ArrayRef<int> A, int Inc,
1015 unsigned MaxLen) {
1016 assert(A.size() > 0 && A.size() >= MaxLen);
1017 int F = A[0];
1018 int E = F;
1019 for (unsigned I = 1; I != MaxLen; ++I) {
1020 if (A[I] - E != Inc)
1021 return { F, I };
1022 E = A[I];
1023 }
1024 return { F, MaxLen };
1025}
1026
1027static bool isUndef(ArrayRef<int> Mask) {
1028 for (int Idx : Mask)
1029 if (Idx != -1)
1030 return false;
1031 return true;
1032}
1033
1034static bool isIdentity(ArrayRef<int> Mask) {
1035 for (int I = 0, E = Mask.size(); I != E; ++I) {
1036 int M = Mask[I];
1037 if (M >= 0 && M != I)
1038 return false;
1039 }
1040 return true;
1041}
1042
1043static bool isLowHalfOnly(ArrayRef<int> Mask) {
1044 int L = Mask.size();
1045 assert(L % 2 == 0);
1046 // Check if the second half of the mask is all-undef.
1047 return llvm::all_of(Mask.drop_front(L / 2), [](int M) { return M < 0; });
1048}
1049
1051 unsigned SegLen) {
1052 assert(isPowerOf2_32(SegLen));
1054 if (SM.MaxSrc == -1)
1055 return SegList;
1056
1057 unsigned Shift = Log2_32(SegLen);
1058 BitVector Segs(alignTo(SM.MaxSrc + 1, SegLen) >> Shift);
1059
1060 for (int M : SM.Mask) {
1061 if (M >= 0)
1062 Segs.set(M >> Shift);
1063 }
1064
1065 llvm::append_range(SegList, Segs.set_bits());
1066 return SegList;
1067}
1068
1070 unsigned SegLen) {
1071 // Calculate the layout of the output segments in terms of the input
1072 // segments.
1073 // For example [1,3,1,0] means that the output consists of 4 output
1074 // segments, where the first output segment has only elements of the
1075 // input segment at index 1. The next output segment only has elements
1076 // of the input segment 3, etc.
1077 // If an output segment only has undef elements, the value will be ~0u.
1078 // If an output segment has elements from more than one input segment,
1079 // the corresponding value will be ~1u.
1080 unsigned MaskLen = SM.Mask.size();
1081 assert(MaskLen % SegLen == 0);
1082 SmallVector<unsigned, 4> Map(MaskLen / SegLen);
1083
1084 for (int S = 0, E = Map.size(); S != E; ++S) {
1085 unsigned Idx = ~0u;
1086 for (int I = 0; I != static_cast<int>(SegLen); ++I) {
1087 int M = SM.Mask[S*SegLen + I];
1088 if (M < 0)
1089 continue;
1090 unsigned G = M / SegLen; // Input segment of this element.
1091 if (Idx == ~0u) {
1092 Idx = G;
1093 } else if (Idx != G) {
1094 Idx = ~1u;
1095 break;
1096 }
1097 }
1098 Map[S] = Idx;
1099 }
1100
1101 return Map;
1102}
1103
1105 unsigned SegLen, MutableArrayRef<int> PackedMask) {
1107 for (int I = OutSegMap.size() - 1; I >= 0; --I) {
1108 unsigned S = OutSegMap[I];
1109 assert(S != ~0u && "Unexpected undef");
1110 assert(S != ~1u && "Unexpected multi");
1111 if (InvMap.size() <= S)
1112 InvMap.resize(S+1);
1113 InvMap[S] = I;
1114 }
1115
1116 unsigned Shift = Log2_32(SegLen);
1117 for (int I = 0, E = Mask.size(); I != E; ++I) {
1118 int M = Mask[I];
1119 if (M >= 0) {
1120 int OutIdx = InvMap[M >> Shift];
1121 M = (M & (SegLen-1)) + SegLen*OutIdx;
1122 }
1123 PackedMask[I] = M;
1124 }
1125}
1126
1127bool HvxSelector::selectVectorConstants(SDNode *N) {
1128 // Constant vectors are generated as loads from constant pools or as
1129 // splats of a constant value. Since they are generated during the
1130 // selection process, the main selection algorithm is not aware of them.
1131 // Select them directly here.
1133 SetVector<SDNode*> WorkQ;
1134
1135 // The DAG can change (due to CSE) during selection, so cache all the
1136 // unselected nodes first to avoid traversing a mutating DAG.
1137 WorkQ.insert(N);
1138 for (unsigned i = 0; i != WorkQ.size(); ++i) {
1139 SDNode *W = WorkQ[i];
1140 if (!W->isMachineOpcode() && W->getOpcode() == HexagonISD::ISEL)
1141 Nodes.push_back(W);
1142 for (unsigned j = 0, f = W->getNumOperands(); j != f; ++j)
1143 WorkQ.insert(W->getOperand(j).getNode());
1144 }
1145
1146 for (SDNode *L : Nodes)
1147 select(L);
1148
1149 return !Nodes.empty();
1150}
1151
1152void HvxSelector::materialize(const ResultStack &Results) {
1153 DEBUG_WITH_TYPE("isel", {
1154 dbgs() << "Materializing\n";
1155 Results.print(dbgs(), DAG);
1156 });
1157 if (Results.empty())
1158 return;
1159 const SDLoc &dl(Results.InpNode);
1160 std::vector<SDValue> Output;
1161
1162 for (unsigned I = 0, E = Results.size(); I != E; ++I) {
1163 const NodeTemplate &Node = Results[I];
1164 std::vector<SDValue> Ops;
1165 for (const OpRef &R : Node.Ops) {
1166 assert(R.isValid());
1167 if (R.isValue()) {
1168 Ops.push_back(R.OpV);
1169 continue;
1170 }
1171 if (R.OpN & OpRef::Undef) {
1172 MVT::SimpleValueType SVT = MVT::SimpleValueType(R.OpN & OpRef::Index);
1173 Ops.push_back(ISel.selectUndef(dl, MVT(SVT)));
1174 continue;
1175 }
1176 // R is an index of a result.
1177 unsigned Part = R.OpN & OpRef::Whole;
1178 int Idx = SignExtend32(R.OpN & OpRef::Index, OpRef::IndexBits);
1179 if (Idx < 0)
1180 Idx += I;
1181 assert(Idx >= 0 && unsigned(Idx) < Output.size());
1182 SDValue Op = Output[Idx];
1183 MVT OpTy = Op.getValueType().getSimpleVT();
1184 if (Part != OpRef::Whole) {
1185 assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf);
1187 OpTy.getVectorNumElements()/2);
1188 unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo
1189 : Hexagon::vsub_hi;
1190 Op = DAG.getTargetExtractSubreg(Sub, dl, HalfTy, Op);
1191 }
1192 Ops.push_back(Op);
1193 } // for (Node : Results)
1194
1195 assert(Node.Ty != MVT::Other);
1196 SDNode *ResN = (Node.Opc == TargetOpcode::COPY)
1197 ? Ops.front().getNode()
1198 : DAG.getMachineNode(Node.Opc, dl, Node.Ty, Ops);
1199 Output.push_back(SDValue(ResN, 0));
1200 }
1201
1202 SDNode *OutN = Output.back().getNode();
1203 SDNode *InpN = Results.InpNode;
1204 DEBUG_WITH_TYPE("isel", {
1205 dbgs() << "Generated node:\n";
1206 OutN->dumpr(&DAG);
1207 });
1208
1209 ISel.ReplaceNode(InpN, OutN);
1210 selectVectorConstants(OutN);
1212}
1213
1214OpRef HvxSelector::concats(OpRef Lo, OpRef Hi, ResultStack &Results) {
1215 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1216 const SDLoc &dl(Results.InpNode);
1217 Results.push(TargetOpcode::REG_SEQUENCE, getPairVT(MVT::i8), {
1218 getConst32(Hexagon::HvxWRRegClassID, dl),
1219 Lo, getConst32(Hexagon::vsub_lo, dl),
1220 Hi, getConst32(Hexagon::vsub_hi, dl),
1221 });
1222 return OpRef::res(Results.top());
1223}
1224
1225OpRef HvxSelector::funnels(OpRef Va, OpRef Vb, int Amount,
1226 ResultStack &Results) {
1227 // Do a funnel shift towards the low end (shift right) by Amount bytes.
1228 // If Amount < 0, treat it as shift left, i.e. do a shift right by
1229 // Amount + HwLen.
1230 auto VecLen = static_cast<int>(HwLen);
1231
1232 if (Amount == 0)
1233 return Va;
1234 if (Amount == VecLen)
1235 return Vb;
1236
1237 MVT Ty = getSingleVT(MVT::i8);
1238 const SDLoc &dl(Results.InpNode);
1239
1240 if (Amount < 0)
1241 Amount += VecLen;
1242 if (Amount > VecLen) {
1243 Amount -= VecLen;
1244 std::swap(Va, Vb);
1245 }
1246
1247 if (isUInt<3>(Amount)) {
1248 SDValue A = getConst32(Amount, dl);
1249 Results.push(Hexagon::V6_valignbi, Ty, {Vb, Va, A});
1250 } else if (isUInt<3>(VecLen - Amount)) {
1251 SDValue A = getConst32(VecLen - Amount, dl);
1252 Results.push(Hexagon::V6_vlalignbi, Ty, {Vb, Va, A});
1253 } else {
1254 SDValue A = getConst32(Amount, dl);
1255 Results.push(Hexagon::A2_tfrsi, Ty, {A});
1256 Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(-1)});
1257 }
1258 return OpRef::res(Results.top());
1259}
1260
1261// Va, Vb are single vectors. If SM only uses two vector halves from Va/Vb,
1262// pack these halves into a single vector, and remap SM into NewMask to use
1263// the new vector instead.
1264OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
1265 ResultStack &Results, MutableArrayRef<int> NewMask,
1266 unsigned Options) {
1267 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1268 if (!Va.isValid() || !Vb.isValid())
1269 return OpRef::fail();
1270
1271 if (Vb.isUndef()) {
1272 llvm::copy(SM.Mask, NewMask.begin());
1273 return Va;
1274 }
1275 if (Va.isUndef()) {
1276 llvm::copy(SM.Mask, NewMask.begin());
1278 return Vb;
1279 }
1280
1281 MVT Ty = getSingleVT(MVT::i8);
1282 MVT PairTy = getPairVT(MVT::i8);
1283 OpRef Inp[2] = {Va, Vb};
1284 unsigned VecLen = SM.Mask.size();
1285
1286 auto valign = [this](OpRef Lo, OpRef Hi, unsigned Amt, MVT Ty,
1287 ResultStack &Results) {
1288 if (Amt == 0)
1289 return Lo;
1290 const SDLoc &dl(Results.InpNode);
1291 if (isUInt<3>(Amt) || isUInt<3>(HwLen - Amt)) {
1292 bool IsRight = isUInt<3>(Amt); // Right align.
1293 SDValue S = getConst32(IsRight ? Amt : HwLen - Amt, dl);
1294 unsigned Opc = IsRight ? Hexagon::V6_valignbi : Hexagon::V6_vlalignbi;
1295 Results.push(Opc, Ty, {Hi, Lo, S});
1296 return OpRef::res(Results.top());
1297 }
1298 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(Amt, dl)});
1299 OpRef A = OpRef::res(Results.top());
1300 Results.push(Hexagon::V6_valignb, Ty, {Hi, Lo, A});
1301 return OpRef::res(Results.top());
1302 };
1303
1304 // Segment is a vector half.
1305 unsigned SegLen = HwLen / 2;
1306
1307 // Check if we can shuffle vector halves around to get the used elements
1308 // into a single vector.
1309 shuffles::MaskT MaskH(SM.Mask);
1310 SmallVector<unsigned, 4> SegList = getInputSegmentList(SM.Mask, SegLen);
1311 unsigned SegCount = SegList.size();
1312 SmallVector<unsigned, 4> SegMap = getOutputSegmentMap(SM.Mask, SegLen);
1313
1314 if (SegList.empty())
1315 return OpRef::undef(Ty);
1316
1317 // NOTE:
1318 // In the following part of the function, where the segments are rearranged,
1319 // the shuffle mask SM can be of any length that is a multiple of a vector
1320 // (i.e. a multiple of 2*SegLen), and non-zero.
1321 // The output segment map is computed, and it may have any even number of
1322 // entries, but the rearrangement of input segments will be done based only
1323 // on the first two (non-undef) entries in the segment map.
1324 // For example, if the output map is 3, 1, 1, 3 (it can have at most two
1325 // distinct entries!), the segments 1 and 3 of Va/Vb will be packaged into
1326 // a single vector V = 3:1. The output mask will then be updated to use
1327 // seg(0,V), seg(1,V), seg(1,V), seg(0,V).
1328 //
1329 // Picking the segments based on the output map is an optimization. For
1330 // correctness it is only necessary that Seg0 and Seg1 are the two input
1331 // segments that are used in the output.
1332
1333 unsigned Seg0 = ~0u, Seg1 = ~0u;
1334 for (unsigned X : SegMap) {
1335 if (X == ~0u)
1336 continue;
1337 if (Seg0 == ~0u)
1338 Seg0 = X;
1339 else if (Seg1 != ~0u)
1340 break;
1341 if (X == ~1u || X != Seg0)
1342 Seg1 = X;
1343 }
1344
1345 if (SegCount == 1) {
1346 unsigned SrcOp = SegList[0] / 2;
1347 for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1348 int M = SM.Mask[I];
1349 if (M >= 0) {
1350 M -= SrcOp * HwLen;
1351 assert(M >= 0);
1352 }
1353 NewMask[I] = M;
1354 }
1355 return Inp[SrcOp];
1356 }
1357
1358 if (SegCount == 2) {
1359 // Seg0 should not be undef here: this would imply a SegList
1360 // with <= 1 elements, which was checked earlier.
1361 assert(Seg0 != ~0u);
1362
1363 // If Seg0 or Seg1 are "multi-defined", pick them from the input
1364 // segment list instead.
1365 if (Seg0 == ~1u || Seg1 == ~1u) {
1366 if (Seg0 == Seg1) {
1367 Seg0 = SegList[0];
1368 Seg1 = SegList[1];
1369 } else if (Seg0 == ~1u) {
1370 Seg0 = SegList[0] != Seg1 ? SegList[0] : SegList[1];
1371 } else {
1372 assert(Seg1 == ~1u);
1373 Seg1 = SegList[0] != Seg0 ? SegList[0] : SegList[1];
1374 }
1375 }
1376 assert(Seg0 != ~1u && Seg1 != ~1u);
1377
1378 assert(Seg0 != Seg1 && "Expecting different segments");
1379 const SDLoc &dl(Results.InpNode);
1380 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(SegLen, dl)});
1381 OpRef HL = OpRef::res(Results.top());
1382
1383 // Va = AB, Vb = CD
1384
1385 if (Seg0 / 2 == Seg1 / 2) {
1386 // Same input vector.
1387 Va = Inp[Seg0 / 2];
1388 if (Seg0 > Seg1) {
1389 // Swap halves.
1390 Results.push(Hexagon::V6_vror, Ty, {Inp[Seg0 / 2], HL});
1391 Va = OpRef::res(Results.top());
1392 }
1393 packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1394 } else if (Seg0 % 2 == Seg1 % 2) {
1395 // Picking AC, BD, CA, or DB.
1396 // vshuff(CD,AB,HL) -> BD:AC
1397 // vshuff(AB,CD,HL) -> DB:CA
1398 auto Vs = (Seg0 == 0 || Seg0 == 1) ? std::make_pair(Vb, Va) // AC or BD
1399 : std::make_pair(Va, Vb); // CA or DB
1400 Results.push(Hexagon::V6_vshuffvdd, PairTy, {Vs.first, Vs.second, HL});
1401 OpRef P = OpRef::res(Results.top());
1402 Va = (Seg0 == 0 || Seg0 == 2) ? OpRef::lo(P) : OpRef::hi(P);
1403 packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1404 } else {
1405 // Picking AD, BC, CB, or DA.
1406 if ((Seg0 == 0 && Seg1 == 3) || (Seg0 == 2 && Seg1 == 1)) {
1407 // AD or BC: this can be done using vmux.
1408 // Q = V6_pred_scalar2 SegLen
1409 // V = V6_vmux Q, (Va, Vb) or (Vb, Va)
1410 Results.push(Hexagon::V6_pred_scalar2, getBoolVT(), {HL});
1411 OpRef Qt = OpRef::res(Results.top());
1412 auto Vs = (Seg0 == 0) ? std::make_pair(Va, Vb) // AD
1413 : std::make_pair(Vb, Va); // CB
1414 Results.push(Hexagon::V6_vmux, Ty, {Qt, Vs.first, Vs.second});
1415 Va = OpRef::res(Results.top());
1416 packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1417 } else {
1418 // BC or DA: this could be done via valign by SegLen.
1419 // Do nothing here, because valign (if possible) will be generated
1420 // later on (make sure the Seg0 values are as expected).
1421 assert(Seg0 == 1 || Seg0 == 3);
1422 }
1423 }
1424 }
1425
1426 // Check if the arguments can be packed by valign(Va,Vb) or valign(Vb,Va).
1427 // FIXME: maybe remove this?
1428 ShuffleMask SMH(MaskH);
1429 assert(SMH.Mask.size() == VecLen);
1430 shuffles::MaskT MaskA(SMH.Mask);
1431
1432 if (SMH.MaxSrc - SMH.MinSrc >= static_cast<int>(HwLen)) {
1433 // valign(Lo=Va,Hi=Vb) won't work. Try swapping Va/Vb.
1434 shuffles::MaskT Swapped(SMH.Mask);
1436 ShuffleMask SW(Swapped);
1437 if (SW.MaxSrc - SW.MinSrc < static_cast<int>(HwLen)) {
1438 MaskA.assign(SW.Mask.begin(), SW.Mask.end());
1439 std::swap(Va, Vb);
1440 }
1441 }
1442 ShuffleMask SMA(MaskA);
1443 assert(SMA.Mask.size() == VecLen);
1444
1445 if (SMA.MaxSrc - SMA.MinSrc < static_cast<int>(HwLen)) {
1446 int ShiftR = SMA.MinSrc;
1447 if (ShiftR >= static_cast<int>(HwLen)) {
1448 Va = Vb;
1449 Vb = OpRef::undef(Ty);
1450 ShiftR -= HwLen;
1451 }
1452 OpRef RetVal = valign(Va, Vb, ShiftR, Ty, Results);
1453
1454 for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1455 int M = SMA.Mask[I];
1456 if (M != -1)
1457 M -= SMA.MinSrc;
1458 NewMask[I] = M;
1459 }
1460 return RetVal;
1461 }
1462
1463 // By here, packing by segment (half-vector) shuffling, and vector alignment
1464 // failed. Try vmux.
1465 // Note: since this is using the original mask, Va and Vb must not have been
1466 // modified.
1467
1468 if (Options & PackMux) {
1469 // If elements picked from Va and Vb have all different (source) indexes
1470 // (relative to the start of the argument), do a mux, and update the mask.
1471 BitVector Picked(HwLen);
1473 bool CanMux = true;
1474 for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1475 int M = SM.Mask[I];
1476 if (M == -1)
1477 continue;
1478 if (M >= static_cast<int>(HwLen))
1479 M -= HwLen;
1480 else
1481 MuxBytes[M] = 0xFF;
1482 if (Picked[M]) {
1483 CanMux = false;
1484 break;
1485 }
1486 NewMask[I] = M;
1487 }
1488 if (CanMux)
1489 return vmuxs(MuxBytes, Va, Vb, Results);
1490 }
1491 return OpRef::fail();
1492}
1493
1494// Va, Vb are vector pairs. If SM only uses two single vectors from Va/Vb,
1495// pack these vectors into a pair, and remap SM into NewMask to use the
1496// new pair instead.
1497OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
1498 ResultStack &Results, MutableArrayRef<int> NewMask) {
1499 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1501 if (SegList.empty())
1502 return OpRef::undef(getPairVT(MVT::i8));
1503
1504 // If more than two halves are used, bail.
1505 // TODO: be more aggressive here?
1506 unsigned SegCount = SegList.size();
1507 if (SegCount > 2)
1508 return OpRef::fail();
1509
1510 MVT HalfTy = getSingleVT(MVT::i8);
1511
1512 OpRef Inp[2] = { Va, Vb };
1513 OpRef Out[2] = { OpRef::undef(HalfTy), OpRef::undef(HalfTy) };
1514
1515 // Really make sure we have at most 2 vectors used in the mask.
1516 assert(SegCount <= 2);
1517
1518 for (int I = 0, E = SegList.size(); I != E; ++I) {
1519 unsigned S = SegList[I];
1520 OpRef Op = Inp[S / 2];
1521 Out[I] = (S & 1) ? OpRef::hi(Op) : OpRef::lo(Op);
1522 }
1523
1524 // NOTE: Using SegList as the packing map here (not SegMap). This works,
1525 // because we're not concerned here about the order of the segments (i.e.
1526 // single vectors) in the output pair. Changing the order of vectors is
1527 // free (as opposed to changing the order of vector halves as in packs),
1528 // and so there is no extra cost added in case the order needs to be
1529 // changed later.
1530 packSegmentMask(SM.Mask, SegList, HwLen, NewMask);
1531 return concats(Out[0], Out[1], Results);
1532}
1533
1534OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1535 ResultStack &Results) {
1536 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1537 MVT ByteTy = getSingleVT(MVT::i8);
1538 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1539 const SDLoc &dl(Results.InpNode);
1540 SDValue B = getVectorConstant(Bytes, dl);
1541 Results.push(Hexagon::V6_vd0, ByteTy, {});
1542 Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(B), OpRef::res(-1)});
1543 Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Vb, Va});
1544 return OpRef::res(Results.top());
1545}
1546
1547OpRef HvxSelector::vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1548 ResultStack &Results) {
1549 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1550 size_t S = Bytes.size() / 2;
1551 OpRef L = vmuxs(Bytes.take_front(S), OpRef::lo(Va), OpRef::lo(Vb), Results);
1552 OpRef H = vmuxs(Bytes.drop_front(S), OpRef::hi(Va), OpRef::hi(Vb), Results);
1553 return concats(L, H, Results);
1554}
1555
1556OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1557 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1558 unsigned VecLen = SM.Mask.size();
1559 assert(HwLen == VecLen);
1560 (void)VecLen;
1561 assert(all_of(SM.Mask, [this](int M) { return M == -1 || M < int(HwLen); }));
1562
1563 if (isIdentity(SM.Mask))
1564 return Va;
1565 if (isUndef(SM.Mask))
1566 return OpRef::undef(getSingleVT(MVT::i8));
1567
1568 // First, check for rotations.
1569 if (auto Dist = rotationDistance(SM, VecLen)) {
1570 OpRef Rotate = funnels(Va, Va, *Dist, Results);
1571 if (Rotate.isValid())
1572 return Rotate;
1573 }
1574 unsigned HalfLen = HwLen / 2;
1575 assert(isPowerOf2_32(HalfLen));
1576
1577 // Handle special case where the output is the same half of the input
1578 // repeated twice, i.e. if Va = AB, then handle the output of AA or BB.
1579 std::pair<int, unsigned> Strip1 = findStrip(SM.Mask, 1, HalfLen);
1580 if ((Strip1.first & ~HalfLen) == 0 && Strip1.second == HalfLen) {
1581 std::pair<int, unsigned> Strip2 =
1582 findStrip(SM.Mask.drop_front(HalfLen), 1, HalfLen);
1583 if (Strip1 == Strip2) {
1584 const SDLoc &dl(Results.InpNode);
1585 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(HalfLen, dl)});
1586 Results.push(Hexagon::V6_vshuffvdd, getPairVT(MVT::i8),
1587 {Va, Va, OpRef::res(Results.top())});
1588 OpRef S = OpRef::res(Results.top());
1589 return (Strip1.first == 0) ? OpRef::lo(S) : OpRef::hi(S);
1590 }
1591 }
1592
1593 OpRef P = perfect(SM, Va, Results);
1594 if (P.isValid())
1595 return P;
1596 return butterfly(SM, Va, Results);
1597}
1598
1599OpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb,
1600 ResultStack &Results) {
1601 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1602 if (isUndef(SM.Mask))
1603 return OpRef::undef(getSingleVT(MVT::i8));
1604
1605 OpRef C = contracting(SM, Va, Vb, Results);
1606 if (C.isValid())
1607 return C;
1608
1609 int VecLen = SM.Mask.size();
1610 shuffles::MaskT PackedMask(VecLen);
1611 OpRef P = packs(SM, Va, Vb, Results, PackedMask);
1612 if (P.isValid())
1613 return shuffs1(ShuffleMask(PackedMask), P, Results);
1614
1615 // TODO: Before we split the mask, try perfect shuffle on concatenated
1616 // operands.
1617
1618 shuffles::MaskT MaskL(VecLen), MaskR(VecLen);
1619 splitMask(SM.Mask, MaskL, MaskR);
1620
1621 OpRef L = shuffs1(ShuffleMask(MaskL), Va, Results);
1622 OpRef R = shuffs1(ShuffleMask(MaskR), Vb, Results);
1623 if (!L.isValid() || !R.isValid())
1624 return OpRef::fail();
1625
1626 SmallVector<uint8_t, 128> Bytes(VecLen);
1627 for (int I = 0; I != VecLen; ++I) {
1628 if (MaskL[I] != -1)
1629 Bytes[I] = 0xFF;
1630 }
1631 return vmuxs(Bytes, L, R, Results);
1632}
1633
1634OpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1635 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1636 int VecLen = SM.Mask.size();
1637
1638 if (isIdentity(SM.Mask))
1639 return Va;
1640 if (isUndef(SM.Mask))
1641 return OpRef::undef(getPairVT(MVT::i8));
1642
1643 shuffles::MaskT PackedMask(VecLen);
1644 OpRef P = packs(SM, OpRef::lo(Va), OpRef::hi(Va), Results, PackedMask);
1645 if (P.isValid()) {
1646 ShuffleMask PM(PackedMask);
1647 OpRef E = expanding(PM, P, Results);
1648 if (E.isValid())
1649 return E;
1650
1651 OpRef L = shuffs1(PM.lo(), P, Results);
1652 OpRef H = shuffs1(PM.hi(), P, Results);
1653 if (L.isValid() && H.isValid())
1654 return concats(L, H, Results);
1655 }
1656
1657 if (!isLowHalfOnly(SM.Mask)) {
1658 // Doing a perfect shuffle on a low-half mask (i.e. where the upper half
1659 // is all-undef) may produce a perfect shuffle that generates legitimate
1660 // upper half. This isn't wrong, but if the perfect shuffle was possible,
1661 // then there is a good chance that a shorter (contracting) code may be
1662 // used as well (e.g. V6_vshuffeb, etc).
1663 OpRef R = perfect(SM, Va, Results);
1664 if (R.isValid())
1665 return R;
1666 // TODO commute the mask and try the opposite order of the halves.
1667 }
1668
1669 OpRef L = shuffs2(SM.lo(), OpRef::lo(Va), OpRef::hi(Va), Results);
1670 OpRef H = shuffs2(SM.hi(), OpRef::lo(Va), OpRef::hi(Va), Results);
1671 if (L.isValid() && H.isValid())
1672 return concats(L, H, Results);
1673
1674 return OpRef::fail();
1675}
1676
1677OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
1678 ResultStack &Results) {
1679 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1680 if (isUndef(SM.Mask))
1681 return OpRef::undef(getPairVT(MVT::i8));
1682
1683 int VecLen = SM.Mask.size();
1684 SmallVector<int,256> PackedMask(VecLen);
1685 OpRef P = packp(SM, Va, Vb, Results, PackedMask);
1686 if (P.isValid())
1687 return shuffp1(ShuffleMask(PackedMask), P, Results);
1688
1689 SmallVector<int,256> MaskL(VecLen), MaskR(VecLen);
1690 splitMask(SM.Mask, MaskL, MaskR);
1691
1692 OpRef L = shuffp1(ShuffleMask(MaskL), Va, Results);
1693 OpRef R = shuffp1(ShuffleMask(MaskR), Vb, Results);
1694 if (!L.isValid() || !R.isValid())
1695 return OpRef::fail();
1696
1697 // Mux the results.
1698 SmallVector<uint8_t,256> Bytes(VecLen);
1699 for (int I = 0; I != VecLen; ++I) {
1700 if (MaskL[I] != -1)
1701 Bytes[I] = 0xFF;
1702 }
1703 return vmuxp(Bytes, L, R, Results);
1704}
1705
1706namespace {
1707 struct Deleter : public SelectionDAG::DAGNodeDeletedListener {
1708 template <typename T>
1709 Deleter(SelectionDAG &D, T &C)
1710 : SelectionDAG::DAGNodeDeletedListener(D, [&C] (SDNode *N, SDNode *E) {
1711 C.erase(N);
1712 }) {}
1713 };
1714
1715 template <typename T>
1716 struct NullifyingVector : public T {
1718 NullifyingVector(T &&V) : T(V) {
1719 for (unsigned i = 0, e = T::size(); i != e; ++i) {
1720 SDNode *&N = T::operator[](i);
1721 Refs[N] = &N;
1722 }
1723 }
1724 void erase(SDNode *N) {
1725 auto F = Refs.find(N);
1726 if (F != Refs.end())
1727 *F->second = nullptr;
1728 }
1729 };
1730}
1731
1732void HvxSelector::select(SDNode *ISelN) {
1733 // What's important here is to select the right set of nodes. The main
1734 // selection algorithm loops over nodes in a topological order, i.e. users
1735 // are visited before their operands.
1736 //
1737 // It is an error to have an unselected node with a selected operand, and
1738 // there is an assertion in the main selector code to enforce that.
1739 //
1740 // Such a situation could occur if we selected a node, which is both a
1741 // subnode of ISelN, and a subnode of an unrelated (and yet unselected)
1742 // node in the DAG.
1743 assert(ISelN->getOpcode() == HexagonISD::ISEL);
1744 SDNode *N0 = ISelN->getOperand(0).getNode();
1745
1746 // There could have been nodes created (i.e. inserted into the DAG)
1747 // that are now dead. Remove them, in case they use any of the nodes
1748 // to select (and make them look shared).
1750
1751 SetVector<SDNode *> SubNodes;
1752
1753 if (!N0->isMachineOpcode()) {
1754 // Don't want to select N0 if it's shared with another node, except if
1755 // it's shared with other ISELs.
1756 auto IsISelN = [](SDNode *T) { return T->getOpcode() == HexagonISD::ISEL; };
1757 if (llvm::all_of(N0->users(), IsISelN))
1758 SubNodes.insert(N0);
1759 }
1760 if (SubNodes.empty()) {
1761 ISel.ReplaceNode(ISelN, N0);
1762 return;
1763 }
1764
1765 // Need to manually select the nodes that are dominated by the ISEL. Other
1766 // nodes are reachable from the rest of the DAG, and so will be selected
1767 // by the DAG selection routine.
1768 SetVector<SDNode*> Dom, NonDom;
1769 Dom.insert(N0);
1770
1771 auto IsDomRec = [&Dom, &NonDom] (SDNode *T, auto Rec) -> bool {
1772 if (Dom.count(T))
1773 return true;
1774 if (T->use_empty() || NonDom.count(T))
1775 return false;
1776 for (SDNode *U : T->users()) {
1777 // If T is reachable from a known non-dominated node, then T itself
1778 // is non-dominated.
1779 if (!Rec(U, Rec)) {
1780 NonDom.insert(T);
1781 return false;
1782 }
1783 }
1784 Dom.insert(T);
1785 return true;
1786 };
1787
1788 auto IsDom = [&IsDomRec] (SDNode *T) { return IsDomRec(T, IsDomRec); };
1789
1790 // Add the rest of nodes dominated by ISEL to SubNodes.
1791 for (unsigned I = 0; I != SubNodes.size(); ++I) {
1792 for (SDValue Op : SubNodes[I]->ops()) {
1793 SDNode *O = Op.getNode();
1794 if (IsDom(O))
1795 SubNodes.insert(O);
1796 }
1797 }
1798
1799 // Do a topological sort of nodes from Dom.
1800 SetVector<SDNode*> TmpQ;
1801
1802 std::map<SDNode *, unsigned> OpCount;
1803 for (SDNode *T : Dom) {
1804 unsigned NumDomOps = llvm::count_if(T->ops(), [&Dom](const SDUse &U) {
1805 return Dom.count(U.getNode());
1806 });
1807
1808 OpCount.insert({T, NumDomOps});
1809 if (NumDomOps == 0)
1810 TmpQ.insert(T);
1811 }
1812
1813 for (unsigned I = 0; I != TmpQ.size(); ++I) {
1814 SDNode *S = TmpQ[I];
1815 for (SDNode *U : S->users()) {
1816 if (U == ISelN)
1817 continue;
1818 auto F = OpCount.find(U);
1819 assert(F != OpCount.end());
1820 if (F->second > 0 && !--F->second)
1821 TmpQ.insert(F->first);
1822 }
1823 }
1824
1825 // Remove the marker.
1826 ISel.ReplaceNode(ISelN, N0);
1827
1828 assert(SubNodes.size() == TmpQ.size());
1829 NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
1830
1831 Deleter DUQ(DAG, Queue);
1832 for (SDNode *S : reverse(Queue)) {
1833 if (S == nullptr)
1834 continue;
1835 DEBUG_WITH_TYPE("isel", {dbgs() << "HVX selecting: "; S->dump(&DAG);});
1836 ISel.Select(S);
1837 }
1838}
1839
1840bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
1841 MVT ResTy, SDValue Va, SDValue Vb,
1842 SDNode *N) {
1843 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1844 MVT ElemTy = ResTy.getVectorElementType();
1845 assert(ElemTy == MVT::i8);
1846 unsigned VecLen = Mask.size();
1847 bool HavePairs = (2*HwLen == VecLen);
1848 MVT SingleTy = getSingleVT(MVT::i8);
1849
1850 // The prior attempts to handle this shuffle may have left a bunch of
1851 // dead nodes in the DAG (such as constants). These nodes will be added
1852 // at the end of DAG's node list, which at that point had already been
1853 // sorted topologically. In the main selection loop, the node list is
1854 // traversed backwards from the root node, which means that any new
1855 // nodes (from the end of the list) will not be visited.
1856 // Scalarization will replace the shuffle node with the scalarized
1857 // expression, and if that expression reused any if the leftoever (dead)
1858 // nodes, these nodes would not be selected (since the "local" selection
1859 // only visits nodes that are not in AllNodes).
1860 // To avoid this issue, remove all dead nodes from the DAG now.
1861// DAG.RemoveDeadNodes();
1862
1864 LLVMContext &Ctx = *DAG.getContext();
1865 MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
1866 for (int I : Mask) {
1867 if (I < 0) {
1868 Ops.push_back(ISel.selectUndef(dl, LegalTy));
1869 continue;
1870 }
1871 SDValue Vec;
1872 unsigned M = I;
1873 if (M < VecLen) {
1874 Vec = Va;
1875 } else {
1876 Vec = Vb;
1877 M -= VecLen;
1878 }
1879 if (HavePairs) {
1880 if (M < HwLen) {
1881 Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, Vec);
1882 } else {
1883 Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, Vec);
1884 M -= HwLen;
1885 }
1886 }
1887 SDValue Idx = DAG.getConstant(M, dl, MVT::i32);
1888 SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalTy, {Vec, Idx});
1890 assert(L.getNode());
1891 Ops.push_back(L);
1892 }
1893
1894 SDValue LV;
1895 if (2*HwLen == VecLen) {
1896 SDValue B0 = DAG.getBuildVector(SingleTy, dl, {Ops.data(), HwLen});
1897 SDValue L0 = Lower.LowerOperation(B0, DAG);
1898 SDValue B1 = DAG.getBuildVector(SingleTy, dl, {Ops.data()+HwLen, HwLen});
1899 SDValue L1 = Lower.LowerOperation(B1, DAG);
1900 // XXX CONCAT_VECTORS is legal for HVX vectors. Legalizing (lowering)
1901 // functions may expect to be called only for illegal operations, so
1902 // make sure that they are not called for legal ones. Develop a better
1903 // mechanism for dealing with this.
1904 LV = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, {L0, L1});
1905 } else {
1906 SDValue BV = DAG.getBuildVector(ResTy, dl, Ops);
1907 LV = Lower.LowerOperation(BV, DAG);
1908 }
1909
1910 assert(!N->use_empty());
1911 SDValue IS = DAG.getNode(HexagonISD::ISEL, dl, ResTy, LV);
1912 ISel.ReplaceNode(N, IS.getNode());
1913 select(IS.getNode());
1915 return true;
1916}
1917
1919 unsigned Width) {
1920 auto possibilities = [](ArrayRef<uint8_t> Bs, unsigned Width) -> uint32_t {
1921 unsigned Impossible = ~(1u << Width) + 1;
1922 for (unsigned I = 0, E = Bs.size(); I != E; ++I) {
1923 uint8_t B = Bs[I];
1924 if (B == 0xff)
1925 continue;
1926 if (~Impossible == 0)
1927 break;
1928 for (unsigned Log = 0; Log != Width; ++Log) {
1929 if (Impossible & (1u << Log))
1930 continue;
1931 unsigned Expected = (I >> Log) % 2;
1932 if (B != Expected)
1933 Impossible |= (1u << Log);
1934 }
1935 }
1936 return ~Impossible;
1937 };
1938
1939 SmallVector<uint32_t, 8> Worklist(Width);
1940
1941 for (unsigned BitIdx = 0; BitIdx != Width; ++BitIdx) {
1942 SmallVector<uint8_t> BitValues(SM.Mask.size());
1943 for (int i = 0, e = SM.Mask.size(); i != e; ++i) {
1944 int M = SM.Mask[i];
1945 if (M < 0)
1946 BitValues[i] = 0xff;
1947 else
1948 BitValues[i] = (M & (1u << BitIdx)) != 0;
1949 }
1950 Worklist[BitIdx] = possibilities(BitValues, Width);
1951 }
1952
1953 // If there is a word P in Worklist that matches multiple possibilities,
1954 // then if any other word Q matches any of the possibilities matched by P,
1955 // then Q matches all the possibilities matched by P. In fact, P == Q.
1956 // In other words, for each words P, Q, the sets of possibilities matched
1957 // by P and Q are either equal or disjoint (no partial overlap).
1958 //
1959 // Illustration: For 4-bit values there are 4 complete sequences:
1960 // a: 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
1961 // b: 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1
1962 // c: 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1
1963 // d: 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
1964 //
1965 // Words containing unknown bits that match two of the complete
1966 // sequences:
1967 // ab: 0 u u 1 0 u u 1 0 u u 1 0 u u 1
1968 // ac: 0 u 0 u u 1 u 1 0 u 0 u u 1 u 1
1969 // ad: 0 u 0 u 0 u 0 u u 1 u 1 u 1 u 1
1970 // bc: 0 0 u u u u 1 1 0 0 u u u u 1 1
1971 // bd: 0 0 u u 0 0 u u u u 1 1 u u 1 1
1972 // cd: 0 0 0 0 u u u u u u u u 1 1 1 1
1973 //
1974 // Proof of the claim above:
1975 // Let P be a word that matches s0 and s1. For that to happen, all known
1976 // bits in P must match s0 and s1 exactly.
1977 // Assume there is Q that matches s1. Note that since P and Q came from
1978 // the same shuffle mask, the positions of unknown bits in P and Q match
1979 // exactly, which makes the indices of known bits be exactly the same
1980 // between P and Q. Since P matches s0 and s1, the known bits of P much
1981 // match both s0 and s1. Also, since Q matches s1, the known bits in Q
1982 // are exactly the same as in s1, which means that they are exactly the
1983 // same as in P. This implies that P == Q.
1984
1985 // There can be a situation where there are more entries with the same
1986 // bits set than there are set bits (e.g. value 9 occurring more than 2
1987 // times). In such cases it will be impossible to complete this to a
1988 // perfect shuffle.
1989 SmallVector<uint32_t, 8> Sorted(Worklist);
1990 llvm::sort(Sorted);
1991
1992 for (unsigned I = 0, E = Sorted.size(); I != E;) {
1993 unsigned P = Sorted[I], Count = 1;
1994 while (++I != E && P == Sorted[I])
1995 ++Count;
1996 if ((unsigned)llvm::popcount(P) < Count) {
1997 // Reset all occurrences of P, if there are more occurrences of P
1998 // than there are bits in P.
1999 llvm::replace(Worklist, P, 0U);
2000 }
2001 }
2002
2003 return Worklist;
2004}
2005
2008 // Pick a completion if there are multiple possibilities. For now just
2009 // select any valid completion.
2010 SmallVector<uint32_t, 8> Comps(Completions);
2011
2012 for (unsigned I = 0; I != Width; ++I) {
2013 uint32_t P = Comps[I];
2014 assert(P != 0);
2015 if (isPowerOf2_32(P))
2016 continue;
2017 // T = least significant bit of P.
2018 uint32_t T = P ^ ((P - 1) & P);
2019 // Clear T in all remaining words matching P.
2020 for (unsigned J = I + 1; J != Width; ++J) {
2021 if (Comps[J] == P)
2022 Comps[J] ^= T;
2023 }
2024 Comps[I] = T;
2025 }
2026
2027#ifndef NDEBUG
2028 // Check that we have generated a valid completion.
2029 uint32_t OrAll = 0;
2030 for (uint32_t C : Comps) {
2032 OrAll |= C;
2033 }
2034 assert(OrAll == (1u << Width) -1);
2035#endif
2036
2037 return Comps;
2038}
2039
2040std::optional<int> HvxSelector::rotationDistance(ShuffleMask SM,
2041 unsigned WrapAt) {
2042 std::optional<int> Dist;
2043 for (int I = 0, E = SM.Mask.size(); I != E; ++I) {
2044 int M = SM.Mask[I];
2045 if (M < 0)
2046 continue;
2047 if (Dist) {
2048 if ((I + *Dist) % static_cast<int>(WrapAt) != M)
2049 return std::nullopt;
2050 } else {
2051 // Integer a%b operator assumes rounding towards zero by /, so it
2052 // "misbehaves" when a crosses 0 (the remainder also changes sign).
2053 // Add WrapAt in an attempt to keep I+Dist non-negative.
2054 Dist = M - I;
2055 if (Dist < 0)
2056 Dist = *Dist + WrapAt;
2057 }
2058 }
2059 return Dist;
2060}
2061
2062OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb,
2063 ResultStack &Results) {
2064 DEBUG_WITH_TYPE("isel", { dbgs() << __func__ << '\n'; });
2065 if (!Va.isValid() || !Vb.isValid())
2066 return OpRef::fail();
2067
2068 // Contracting shuffles, i.e. instructions that always discard some bytes
2069 // from the operand vectors.
2070 //
2071 // Funnel shifts
2072 // V6_vshuff{e,o}b
2073 // V6_vshuf{e,o}h
2074 // V6_vdealb4w
2075 // V6_vpack{e,o}{b,h}
2076
2077 int VecLen = SM.Mask.size();
2078
2079 // First, check for funnel shifts.
2080 if (auto Dist = rotationDistance(SM, 2 * VecLen)) {
2081 OpRef Funnel = funnels(Va, Vb, *Dist, Results);
2082 if (Funnel.isValid())
2083 return Funnel;
2084 }
2085
2086 MVT SingleTy = getSingleVT(MVT::i8);
2087 MVT PairTy = getPairVT(MVT::i8);
2088
2089 auto same = [](ArrayRef<int> Mask1, ArrayRef<int> Mask2) -> bool {
2090 return Mask1 == Mask2;
2091 };
2092
2093 using PackConfig = std::pair<unsigned, bool>;
2094 PackConfig Packs[] = {
2095 {1, false}, // byte, even
2096 {1, true}, // byte, odd
2097 {2, false}, // half, even
2098 {2, true}, // half, odd
2099 };
2100
2101 { // Check vpack
2102 unsigned Opcodes[] = {
2103 Hexagon::V6_vpackeb,
2104 Hexagon::V6_vpackob,
2105 Hexagon::V6_vpackeh,
2106 Hexagon::V6_vpackoh,
2107 };
2108 for (int i = 0, e = std::size(Opcodes); i != e; ++i) {
2109 auto [Size, Odd] = Packs[i];
2110 if (same(SM.Mask, shuffles::mask(shuffles::vpack, HwLen, Size, Odd))) {
2111 Results.push(Opcodes[i], SingleTy, {Vb, Va});
2112 return OpRef::res(Results.top());
2113 }
2114 }
2115 }
2116
2117 { // Check vshuff
2118 unsigned Opcodes[] = {
2119 Hexagon::V6_vshuffeb,
2120 Hexagon::V6_vshuffob,
2121 Hexagon::V6_vshufeh,
2122 Hexagon::V6_vshufoh,
2123 };
2124 for (int i = 0, e = std::size(Opcodes); i != e; ++i) {
2125 auto [Size, Odd] = Packs[i];
2126 if (same(SM.Mask, shuffles::mask(shuffles::vshuff, HwLen, Size, Odd))) {
2127 Results.push(Opcodes[i], SingleTy, {Vb, Va});
2128 return OpRef::res(Results.top());
2129 }
2130 }
2131 }
2132
2133 { // Check vdeal
2134 // There is no "V6_vdealeb", etc, but the supposed behavior of vdealeb
2135 // is equivalent to "(V6_vpackeb (V6_vdealvdd Vu, Vv, -2))". Other such
2136 // variants of "deal" can be done similarly.
2137 unsigned Opcodes[] = {
2138 Hexagon::V6_vpackeb,
2139 Hexagon::V6_vpackob,
2140 Hexagon::V6_vpackeh,
2141 Hexagon::V6_vpackoh,
2142 };
2143 const SDLoc &dl(Results.InpNode);
2144
2145 for (int i = 0, e = std::size(Opcodes); i != e; ++i) {
2146 auto [Size, Odd] = Packs[i];
2147 if (same(SM.Mask, shuffles::mask(shuffles::vdeal, HwLen, Size, Odd))) {
2148 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(-2 * Size, dl)});
2149 Results.push(Hexagon::V6_vdealvdd, PairTy, {Vb, Va, OpRef::res(-1)});
2150 auto vdeal = OpRef::res(Results.top());
2151 Results.push(Opcodes[i], SingleTy,
2152 {OpRef::hi(vdeal), OpRef::lo(vdeal)});
2153 return OpRef::res(Results.top());
2154 }
2155 }
2156 }
2157
2158 if (same(SM.Mask, shuffles::mask(shuffles::vdealb4w, HwLen))) {
2159 Results.push(Hexagon::V6_vdealb4w, SingleTy, {Vb, Va});
2160 return OpRef::res(Results.top());
2161 }
2162
2163 return OpRef::fail();
2164}
2165
2166OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &Results) {
2167 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
2168 // Expanding shuffles (using all elements and inserting into larger vector):
2169 //
2170 // V6_vunpacku{b,h} [*]
2171 //
2172 // [*] Only if the upper elements (filled with 0s) are "don't care" in Mask.
2173 //
2174 // Note: V6_vunpacko{b,h} are or-ing the high byte/half in the result, so
2175 // they are not shuffles.
2176 //
2177 // The argument is a single vector.
2178
2179 int VecLen = SM.Mask.size();
2180 assert(2*HwLen == unsigned(VecLen) && "Expecting vector-pair type");
2181
2182 std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen);
2183
2184 // The patterns for the unpacks, in terms of the starting offsets of the
2185 // consecutive strips (L = length of the strip, N = VecLen):
2186 //
2187 // vunpacku: 0, -1, L, -1, 2L, -1 ...
2188
2189 if (Strip.first != 0)
2190 return OpRef::fail();
2191
2192 // The vunpackus only handle byte and half-word.
2193 if (Strip.second != 1 && Strip.second != 2)
2194 return OpRef::fail();
2195
2196 int N = VecLen;
2197 int L = Strip.second;
2198
2199 // First, check the non-ignored strips.
2200 for (int I = 2*L; I < N; I += 2*L) {
2201 auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
2202 if (S.second != unsigned(L))
2203 return OpRef::fail();
2204 if (2*S.first != I)
2205 return OpRef::fail();
2206 }
2207 // Check the -1s.
2208 for (int I = L; I < N; I += 2*L) {
2209 auto S = findStrip(SM.Mask.drop_front(I), 0, N-I);
2210 if (S.first != -1 || S.second != unsigned(L))
2211 return OpRef::fail();
2212 }
2213
2214 unsigned Opc = Strip.second == 1 ? Hexagon::V6_vunpackub
2215 : Hexagon::V6_vunpackuh;
2216 Results.push(Opc, getPairVT(MVT::i8), {Va});
2217 return OpRef::res(Results.top());
2218}
2219
2220OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
2221 DEBUG_WITH_TYPE("isel", { dbgs() << __func__ << '\n'; });
2222 // V6_vdeal{b,h}
2223 // V6_vshuff{b,h}
2224
2225 // V6_vshufoe{b,h} those are equivalent to vshuffvdd(..,{1,2})
2226 // V6_vshuffvdd (V6_vshuff)
2227 // V6_dealvdd (V6_vdeal)
2228
2229 int VecLen = SM.Mask.size();
2230 assert(isPowerOf2_32(VecLen) && Log2_32(VecLen) <= 8);
2231 unsigned LogLen = Log2_32(VecLen);
2232 unsigned HwLog = Log2_32(HwLen);
2233 // The result length must be the same as the length of a single vector,
2234 // or a vector pair.
2235 assert(LogLen == HwLog || LogLen == HwLog + 1);
2236 bool HavePairs = LogLen == HwLog + 1;
2237
2238 SmallVector<unsigned, 8> Perm(LogLen);
2239
2240 // Check if this could be a perfect shuffle, or a combination of perfect
2241 // shuffles.
2242 //
2243 // Consider this permutation (using hex digits to make the ASCII diagrams
2244 // easier to read):
2245 // { 0, 8, 1, 9, 2, A, 3, B, 4, C, 5, D, 6, E, 7, F }.
2246 // This is a "deal" operation: divide the input into two halves, and
2247 // create the output by picking elements by alternating between these two
2248 // halves:
2249 // 0 1 2 3 4 5 6 7 --> 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F [*]
2250 // 8 9 A B C D E F
2251 //
2252 // Aside from a few special explicit cases (V6_vdealb, etc.), HVX provides
2253 // a somwehat different mechanism that could be used to perform shuffle/
2254 // deal operations: a 2x2 transpose.
2255 // Consider the halves of inputs again, they can be interpreted as a 2x8
2256 // matrix. A 2x8 matrix can be looked at four 2x2 matrices concatenated
2257 // together. Now, when considering 2 elements at a time, it will be a 2x4
2258 // matrix (with elements 01, 23, 45, etc.), or two 2x2 matrices:
2259 // 01 23 45 67
2260 // 89 AB CD EF
2261 // With groups of 4, this will become a single 2x2 matrix, and so on.
2262 //
2263 // The 2x2 transpose instruction works by transposing each of the 2x2
2264 // matrices (or "sub-matrices"), given a specific group size. For example,
2265 // if the group size is 1 (i.e. each element is its own group), there
2266 // will be four transposes of the four 2x2 matrices that form the 2x8.
2267 // For example, with the inputs as above, the result will be:
2268 // 0 8 2 A 4 C 6 E
2269 // 1 9 3 B 5 D 7 F
2270 // Now, this result can be transposed again, but with the group size of 2:
2271 // 08 19 4C 5D
2272 // 2A 3B 6E 7F
2273 // If we then transpose that result, but with the group size of 4, we get:
2274 // 0819 2A3B
2275 // 4C5D 6E7F
2276 // If we concatenate these two rows, it will be
2277 // 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F
2278 // which is the same as the "deal" [*] above.
2279 //
2280 // In general, a "deal" of individual elements is a series of 2x2 transposes,
2281 // with changing group size. HVX has two instructions:
2282 // Vdd = V6_vdealvdd Vu, Vv, Rt
2283 // Vdd = V6_shufvdd Vu, Vv, Rt
2284 // that perform exactly that. The register Rt controls which transposes are
2285 // going to happen: a bit at position n (counting from 0) indicates that a
2286 // transpose with a group size of 2^n will take place. If multiple bits are
2287 // set, multiple transposes will happen: vdealvdd will perform them starting
2288 // with the largest group size, vshuffvdd will do them in the reverse order.
2289 //
2290 // The main observation is that each 2x2 transpose corresponds to swapping
2291 // columns of bits in the binary representation of the values.
2292 //
2293 // The numbers {3,2,1,0} and the log2 of the number of contiguous 1 bits
2294 // in a given column. The * denote the columns that will be swapped.
2295 // The transpose with the group size 2^n corresponds to swapping columns
2296 // 3 (the highest log) and log2(n):
2297 //
2298 // 3 2 1 0 0 2 1 3 0 2 3 1
2299 // * * * * * *
2300 // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2301 // 1 0 0 0 1 8 1 0 0 0 8 1 0 0 0 8 1 0 0 0
2302 // 2 0 0 1 0 2 0 0 1 0 1 0 0 0 1 1 0 0 0 1
2303 // 3 0 0 1 1 A 1 0 1 0 9 1 0 0 1 9 1 0 0 1
2304 // 4 0 1 0 0 4 0 1 0 0 4 0 1 0 0 2 0 0 1 0
2305 // 5 0 1 0 1 C 1 1 0 0 C 1 1 0 0 A 1 0 1 0
2306 // 6 0 1 1 0 6 0 1 1 0 5 0 1 0 1 3 0 0 1 1
2307 // 7 0 1 1 1 E 1 1 1 0 D 1 1 0 1 B 1 0 1 1
2308 // 8 1 0 0 0 1 0 0 0 1 2 0 0 1 0 4 0 1 0 0
2309 // 9 1 0 0 1 9 1 0 0 1 A 1 0 1 0 C 1 1 0 0
2310 // A 1 0 1 0 3 0 0 1 1 3 0 0 1 1 5 0 1 0 1
2311 // B 1 0 1 1 B 1 0 1 1 B 1 0 1 1 D 1 1 0 1
2312 // C 1 1 0 0 5 0 1 0 1 6 0 1 1 0 6 0 1 1 0
2313 // D 1 1 0 1 D 1 1 0 1 E 1 1 1 0 E 1 1 1 0
2314 // E 1 1 1 0 7 0 1 1 1 7 0 1 1 1 7 0 1 1 1
2315 // F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 F 1 1 1 1
2316
2317 // There is one special case that is not a perfect shuffle, but can be
2318 // turned into one easily: when the shuffle operates on a vector pair,
2319 // but the two vectors in the pair are swapped. The code that identifies
2320 // perfect shuffles will reject it, unless the order is reversed.
2321 shuffles::MaskT MaskStorage(SM.Mask);
2322 bool InvertedPair = false;
2323 if (HavePairs && SM.Mask[0] >= int(HwLen)) {
2324 for (int i = 0, e = SM.Mask.size(); i != e; ++i) {
2325 int M = SM.Mask[i];
2326 MaskStorage[i] = M >= int(HwLen) ? M - HwLen : M + HwLen;
2327 }
2328 InvertedPair = true;
2329 SM = ShuffleMask(MaskStorage);
2330 }
2331
2332 auto Comps = getPerfectCompletions(SM, LogLen);
2333 if (llvm::is_contained(Comps, 0))
2334 return OpRef::fail();
2335
2336 auto Pick = completeToPerfect(Comps, LogLen);
2337 for (unsigned I = 0; I != LogLen; ++I)
2338 Perm[I] = Log2_32(Pick[I]);
2339
2340 // Once we have Perm, represent it as cycles. Denote the maximum log2
2341 // (equal to log2(VecLen)-1) as M. The cycle containing M can then be
2342 // written as (M a1 a2 a3 ... an). That cycle can be broken up into
2343 // simple swaps as (M a1)(M a2)(M a3)...(M an), with the composition
2344 // order being from left to right. Any (contiguous) segment where the
2345 // values ai, ai+1...aj are either all increasing or all decreasing,
2346 // can be implemented via a single vshuffvdd/vdealvdd respectively.
2347 //
2348 // If there is a cycle (a1 a2 ... an) that does not involve M, it can
2349 // be written as (M an)(a1 a2 ... an)(M a1). The first two cycles can
2350 // then be folded to get (M a1 a2 ... an)(M a1), and the above procedure
2351 // can be used to generate a sequence of vshuffvdd/vdealvdd.
2352 //
2353 // Example:
2354 // Assume M = 4 and consider a permutation (0 1)(2 3). It can be written
2355 // as (4 0 1)(4 0) composed with (4 2 3)(4 2), or simply
2356 // (4 0 1)(4 0)(4 2 3)(4 2).
2357 // It can then be expanded into swaps as
2358 // (4 0)(4 1)(4 0)(4 2)(4 3)(4 2),
2359 // and broken up into "increasing" segments as
2360 // [(4 0)(4 1)] [(4 0)(4 2)(4 3)] [(4 2)].
2361 // This is equivalent to
2362 // (4 0 1)(4 0 2 3)(4 2),
2363 // which can be implemented as 3 vshufvdd instructions.
2364
2365 using CycleType = SmallVector<unsigned, 8>;
2366 std::set<CycleType> Cycles;
2367 std::set<unsigned> All;
2368
2369 for (unsigned I : Perm)
2370 All.insert(I);
2371
2372 // If the cycle contains LogLen-1, move it to the front of the cycle.
2373 // Otherwise, return the cycle unchanged.
2374 auto canonicalize = [LogLen](const CycleType &C) -> CycleType {
2375 unsigned LogPos, N = C.size();
2376 for (LogPos = 0; LogPos != N; ++LogPos)
2377 if (C[LogPos] == LogLen - 1)
2378 break;
2379 if (LogPos == N)
2380 return C;
2381
2382 CycleType NewC(C.begin() + LogPos, C.end());
2383 NewC.append(C.begin(), C.begin() + LogPos);
2384 return NewC;
2385 };
2386
2387 auto pfs = [](const std::set<CycleType> &Cs, unsigned Len) {
2388 // Ordering: shuff: 5 0 1 2 3 4, deal: 5 4 3 2 1 0 (for Log=6),
2389 // for bytes zero is included, for halfwords is not.
2390 if (Cs.size() != 1)
2391 return 0u;
2392 const CycleType &C = *Cs.begin();
2393 if (C[0] != Len - 1)
2394 return 0u;
2395 int D = Len - C.size();
2396 if (D != 0 && D != 1)
2397 return 0u;
2398
2399 bool IsDeal = true, IsShuff = true;
2400 for (unsigned I = 1; I != Len - D; ++I) {
2401 if (C[I] != Len - 1 - I)
2402 IsDeal = false;
2403 if (C[I] != I - (1 - D)) // I-1, I
2404 IsShuff = false;
2405 }
2406 // At most one, IsDeal or IsShuff, can be non-zero.
2407 assert(!(IsDeal || IsShuff) || IsDeal != IsShuff);
2408 static unsigned Deals[] = {Hexagon::V6_vdealb, Hexagon::V6_vdealh};
2409 static unsigned Shufs[] = {Hexagon::V6_vshuffb, Hexagon::V6_vshuffh};
2410 return IsDeal ? Deals[D] : (IsShuff ? Shufs[D] : 0);
2411 };
2412
2413 while (!All.empty()) {
2414 unsigned A = *All.begin();
2415 All.erase(A);
2416 CycleType C;
2417 C.push_back(A);
2418 for (unsigned B = Perm[A]; B != A; B = Perm[B]) {
2419 C.push_back(B);
2420 All.erase(B);
2421 }
2422 if (C.size() <= 1)
2423 continue;
2424 Cycles.insert(canonicalize(C));
2425 }
2426
2427 MVT SingleTy = getSingleVT(MVT::i8);
2428 MVT PairTy = getPairVT(MVT::i8);
2429
2430 // Recognize patterns for V6_vdeal{b,h} and V6_vshuff{b,h}.
2431 if (unsigned(VecLen) == HwLen) {
2432 if (unsigned SingleOpc = pfs(Cycles, LogLen)) {
2433 Results.push(SingleOpc, SingleTy, {Va});
2434 return OpRef::res(Results.top());
2435 }
2436 }
2437
2438 // From the cycles, construct the sequence of values that will
2439 // then form the control values for vdealvdd/vshuffvdd, i.e.
2440 // (M a1 a2)(M a3 a4 a5)... -> a1 a2 a3 a4 a5
2441 // This essentially strips the M value from the cycles where
2442 // it's present, and performs the insertion of M (then stripping)
2443 // for cycles without M (as described in an earlier comment).
2444 SmallVector<unsigned, 8> SwapElems;
2445 // When the input is extended (i.e. single vector becomes a pair),
2446 // this is done by using an "undef" vector as the second input.
2447 // However, then we get
2448 // input 1: GOODBITS
2449 // input 2: ........
2450 // but we need
2451 // input 1: ....BITS
2452 // input 2: ....GOOD
2453 // Then at the end, this needs to be undone. To accomplish this,
2454 // artificially add "LogLen-1" at both ends of the sequence.
2455 if (!HavePairs)
2456 SwapElems.push_back(LogLen - 1);
2457 for (const CycleType &C : Cycles) {
2458 // Do the transformation: (a1..an) -> (M a1..an)(M a1).
2459 unsigned First = (C[0] == LogLen - 1) ? 1 : 0;
2460 SwapElems.append(C.begin() + First, C.end());
2461 if (First == 0)
2462 SwapElems.push_back(C[0]);
2463 }
2464 if (!HavePairs)
2465 SwapElems.push_back(LogLen - 1);
2466
2467 const SDLoc &dl(Results.InpNode);
2468 OpRef Arg = HavePairs ? Va : concats(Va, OpRef::undef(SingleTy), Results);
2469 if (InvertedPair)
2470 Arg = concats(OpRef::hi(Arg), OpRef::lo(Arg), Results);
2471
2472 for (unsigned I = 0, E = SwapElems.size(); I != E;) {
2473 bool IsInc = I == E - 1 || SwapElems[I] < SwapElems[I + 1];
2474 unsigned S = (1u << SwapElems[I]);
2475 if (I < E - 1) {
2476 while (++I < E - 1 && IsInc == (SwapElems[I] < SwapElems[I + 1]))
2477 S |= 1u << SwapElems[I];
2478 // The above loop will not add a bit for the final SwapElems[I+1],
2479 // so add it here.
2480 S |= 1u << SwapElems[I];
2481 }
2482 ++I;
2483
2484 NodeTemplate Res;
2485 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(S, dl)});
2486 Res.Opc = IsInc ? Hexagon::V6_vshuffvdd : Hexagon::V6_vdealvdd;
2487 Res.Ty = PairTy;
2488 Res.Ops = {OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1)};
2489 Results.push(Res);
2490 Arg = OpRef::res(Results.top());
2491 }
2492
2493 return HavePairs ? Arg : OpRef::lo(Arg);
2494}
2495
2496OpRef HvxSelector::butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results) {
2497 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
2498 // Butterfly shuffles.
2499 //
2500 // V6_vdelta
2501 // V6_vrdelta
2502 // V6_vror
2503
2504 // The assumption here is that all elements picked by Mask are in the
2505 // first operand to the vector_shuffle. This assumption is enforced
2506 // by the caller.
2507
2508 MVT ResTy = getSingleVT(MVT::i8);
2509 PermNetwork::Controls FC, RC;
2510 const SDLoc &dl(Results.InpNode);
2511 int VecLen = SM.Mask.size();
2512
2513 for (int M : SM.Mask) {
2514 if (M != -1 && M >= VecLen)
2515 return OpRef::fail();
2516 }
2517
2518 // Try the deltas/benes for both single vectors and vector pairs.
2519 ForwardDeltaNetwork FN(SM.Mask);
2520 if (FN.run(FC)) {
2521 SDValue Ctl = getVectorConstant(FC, dl);
2522 Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(Ctl)});
2523 return OpRef::res(Results.top());
2524 }
2525
2526 // Try reverse delta.
2527 ReverseDeltaNetwork RN(SM.Mask);
2528 if (RN.run(RC)) {
2529 SDValue Ctl = getVectorConstant(RC, dl);
2530 Results.push(Hexagon::V6_vrdelta, ResTy, {Va, OpRef(Ctl)});
2531 return OpRef::res(Results.top());
2532 }
2533
2534 // Do Benes.
2535 BenesNetwork BN(SM.Mask);
2536 if (BN.run(FC, RC)) {
2537 SDValue CtlF = getVectorConstant(FC, dl);
2538 SDValue CtlR = getVectorConstant(RC, dl);
2539 Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(CtlF)});
2540 Results.push(Hexagon::V6_vrdelta, ResTy,
2541 {OpRef::res(-1), OpRef(CtlR)});
2542 return OpRef::res(Results.top());
2543 }
2544
2545 return OpRef::fail();
2546}
2547
2548SDValue HvxSelector::getConst32(int Val, const SDLoc &dl) {
2549 return DAG.getTargetConstant(Val, dl, MVT::i32);
2550}
2551
2552SDValue HvxSelector::getVectorConstant(ArrayRef<uint8_t> Data,
2553 const SDLoc &dl) {
2555 for (uint8_t C : Data)
2556 Elems.push_back(DAG.getConstant(C, dl, MVT::i8));
2557 MVT VecTy = MVT::getVectorVT(MVT::i8, Data.size());
2558 SDValue BV = DAG.getBuildVector(VecTy, dl, Elems);
2559 SDValue LV = Lower.LowerOperation(BV, DAG);
2561 return DAG.getNode(HexagonISD::ISEL, dl, VecTy, LV);
2562}
2563
2565 SDValue Inp = N->getOperand(0);
2566 MVT ResTy = N->getValueType(0).getSimpleVT();
2567 unsigned Idx = N->getConstantOperandVal(1);
2568
2569 [[maybe_unused]] MVT InpTy = Inp.getValueType().getSimpleVT();
2570 [[maybe_unused]] unsigned ResLen = ResTy.getVectorNumElements();
2572 assert(2 * ResLen == InpTy.getVectorNumElements());
2573 assert(Idx == 0 || Idx == ResLen);
2574
2575 unsigned SubReg = Idx == 0 ? Hexagon::vsub_lo : Hexagon::vsub_hi;
2576 SDValue Ext = DAG.getTargetExtractSubreg(SubReg, SDLoc(N), ResTy, Inp);
2577
2578 ISel.ReplaceNode(N, Ext.getNode());
2579}
2580
2582 DEBUG_WITH_TYPE("isel", {
2583 dbgs() << "Starting " << __func__ << " on node:\n";
2584 N->dump(&DAG);
2585 });
2586 MVT ResTy = N->getValueType(0).getSimpleVT();
2587 // Assume that vector shuffles operate on vectors of bytes.
2588 assert(ResTy.isVector() && ResTy.getVectorElementType() == MVT::i8);
2589
2590 auto *SN = cast<ShuffleVectorSDNode>(N);
2591 std::vector<int> Mask(SN->getMask().begin(), SN->getMask().end());
2592 // This shouldn't really be necessary. Is it?
2593 for (int &Idx : Mask)
2594 if (Idx != -1 && Idx < 0)
2595 Idx = -1;
2596
2597 unsigned VecLen = Mask.size();
2598 bool HavePairs = (2*HwLen == VecLen);
2599 assert(ResTy.getSizeInBits() / 8 == VecLen);
2600
2601 // Vd = vector_shuffle Va, Vb, Mask
2602 //
2603
2604 bool UseLeft = false, UseRight = false;
2605 for (unsigned I = 0; I != VecLen; ++I) {
2606 if (Mask[I] == -1)
2607 continue;
2608 unsigned Idx = Mask[I];
2609 assert(Idx < 2*VecLen);
2610 if (Idx < VecLen)
2611 UseLeft = true;
2612 else
2613 UseRight = true;
2614 }
2615
2616 DEBUG_WITH_TYPE("isel", {
2617 dbgs() << "VecLen=" << VecLen << " HwLen=" << HwLen << " UseLeft="
2618 << UseLeft << " UseRight=" << UseRight << " HavePairs="
2619 << HavePairs << '\n';
2620 });
2621 // If the mask is all -1's, generate "undef".
2622 if (!UseLeft && !UseRight) {
2623 ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode());
2624 return;
2625 }
2626
2627 SDValue Vec0 = N->getOperand(0);
2628 SDValue Vec1 = N->getOperand(1);
2629 assert(Vec0.getValueType() == ResTy && Vec1.getValueType() == ResTy);
2630
2631 ResultStack Results(SN);
2632 OpRef Va = OpRef::undef(ResTy);
2633 OpRef Vb = OpRef::undef(ResTy);
2634
2635 if (!Vec0.isUndef()) {
2636 Results.push(TargetOpcode::COPY, ResTy, {Vec0});
2637 Va = OpRef::OpRef::res(Results.top());
2638 }
2639 if (!Vec1.isUndef()) {
2640 Results.push(TargetOpcode::COPY, ResTy, {Vec1});
2641 Vb = OpRef::res(Results.top());
2642 }
2643
2644 OpRef Res = !HavePairs ? shuffs2(ShuffleMask(Mask), Va, Vb, Results)
2645 : shuffp2(ShuffleMask(Mask), Va, Vb, Results);
2646
2647 bool Done = Res.isValid();
2648 if (Done) {
2649 // Make sure that Res is on the stack before materializing.
2650 Results.push(TargetOpcode::COPY, ResTy, {Res});
2651 materialize(Results);
2652 } else {
2653 Done = scalarizeShuffle(Mask, SDLoc(N), ResTy, Vec0, Vec1, N);
2654 }
2655
2656 if (!Done) {
2657#ifndef NDEBUG
2658 dbgs() << "Unhandled shuffle:\n";
2659 SN->dumpr(&DAG);
2660#endif
2661 llvm_unreachable("Failed to select vector shuffle");
2662 }
2663}
2664
2666 // If this is a rotation by less than 8, use V6_valignbi.
2667 MVT Ty = N->getValueType(0).getSimpleVT();
2668 const SDLoc &dl(N);
2669 SDValue VecV = N->getOperand(0);
2670 SDValue RotV = N->getOperand(1);
2671 SDNode *NewN = nullptr;
2672
2673 if (auto *CN = dyn_cast<ConstantSDNode>(RotV.getNode())) {
2674 unsigned S = CN->getZExtValue() % HST.getVectorLength();
2675 if (S == 0) {
2676 NewN = VecV.getNode();
2677 } else if (isUInt<3>(S)) {
2678 NewN = DAG.getMachineNode(Hexagon::V6_valignbi, dl, Ty,
2679 {VecV, VecV, getConst32(S, dl)});
2680 }
2681 }
2682
2683 if (!NewN)
2684 NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV});
2685
2686 ISel.ReplaceNode(N, NewN);
2687}
2688
2690 SDValue Vv = N->getOperand(0);
2691 SDValue Vu = N->getOperand(1);
2692 SDValue Rt = N->getOperand(2);
2693 SDNode *NewN = DAG.getMachineNode(Hexagon::V6_valignb, SDLoc(N),
2694 N->getValueType(0), {Vv, Vu, Rt});
2695 ISel.ReplaceNode(N, NewN);
2697}
2698
2699void HexagonDAGToDAGISel::PreprocessHvxISelDAG() {
2700 auto getNodes = [this]() -> std::vector<SDNode *> {
2701 std::vector<SDNode *> T;
2702 T.reserve(CurDAG->allnodes_size());
2703 for (SDNode &N : CurDAG->allnodes())
2704 T.push_back(&N);
2705 return T;
2706 };
2707
2708 ppHvxShuffleOfShuffle(getNodes());
2709}
2710
2711template <> struct std::hash<SDValue> {
2712 std::size_t operator()(SDValue V) const {
2713 return std::hash<const void *>()(V.getNode()) +
2714 std::hash<unsigned>()(V.getResNo());
2715 };
2716};
2717
2718void HexagonDAGToDAGISel::ppHvxShuffleOfShuffle(std::vector<SDNode *> &&Nodes) {
2719 // Motivating case:
2720 // t10: v64i32 = ...
2721 // t46: v128i8 = vector_shuffle<...> t44, t45
2722 // t48: v128i8 = vector_shuffle<...> t44, t45
2723 // t42: v128i8 = vector_shuffle<...> t46, t48
2724 // t12: v32i32 = extract_subvector t10, Constant:i32<0>
2725 // t44: v128i8 = bitcast t12
2726 // t15: v32i32 = extract_subvector t10, Constant:i32<32>
2727 // t45: v128i8 = bitcast t15
2728 SelectionDAG &DAG = *CurDAG;
2729 unsigned HwLen = HST->getVectorLength();
2730
2731 struct SubVectorInfo {
2732 SubVectorInfo(SDValue S, unsigned H) : Src(S), HalfIdx(H) {}
2733 SDValue Src;
2734 unsigned HalfIdx;
2735 };
2736
2737 using MapType = std::unordered_map<SDValue, unsigned>;
2738
2739 auto getMaskElt = [&](unsigned Idx, ShuffleVectorSDNode *Shuff0,
2740 ShuffleVectorSDNode *Shuff1,
2741 const MapType &OpMap) -> int {
2742 // Treat Shuff0 and Shuff1 as operands to another vector shuffle, and
2743 // Idx as a (non-undef) element of the top level shuffle's mask, that
2744 // is, index into concat(Shuff0, Shuff1).
2745 // Assuming that Shuff0 and Shuff1 both operate on subvectors of the
2746 // same source vector (as described by OpMap), return the index of
2747 // that source vector corresponding to Idx.
2748 ShuffleVectorSDNode *OpShuff = Idx < HwLen ? Shuff0 : Shuff1;
2749 if (Idx >= HwLen)
2750 Idx -= HwLen;
2751
2752 // Get the mask index that M points at in the corresponding operand.
2753 int MaybeN = OpShuff->getMaskElt(Idx);
2754 if (MaybeN < 0)
2755 return -1;
2756
2757 auto N = static_cast<unsigned>(MaybeN);
2758 unsigned SrcBase = N < HwLen ? OpMap.at(OpShuff->getOperand(0))
2759 : OpMap.at(OpShuff->getOperand(1));
2760 if (N >= HwLen)
2761 N -= HwLen;
2762
2763 return N + SrcBase;
2764 };
2765
2766 auto fold3 = [&](SDValue TopShuff, SDValue Inp, MapType &&OpMap) -> SDValue {
2767 // Fold all 3 shuffles into a single one.
2768 auto *This = cast<ShuffleVectorSDNode>(TopShuff);
2769 auto *S0 = cast<ShuffleVectorSDNode>(TopShuff.getOperand(0));
2770 auto *S1 = cast<ShuffleVectorSDNode>(TopShuff.getOperand(1));
2771 ArrayRef<int> TopMask = This->getMask();
2772 // This should be guaranteed by type checks in the caller, and the fact
2773 // that all shuffles should have been promoted to operate on MVT::i8.
2774 assert(TopMask.size() == S0->getMask().size() &&
2775 TopMask.size() == S1->getMask().size());
2776 assert(TopMask.size() == HwLen);
2777
2778 SmallVector<int, 256> FoldedMask(2 * HwLen);
2779 for (unsigned I = 0; I != HwLen; ++I) {
2780 int MaybeM = TopMask[I];
2781 if (MaybeM >= 0) {
2782 FoldedMask[I] =
2783 getMaskElt(static_cast<unsigned>(MaybeM), S0, S1, OpMap);
2784 } else {
2785 FoldedMask[I] = -1;
2786 }
2787 }
2788 // The second half of the result will be all-undef.
2789 std::fill(FoldedMask.begin() + HwLen, FoldedMask.end(), -1);
2790
2791 // Return
2792 // FoldedShuffle = (Shuffle Inp, undef, FoldedMask)
2793 // (LoHalf FoldedShuffle)
2794 const SDLoc &dl(TopShuff);
2795 MVT SingleTy = MVT::getVectorVT(MVT::i8, HwLen);
2796 MVT PairTy = MVT::getVectorVT(MVT::i8, 2 * HwLen);
2797 SDValue FoldedShuff =
2798 DAG.getVectorShuffle(PairTy, dl, DAG.getBitcast(PairTy, Inp),
2799 DAG.getUNDEF(PairTy), FoldedMask);
2800 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SingleTy, FoldedShuff,
2801 DAG.getConstant(0, dl, MVT::i32));
2802 };
2803
2804 auto getSourceInfo = [](SDValue V) -> std::optional<SubVectorInfo> {
2805 while (V.getOpcode() == ISD::BITCAST)
2806 V = V.getOperand(0);
2807 if (V.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2808 return std::nullopt;
2809 return SubVectorInfo(V.getOperand(0),
2810 !cast<ConstantSDNode>(V.getOperand(1))->isZero());
2811 };
2812
2813 for (SDNode *N : Nodes) {
2814 if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
2815 continue;
2816 EVT ResTy = N->getValueType(0);
2817 if (ResTy.getVectorElementType() != MVT::i8)
2818 continue;
2819 if (ResTy.getVectorNumElements() != HwLen)
2820 continue;
2821
2822 SDValue V0 = N->getOperand(0);
2823 SDValue V1 = N->getOperand(1);
2824 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
2825 continue;
2826 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
2827 continue;
2828 if (V0.getValueType() != ResTy || V1.getValueType() != ResTy)
2829 continue;
2830
2831 // Check if all operands of the two operand shuffles are extract_subvectors
2832 // from the same vector pair.
2833 auto V0A = getSourceInfo(V0.getOperand(0));
2834 if (!V0A.has_value())
2835 continue;
2836 auto V0B = getSourceInfo(V0.getOperand(1));
2837 if (!V0B.has_value() || V0B->Src != V0A->Src)
2838 continue;
2839 auto V1A = getSourceInfo(V1.getOperand(0));
2840 if (!V1A.has_value() || V1A->Src != V0A->Src)
2841 continue;
2842 auto V1B = getSourceInfo(V1.getOperand(1));
2843 if (!V1B.has_value() || V1B->Src != V0A->Src)
2844 continue;
2845
2846 // The source must be a pair. This should be guaranteed here,
2847 // but check just in case.
2848 assert(V0A->Src.getValueType().getSizeInBits() == 16 * HwLen);
2849
2850 MapType OpMap = {
2851 {V0.getOperand(0), V0A->HalfIdx * HwLen},
2852 {V0.getOperand(1), V0B->HalfIdx * HwLen},
2853 {V1.getOperand(0), V1A->HalfIdx * HwLen},
2854 {V1.getOperand(1), V1B->HalfIdx * HwLen},
2855 };
2856 SDValue NewS = fold3(SDValue(N, 0), V0A->Src, std::move(OpMap));
2857 ReplaceNode(N, NewS.getNode());
2858 }
2859}
2860
2861void HexagonDAGToDAGISel::SelectHvxExtractSubvector(SDNode *N) {
2862 HvxSelector(*this, *CurDAG).selectExtractSubvector(N);
2863}
2864
2865void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *N) {
2866 HvxSelector(*this, *CurDAG).selectShuffle(N);
2867}
2868
2869void HexagonDAGToDAGISel::SelectHvxRor(SDNode *N) {
2870 HvxSelector(*this, *CurDAG).selectRor(N);
2871}
2872
2873void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) {
2874 HvxSelector(*this, *CurDAG).selectVAlign(N);
2875}
2876
2878 const SDLoc &dl(N);
2879 SDValue Chain = N->getOperand(0);
2880 SDValue Address = N->getOperand(2);
2881 SDValue Predicate = N->getOperand(3);
2882 SDValue Base = N->getOperand(4);
2883 SDValue Modifier = N->getOperand(5);
2884 SDValue Offset = N->getOperand(6);
2885 SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
2886
2887 unsigned Opcode;
2888 unsigned IntNo = N->getConstantOperandVal(1);
2889 switch (IntNo) {
2890 default:
2891 llvm_unreachable("Unexpected HVX gather intrinsic.");
2892 case Intrinsic::hexagon_V6_vgathermhq:
2893 case Intrinsic::hexagon_V6_vgathermhq_128B:
2894 Opcode = Hexagon::V6_vgathermhq_pseudo;
2895 break;
2896 case Intrinsic::hexagon_V6_vgathermwq:
2897 case Intrinsic::hexagon_V6_vgathermwq_128B:
2898 Opcode = Hexagon::V6_vgathermwq_pseudo;
2899 break;
2900 case Intrinsic::hexagon_V6_vgathermhwq:
2901 case Intrinsic::hexagon_V6_vgathermhwq_128B:
2902 Opcode = Hexagon::V6_vgathermhwq_pseudo;
2903 break;
2904 }
2905
2906 SDVTList VTs = CurDAG->getVTList(MVT::Other);
2907 SDValue Ops[] = { Address, ImmOperand,
2908 Predicate, Base, Modifier, Offset, Chain };
2909 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2910
2911 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2912 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2913
2914 ReplaceNode(N, Result);
2915}
2916
2918 const SDLoc &dl(N);
2919 SDValue Chain = N->getOperand(0);
2920 SDValue Address = N->getOperand(2);
2921 SDValue Base = N->getOperand(3);
2922 SDValue Modifier = N->getOperand(4);
2923 SDValue Offset = N->getOperand(5);
2924 SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
2925
2926 unsigned Opcode;
2927 unsigned IntNo = N->getConstantOperandVal(1);
2928 switch (IntNo) {
2929 default:
2930 llvm_unreachable("Unexpected HVX gather intrinsic.");
2931 case Intrinsic::hexagon_V6_vgathermh:
2932 case Intrinsic::hexagon_V6_vgathermh_128B:
2933 Opcode = Hexagon::V6_vgathermh_pseudo;
2934 break;
2935 case Intrinsic::hexagon_V6_vgathermw:
2936 case Intrinsic::hexagon_V6_vgathermw_128B:
2937 Opcode = Hexagon::V6_vgathermw_pseudo;
2938 break;
2939 case Intrinsic::hexagon_V6_vgathermhw:
2940 case Intrinsic::hexagon_V6_vgathermhw_128B:
2941 Opcode = Hexagon::V6_vgathermhw_pseudo;
2942 break;
2943 }
2944
2945 SDVTList VTs = CurDAG->getVTList(MVT::Other);
2946 SDValue Ops[] = { Address, ImmOperand, Base, Modifier, Offset, Chain };
2947 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2948
2949 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2950 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2951
2952 ReplaceNode(N, Result);
2953}
2954
2956 unsigned IID = N->getConstantOperandVal(0);
2957 SDNode *Result;
2958 switch (IID) {
2959 case Intrinsic::hexagon_V6_vaddcarry: {
2960 std::array<SDValue, 3> Ops = {
2961 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2962 SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2963 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2964 break;
2965 }
2966 case Intrinsic::hexagon_V6_vaddcarry_128B: {
2967 std::array<SDValue, 3> Ops = {
2968 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2969 SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
2970 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2971 break;
2972 }
2973 case Intrinsic::hexagon_V6_vsubcarry: {
2974 std::array<SDValue, 3> Ops = {
2975 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2976 SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2977 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
2978 break;
2979 }
2980 case Intrinsic::hexagon_V6_vsubcarry_128B: {
2981 std::array<SDValue, 3> Ops = {
2982 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2983 SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
2984 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
2985 break;
2986 }
2987 default:
2988 llvm_unreachable("Unexpected HVX dual output intrinsic.");
2989 }
2990 ReplaceUses(N, Result);
2991 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
2992 ReplaceUses(SDValue(N, 1), SDValue(Result, 1));
2994}
unsigned SubReg
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
ReachingDefAnalysis InstSet InstSet & Ignore
Function Alias Analysis Results
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg, SDValue Val={})
This file implements the BitVector class.
static void push(SmallVectorImpl< uint64_t > &R, StringRef Str)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:638
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:298
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static bool isIdentity(ArrayRef< int > Mask)
static std::pair< int, unsigned > findStrip(ArrayRef< int > A, int Inc, unsigned MaxLen)
static bool isUndef(ArrayRef< int > Mask)
static const HexagonSubtarget & getHexagonSubtarget(SelectionDAG &G)
static void splitMask(ArrayRef< int > Mask, MutableArrayRef< int > MaskL, MutableArrayRef< int > MaskR)
static void packSegmentMask(ArrayRef< int > Mask, ArrayRef< unsigned > OutSegMap, unsigned SegLen, MutableArrayRef< int > PackedMask)
static SmallVector< unsigned, 4 > getInputSegmentList(ShuffleMask SM, unsigned SegLen)
static const HexagonTargetLowering & getHexagonLowering(SelectionDAG &G)
static SmallVector< unsigned, 4 > getOutputSegmentMap(ShuffleMask SM, unsigned SegLen)
static bool isLowHalfOnly(ArrayRef< int > Mask)
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
static bool isUndef(const MachineInstr &MI)
nvptx lower args
#define P(N)
const NodeList & List
Definition: RDFGraph.cpp:200
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
raw_pwrite_stream & OS
static LLVM_ATTRIBUTE_ALWAYS_INLINE MVT::SimpleValueType getSimpleVT(const unsigned char *MatcherTable, unsigned &MatcherIndex)
getSimpleVT - Decode a value in MatcherTable, if it's a VBR encoded value, use GetVBR to decode it.
This file implements a set that has insertion order iteration characteristics.
Stack Slot Coloring
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:77
xray Insert XRay ops
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:224
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:200
iterator end() const
Definition: ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
iterator begin() const
Definition: ArrayRef.h:135
ArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:231
const T * data() const
Definition: ArrayRef.h:144
BitVector & set()
Definition: BitVector.h:351
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:140
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:165
iterator end()
Definition: DenseMap.h:81
Tagged union holding either a T or a Error.
Definition: Error.h:485
bool empty() const
Definition: Function.h:857
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW=nullptr, bool ShouldPreserveUseListOrder=false, bool IsForDebug=false) const
Print the function to an output stream with an optional AssemblyAnnotationWriter.
Definition: AsmWriter.cpp:5051
size_t size() const
Definition: Function.h:856
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
unsigned getVectorLength() const
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
Machine Value Type.
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
A description of a memory reference used in the backend.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:303
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:424
iterator begin() const
Definition: ArrayRef.h:347
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:417
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
bool insert(SUnit *SU)
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:99
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVM_ABI void dumpr() const
Dump (recursively) this node and its use-def subgraph.
const SDValue & getOperand(unsigned Num) const
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:229
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:868
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:570
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
ilist< SDNode >::size_type allnodes_size() const
Definition: SelectionDAG.h:566
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:707
LLVMContext * getContext() const
Definition: SelectionDAG.h:511
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
A vector that has set insertion semantics.
Definition: SetVector.h:59
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:104
void insert_range(Range &&R)
Definition: SetVector.h:193
Vector takeVector()
Clear the SetVector and return the underlying vector.
Definition: SetVector.h:93
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:279
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:684
void resize(size_type N)
Definition: SmallVector.h:639
void push_back(const T &Elt)
Definition: SmallVector.h:414
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:287
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
TargetInstrInfo - Interface to description of machine instruction set.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:571
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:975
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:601
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:563
@ Undef
Value of the register doesn't matter.
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition: MathExtras.h:47
LLVM_ABI const_iterator begin(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
LLVM_ABI const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
Definition: Path.cpp:235
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition: DWP.cpp:477
@ Length
Definition: DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:307
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1702
@ Done
Definition: Threading.h:60
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2155
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2147
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:336
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1669
@ None
Definition: CodeGenData.h:107
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:126
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1879
@ Sub
Subtraction of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:312
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1854
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:559
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1980
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
MaskT vshuffvdd(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Rt)
MaskT vpack(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
ArrayRef< int > hi(ArrayRef< int > Vuu)
auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT
MaskT vshuff(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
MaskT vdealb4w(ArrayRef< int > Vu, ArrayRef< int > Vv)
MaskT vdeal(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
ArrayRef< int > lo(ArrayRef< int > Vuu)
MaskT vdealvdd(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Rt)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
#define N
Extended Value Type.
Definition: ValueTypes.h:35
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
HvxSelector(HexagonDAGToDAGISel &HS, SelectionDAG &G)
MVT getSingleVT(MVT ElemTy) const
static SmallVector< uint32_t, 8 > completeToPerfect(ArrayRef< uint32_t > Completions, unsigned Width)
HexagonDAGToDAGISel & ISel
const HexagonTargetLowering & Lower
void selectExtractSubvector(SDNode *N)
static SmallVector< uint32_t, 8 > getPerfectCompletions(ShuffleMask SM, unsigned Width)
MVT getPairVT(MVT ElemTy) const
const HexagonSubtarget & HST
static std::optional< int > rotationDistance(ShuffleMask SM, unsigned WrapAt)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
std::size_t operator()(SDValue V) const