LLVM 21.0.0git
DependenceAnalysis.cpp
Go to the documentation of this file.
1//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// DependenceAnalysis is an LLVM pass that analyses dependences between memory
10// accesses. Currently, it is an (incomplete) implementation of the approach
11// described in
12//
13// Practical Dependence Testing
14// Goff, Kennedy, Tseng
15// PLDI 1991
16//
17// There's a single entry point that analyzes the dependence between a pair
18// of memory references in a function, returning either NULL, for no dependence,
19// or a more-or-less detailed description of the dependence between them.
20//
21// Currently, the implementation cannot propagate constraints between
22// coupled RDIV subscripts and lacks a multi-subscript MIV test.
23// Both of these are conservative weaknesses;
24// that is, not a source of correctness problems.
25//
26// Since Clang linearizes some array subscripts, the dependence
27// analysis is using SCEV->delinearize to recover the representation of multiple
28// subscripts, and thus avoid the more expensive and less precise MIV tests. The
29// delinearization is controlled by the flag -da-delinearize.
30//
31// We should pay some careful attention to the possibility of integer overflow
32// in the implementation of the various tests. This could happen with Add,
33// Subtract, or Multiply, with both APInt's and SCEV's.
34//
35// Some non-linear subscript pairs can be handled by the GCD test
36// (and perhaps other tests).
37// Should explore how often these things occur.
38//
39// Finally, it seems like certain test cases expose weaknesses in the SCEV
40// simplification, especially in the handling of sign and zero extensions.
41// It could be useful to spend time exploring these.
42//
43// Please note that this is work in progress and the interface is subject to
44// change.
45//
46//===----------------------------------------------------------------------===//
47// //
48// In memory of Ken Kennedy, 1945 - 2007 //
49// //
50//===----------------------------------------------------------------------===//
51
53#include "llvm/ADT/Statistic.h"
61#include "llvm/IR/Module.h"
64#include "llvm/Support/Debug.h"
67
68using namespace llvm;
69
70#define DEBUG_TYPE "da"
71
72//===----------------------------------------------------------------------===//
73// statistics
74
75STATISTIC(TotalArrayPairs, "Array pairs tested");
76STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs");
77STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs");
78STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
79STATISTIC(ZIVapplications, "ZIV applications");
80STATISTIC(ZIVindependence, "ZIV independence");
81STATISTIC(StrongSIVapplications, "Strong SIV applications");
82STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
83STATISTIC(StrongSIVindependence, "Strong SIV independence");
84STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
85STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
86STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
87STATISTIC(ExactSIVapplications, "Exact SIV applications");
88STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
89STATISTIC(ExactSIVindependence, "Exact SIV independence");
90STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
91STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
92STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
93STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
94STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
95STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications");
96STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence");
97STATISTIC(DeltaApplications, "Delta applications");
98STATISTIC(DeltaSuccesses, "Delta successes");
99STATISTIC(DeltaIndependence, "Delta independence");
100STATISTIC(DeltaPropagations, "Delta propagations");
101STATISTIC(GCDapplications, "GCD applications");
102STATISTIC(GCDsuccesses, "GCD successes");
103STATISTIC(GCDindependence, "GCD independence");
104STATISTIC(BanerjeeApplications, "Banerjee applications");
105STATISTIC(BanerjeeIndependence, "Banerjee independence");
106STATISTIC(BanerjeeSuccesses, "Banerjee successes");
107
108static cl::opt<bool>
109 Delinearize("da-delinearize", cl::init(true), cl::Hidden,
110 cl::desc("Try to delinearize array references."));
112 "da-disable-delinearization-checks", cl::Hidden,
113 cl::desc(
114 "Disable checks that try to statically verify validity of "
115 "delinearized subscripts. Enabling this option may result in incorrect "
116 "dependence vectors for languages that allow the subscript of one "
117 "dimension to underflow or overflow into another dimension."));
118
120 "da-miv-max-level-threshold", cl::init(7), cl::Hidden,
121 cl::desc("Maximum depth allowed for the recursive algorithm used to "
122 "explore MIV direction vectors."));
123
124//===----------------------------------------------------------------------===//
125// basics
126
129 auto &AA = FAM.getResult<AAManager>(F);
131 auto &LI = FAM.getResult<LoopAnalysis>(F);
132 return DependenceInfo(&F, &AA, &SE, &LI);
133}
134
135AnalysisKey DependenceAnalysis::Key;
136
138 "Dependence Analysis", true, true)
144
146
148 : FunctionPass(ID) {
150}
151
154}
155
157 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
158 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
159 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
160 info.reset(new DependenceInfo(&F, &AA, &SE, &LI));
161 return false;
162}
163
165
167
169 AU.setPreservesAll();
173}
174
175// Used to test the dependence analyzer.
176// Looks through the function, noting instructions that may access memory.
177// Calls depends() on every possible pair and prints out the result.
178// Ignores all other instructions.
180 ScalarEvolution &SE, bool NormalizeResults) {
181 auto *F = DA->getFunction();
182 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE;
183 ++SrcI) {
184 if (SrcI->mayReadOrWriteMemory()) {
185 for (inst_iterator DstI = SrcI, DstE = inst_end(F);
186 DstI != DstE; ++DstI) {
187 if (DstI->mayReadOrWriteMemory()) {
188 OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n";
189 OS << " da analyze - ";
190 if (auto D = DA->depends(&*SrcI, &*DstI)) {
191 // Normalize negative direction vectors if required by clients.
192 if (NormalizeResults && D->normalize(&SE))
193 OS << "normalized - ";
194 D->dump(OS);
195 for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
196 if (D->isSplitable(Level)) {
197 OS << " da analyze - split level = " << Level;
198 OS << ", iteration = " << *DA->getSplitIteration(*D, Level);
199 OS << "!\n";
200 }
201 }
202 }
203 else
204 OS << "none!\n";
205 }
206 }
207 }
208 }
209}
210
212 const Module *) const {
213 dumpExampleDependence(OS, info.get(),
214 getAnalysis<ScalarEvolutionWrapperPass>().getSE(), false);
215}
216
219 OS << "Printing analysis 'Dependence Analysis' for function '" << F.getName()
220 << "':\n";
223 NormalizeResults);
224 return PreservedAnalyses::all();
225}
226
227//===----------------------------------------------------------------------===//
228// Dependence methods
229
230// Returns true if this is an input dependence.
233}
234
235
236// Returns true if this is an output dependence.
239}
240
241
242// Returns true if this is an flow (aka true) dependence.
243bool Dependence::isFlow() const {
245}
246
247
248// Returns true if this is an anti dependence.
249bool Dependence::isAnti() const {
251}
252
253
254// Returns true if a particular level is scalar; that is,
255// if no subscript in the source or destination mention the induction
256// variable associated with the loop at this level.
257// Leave this out of line, so it will serve as a virtual method anchor
258bool Dependence::isScalar(unsigned level) const {
259 return false;
260}
261
262
263//===----------------------------------------------------------------------===//
264// FullDependence methods
265
267 bool PossiblyLoopIndependent,
268 unsigned CommonLevels)
269 : Dependence(Source, Destination), Levels(CommonLevels),
270 LoopIndependent(PossiblyLoopIndependent) {
271 Consistent = true;
272 if (CommonLevels)
273 DV = std::make_unique<DVEntry[]>(CommonLevels);
274}
275
276// FIXME: in some cases the meaning of a negative direction vector
277// may not be straightforward, e.g.,
278// for (int i = 0; i < 32; ++i) {
279// Src: A[i] = ...;
280// Dst: use(A[31 - i]);
281// }
282// The dependency is
283// flow { Src[i] -> Dst[31 - i] : when i >= 16 } and
284// anti { Dst[i] -> Src[31 - i] : when i < 16 },
285// -- hence a [<>].
286// As long as a dependence result contains '>' ('<>', '<=>', "*"), it
287// means that a reversed/normalized dependence needs to be considered
288// as well. Nevertheless, current isDirectionNegative() only returns
289// true with a '>' or '>=' dependency for ease of canonicalizing the
290// dependency vector, since the reverse of '<>', '<=>' and "*" is itself.
292 for (unsigned Level = 1; Level <= Levels; ++Level) {
293 unsigned char Direction = DV[Level - 1].Direction;
295 continue;
298 return true;
299 return false;
300 }
301 return false;
302}
303
305 if (!isDirectionNegative())
306 return false;
307
308 LLVM_DEBUG(dbgs() << "Before normalizing negative direction vectors:\n";
309 dump(dbgs()););
310 std::swap(Src, Dst);
311 for (unsigned Level = 1; Level <= Levels; ++Level) {
312 unsigned char Direction = DV[Level - 1].Direction;
313 // Reverse the direction vector, this means LT becomes GT
314 // and GT becomes LT.
315 unsigned char RevDirection = Direction & Dependence::DVEntry::EQ;
317 RevDirection |= Dependence::DVEntry::GT;
319 RevDirection |= Dependence::DVEntry::LT;
320 DV[Level - 1].Direction = RevDirection;
321 // Reverse the dependence distance as well.
322 if (DV[Level - 1].Distance != nullptr)
323 DV[Level - 1].Distance =
324 SE->getNegativeSCEV(DV[Level - 1].Distance);
325 }
326
327 LLVM_DEBUG(dbgs() << "After normalizing negative direction vectors:\n";
328 dump(dbgs()););
329 return true;
330}
331
332// The rest are simple getters that hide the implementation.
333
334// getDirection - Returns the direction associated with a particular level.
335unsigned FullDependence::getDirection(unsigned Level) const {
336 assert(0 < Level && Level <= Levels && "Level out of range");
337 return DV[Level - 1].Direction;
338}
339
340
341// Returns the distance (or NULL) associated with a particular level.
342const SCEV *FullDependence::getDistance(unsigned Level) const {
343 assert(0 < Level && Level <= Levels && "Level out of range");
344 return DV[Level - 1].Distance;
345}
346
347
348// Returns true if a particular level is scalar; that is,
349// if no subscript in the source or destination mention the induction
350// variable associated with the loop at this level.
351bool FullDependence::isScalar(unsigned Level) const {
352 assert(0 < Level && Level <= Levels && "Level out of range");
353 return DV[Level - 1].Scalar;
354}
355
356
357// Returns true if peeling the first iteration from this loop
358// will break this dependence.
359bool FullDependence::isPeelFirst(unsigned Level) const {
360 assert(0 < Level && Level <= Levels && "Level out of range");
361 return DV[Level - 1].PeelFirst;
362}
363
364
365// Returns true if peeling the last iteration from this loop
366// will break this dependence.
367bool FullDependence::isPeelLast(unsigned Level) const {
368 assert(0 < Level && Level <= Levels && "Level out of range");
369 return DV[Level - 1].PeelLast;
370}
371
372
373// Returns true if splitting this loop will break the dependence.
374bool FullDependence::isSplitable(unsigned Level) const {
375 assert(0 < Level && Level <= Levels && "Level out of range");
376 return DV[Level - 1].Splitable;
377}
378
379
380//===----------------------------------------------------------------------===//
381// DependenceInfo::Constraint methods
382
383// If constraint is a point <X, Y>, returns X.
384// Otherwise assert.
385const SCEV *DependenceInfo::Constraint::getX() const {
386 assert(Kind == Point && "Kind should be Point");
387 return A;
388}
389
390
391// If constraint is a point <X, Y>, returns Y.
392// Otherwise assert.
393const SCEV *DependenceInfo::Constraint::getY() const {
394 assert(Kind == Point && "Kind should be Point");
395 return B;
396}
397
398
399// If constraint is a line AX + BY = C, returns A.
400// Otherwise assert.
401const SCEV *DependenceInfo::Constraint::getA() const {
402 assert((Kind == Line || Kind == Distance) &&
403 "Kind should be Line (or Distance)");
404 return A;
405}
406
407
408// If constraint is a line AX + BY = C, returns B.
409// Otherwise assert.
410const SCEV *DependenceInfo::Constraint::getB() const {
411 assert((Kind == Line || Kind == Distance) &&
412 "Kind should be Line (or Distance)");
413 return B;
414}
415
416
417// If constraint is a line AX + BY = C, returns C.
418// Otherwise assert.
419const SCEV *DependenceInfo::Constraint::getC() const {
420 assert((Kind == Line || Kind == Distance) &&
421 "Kind should be Line (or Distance)");
422 return C;
423}
424
425
426// If constraint is a distance, returns D.
427// Otherwise assert.
428const SCEV *DependenceInfo::Constraint::getD() const {
429 assert(Kind == Distance && "Kind should be Distance");
430 return SE->getNegativeSCEV(C);
431}
432
433
434// Returns the loop associated with this constraint.
435const Loop *DependenceInfo::Constraint::getAssociatedLoop() const {
436 assert((Kind == Distance || Kind == Line || Kind == Point) &&
437 "Kind should be Distance, Line, or Point");
438 return AssociatedLoop;
439}
440
441void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y,
442 const Loop *CurLoop) {
443 Kind = Point;
444 A = X;
445 B = Y;
446 AssociatedLoop = CurLoop;
447}
448
449void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB,
450 const SCEV *CC, const Loop *CurLoop) {
451 Kind = Line;
452 A = AA;
453 B = BB;
454 C = CC;
455 AssociatedLoop = CurLoop;
456}
457
458void DependenceInfo::Constraint::setDistance(const SCEV *D,
459 const Loop *CurLoop) {
460 Kind = Distance;
461 A = SE->getOne(D->getType());
462 B = SE->getNegativeSCEV(A);
463 C = SE->getNegativeSCEV(D);
464 AssociatedLoop = CurLoop;
465}
466
467void DependenceInfo::Constraint::setEmpty() { Kind = Empty; }
468
469void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) {
470 SE = NewSE;
471 Kind = Any;
472}
473
474#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
475// For debugging purposes. Dumps the constraint out to OS.
476LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const {
477 if (isEmpty())
478 OS << " Empty\n";
479 else if (isAny())
480 OS << " Any\n";
481 else if (isPoint())
482 OS << " Point is <" << *getX() << ", " << *getY() << ">\n";
483 else if (isDistance())
484 OS << " Distance is " << *getD() <<
485 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n";
486 else if (isLine())
487 OS << " Line is " << *getA() << "*X + " <<
488 *getB() << "*Y = " << *getC() << "\n";
489 else
490 llvm_unreachable("unknown constraint type in Constraint::dump");
491}
492#endif
493
494
495// Updates X with the intersection
496// of the Constraints X and Y. Returns true if X has changed.
497// Corresponds to Figure 4 from the paper
498//
499// Practical Dependence Testing
500// Goff, Kennedy, Tseng
501// PLDI 1991
502bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
503 ++DeltaApplications;
504 LLVM_DEBUG(dbgs() << "\tintersect constraints\n");
505 LLVM_DEBUG(dbgs() << "\t X ="; X->dump(dbgs()));
506 LLVM_DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs()));
507 assert(!Y->isPoint() && "Y must not be a Point");
508 if (X->isAny()) {
509 if (Y->isAny())
510 return false;
511 *X = *Y;
512 return true;
513 }
514 if (X->isEmpty())
515 return false;
516 if (Y->isEmpty()) {
517 X->setEmpty();
518 return true;
519 }
520
521 if (X->isDistance() && Y->isDistance()) {
522 LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n");
523 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD()))
524 return false;
525 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) {
526 X->setEmpty();
527 ++DeltaSuccesses;
528 return true;
529 }
530 // Hmmm, interesting situation.
531 // I guess if either is constant, keep it and ignore the other.
532 if (isa<SCEVConstant>(Y->getD())) {
533 *X = *Y;
534 return true;
535 }
536 return false;
537 }
538
539 // At this point, the pseudo-code in Figure 4 of the paper
540 // checks if (X->isPoint() && Y->isPoint()).
541 // This case can't occur in our implementation,
542 // since a Point can only arise as the result of intersecting
543 // two Line constraints, and the right-hand value, Y, is never
544 // the result of an intersection.
545 assert(!(X->isPoint() && Y->isPoint()) &&
546 "We shouldn't ever see X->isPoint() && Y->isPoint()");
547
548 if (X->isLine() && Y->isLine()) {
549 LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n");
550 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB());
551 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA());
552 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) {
553 // slopes are equal, so lines are parallel
554 LLVM_DEBUG(dbgs() << "\t\tsame slope\n");
555 Prod1 = SE->getMulExpr(X->getC(), Y->getB());
556 Prod2 = SE->getMulExpr(X->getB(), Y->getC());
557 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2))
558 return false;
559 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
560 X->setEmpty();
561 ++DeltaSuccesses;
562 return true;
563 }
564 return false;
565 }
566 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
567 // slopes differ, so lines intersect
568 LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n");
569 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB());
570 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA());
571 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB());
572 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA());
573 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB());
574 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB());
575 const SCEVConstant *C1A2_C2A1 =
576 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1));
577 const SCEVConstant *C1B2_C2B1 =
578 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1));
579 const SCEVConstant *A1B2_A2B1 =
580 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1));
581 const SCEVConstant *A2B1_A1B2 =
582 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2));
583 if (!C1B2_C2B1 || !C1A2_C2A1 ||
584 !A1B2_A2B1 || !A2B1_A1B2)
585 return false;
586 APInt Xtop = C1B2_C2B1->getAPInt();
587 APInt Xbot = A1B2_A2B1->getAPInt();
588 APInt Ytop = C1A2_C2A1->getAPInt();
589 APInt Ybot = A2B1_A1B2->getAPInt();
590 LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");
591 LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");
592 LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");
593 LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n");
594 APInt Xq = Xtop; // these need to be initialized, even
595 APInt Xr = Xtop; // though they're just going to be overwritten
596 APInt::sdivrem(Xtop, Xbot, Xq, Xr);
597 APInt Yq = Ytop;
598 APInt Yr = Ytop;
599 APInt::sdivrem(Ytop, Ybot, Yq, Yr);
600 if (Xr != 0 || Yr != 0) {
601 X->setEmpty();
602 ++DeltaSuccesses;
603 return true;
604 }
605 LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n");
606 if (Xq.slt(0) || Yq.slt(0)) {
607 X->setEmpty();
608 ++DeltaSuccesses;
609 return true;
610 }
611 if (const SCEVConstant *CUB =
612 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {
613 const APInt &UpperBound = CUB->getAPInt();
614 LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
615 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
616 X->setEmpty();
617 ++DeltaSuccesses;
618 return true;
619 }
620 }
621 X->setPoint(SE->getConstant(Xq),
622 SE->getConstant(Yq),
623 X->getAssociatedLoop());
624 ++DeltaSuccesses;
625 return true;
626 }
627 return false;
628 }
629
630 // if (X->isLine() && Y->isPoint()) This case can't occur.
631 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur");
632
633 if (X->isPoint() && Y->isLine()) {
634 LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n");
635 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX());
636 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY());
637 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1);
638 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC()))
639 return false;
640 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) {
641 X->setEmpty();
642 ++DeltaSuccesses;
643 return true;
644 }
645 return false;
646 }
647
648 llvm_unreachable("shouldn't reach the end of Constraint intersection");
649 return false;
650}
651
652
653//===----------------------------------------------------------------------===//
654// DependenceInfo methods
655
656// For debugging purposes. Dumps a dependence to OS.
658 bool Splitable = false;
659 if (isConfused())
660 OS << "confused";
661 else {
662 if (isConsistent())
663 OS << "consistent ";
664 if (isFlow())
665 OS << "flow";
666 else if (isOutput())
667 OS << "output";
668 else if (isAnti())
669 OS << "anti";
670 else if (isInput())
671 OS << "input";
672 unsigned Levels = getLevels();
673 OS << " [";
674 for (unsigned II = 1; II <= Levels; ++II) {
675 if (isSplitable(II))
676 Splitable = true;
677 if (isPeelFirst(II))
678 OS << 'p';
679 const SCEV *Distance = getDistance(II);
680 if (Distance)
681 OS << *Distance;
682 else if (isScalar(II))
683 OS << "S";
684 else {
685 unsigned Direction = getDirection(II);
686 if (Direction == DVEntry::ALL)
687 OS << "*";
688 else {
690 OS << "<";
692 OS << "=";
694 OS << ">";
695 }
696 }
697 if (isPeelLast(II))
698 OS << 'p';
699 if (II < Levels)
700 OS << " ";
701 }
702 if (isLoopIndependent())
703 OS << "|<";
704 OS << "]";
705 if (Splitable)
706 OS << " splitable";
707 }
708 OS << "!\n";
709}
710
711// Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their
712// underlaying objects. If LocA and LocB are known to not alias (for any reason:
713// tbaa, non-overlapping regions etc), then it is known there is no dependecy.
714// Otherwise the underlying objects are checked to see if they point to
715// different identifiable objects.
717 const DataLayout &DL,
718 const MemoryLocation &LocA,
719 const MemoryLocation &LocB) {
720 // Check the original locations (minus size) for noalias, which can happen for
721 // tbaa, incompatible underlying object locations, etc.
722 MemoryLocation LocAS =
724 MemoryLocation LocBS =
726 BatchAAResults BAA(*AA);
728
729 if (BAA.isNoAlias(LocAS, LocBS))
731
732 // Check the underlying objects are the same
733 const Value *AObj = getUnderlyingObject(LocA.Ptr);
734 const Value *BObj = getUnderlyingObject(LocB.Ptr);
735
736 // If the underlying objects are the same, they must alias
737 if (AObj == BObj)
739
740 // We may have hit the recursion limit for underlying objects, or have
741 // underlying objects where we don't know they will alias.
742 if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj))
744
745 // Otherwise we know the objects are different and both identified objects so
746 // must not alias.
748}
749
750// Returns true if the load or store can be analyzed. Atomic and volatile
751// operations have properties which this analysis does not understand.
752static
754 if (const LoadInst *LI = dyn_cast<LoadInst>(I))
755 return LI->isUnordered();
756 else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
757 return SI->isUnordered();
758 return false;
759}
760
761
762// Examines the loop nesting of the Src and Dst
763// instructions and establishes their shared loops. Sets the variables
764// CommonLevels, SrcLevels, and MaxLevels.
765// The source and destination instructions needn't be contained in the same
766// loop. The routine establishNestingLevels finds the level of most deeply
767// nested loop that contains them both, CommonLevels. An instruction that's
768// not contained in a loop is at level = 0. MaxLevels is equal to the level
769// of the source plus the level of the destination, minus CommonLevels.
770// This lets us allocate vectors MaxLevels in length, with room for every
771// distinct loop referenced in both the source and destination subscripts.
772// The variable SrcLevels is the nesting depth of the source instruction.
773// It's used to help calculate distinct loops referenced by the destination.
774// Here's the map from loops to levels:
775// 0 - unused
776// 1 - outermost common loop
777// ... - other common loops
778// CommonLevels - innermost common loop
779// ... - loops containing Src but not Dst
780// SrcLevels - innermost loop containing Src but not Dst
781// ... - loops containing Dst but not Src
782// MaxLevels - innermost loops containing Dst but not Src
783// Consider the follow code fragment:
784// for (a = ...) {
785// for (b = ...) {
786// for (c = ...) {
787// for (d = ...) {
788// A[] = ...;
789// }
790// }
791// for (e = ...) {
792// for (f = ...) {
793// for (g = ...) {
794// ... = A[];
795// }
796// }
797// }
798// }
799// }
800// If we're looking at the possibility of a dependence between the store
801// to A (the Src) and the load from A (the Dst), we'll note that they
802// have 2 loops in common, so CommonLevels will equal 2 and the direction
803// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
804// A map from loop names to loop numbers would look like
805// a - 1
806// b - 2 = CommonLevels
807// c - 3
808// d - 4 = SrcLevels
809// e - 5
810// f - 6
811// g - 7 = MaxLevels
812void DependenceInfo::establishNestingLevels(const Instruction *Src,
813 const Instruction *Dst) {
814 const BasicBlock *SrcBlock = Src->getParent();
815 const BasicBlock *DstBlock = Dst->getParent();
816 unsigned SrcLevel = LI->getLoopDepth(SrcBlock);
817 unsigned DstLevel = LI->getLoopDepth(DstBlock);
818 const Loop *SrcLoop = LI->getLoopFor(SrcBlock);
819 const Loop *DstLoop = LI->getLoopFor(DstBlock);
820 SrcLevels = SrcLevel;
821 MaxLevels = SrcLevel + DstLevel;
822 while (SrcLevel > DstLevel) {
823 SrcLoop = SrcLoop->getParentLoop();
824 SrcLevel--;
825 }
826 while (DstLevel > SrcLevel) {
827 DstLoop = DstLoop->getParentLoop();
828 DstLevel--;
829 }
830 while (SrcLoop != DstLoop) {
831 SrcLoop = SrcLoop->getParentLoop();
832 DstLoop = DstLoop->getParentLoop();
833 SrcLevel--;
834 }
835 CommonLevels = SrcLevel;
836 MaxLevels -= CommonLevels;
837}
838
839
840// Given one of the loops containing the source, return
841// its level index in our numbering scheme.
842unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const {
843 return SrcLoop->getLoopDepth();
844}
845
846
847// Given one of the loops containing the destination,
848// return its level index in our numbering scheme.
849unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const {
850 unsigned D = DstLoop->getLoopDepth();
851 if (D > CommonLevels)
852 // This tries to make sure that we assign unique numbers to src and dst when
853 // the memory accesses reside in different loops that have the same depth.
854 return D - CommonLevels + SrcLevels;
855 else
856 return D;
857}
858
859
860// Returns true if Expression is loop invariant in LoopNest.
861bool DependenceInfo::isLoopInvariant(const SCEV *Expression,
862 const Loop *LoopNest) const {
863 // Unlike ScalarEvolution::isLoopInvariant() we consider an access outside of
864 // any loop as invariant, because we only consier expression evaluation at a
865 // specific position (where the array access takes place), and not across the
866 // entire function.
867 if (!LoopNest)
868 return true;
869
870 // If the expression is invariant in the outermost loop of the loop nest, it
871 // is invariant anywhere in the loop nest.
873}
874
875
876
877// Finds the set of loops from the LoopNest that
878// have a level <= CommonLevels and are referred to by the SCEV Expression.
879void DependenceInfo::collectCommonLoops(const SCEV *Expression,
880 const Loop *LoopNest,
881 SmallBitVector &Loops) const {
882 while (LoopNest) {
883 unsigned Level = LoopNest->getLoopDepth();
884 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest))
885 Loops.set(Level);
886 LoopNest = LoopNest->getParentLoop();
887 }
888}
889
890void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) {
891
892 unsigned widestWidthSeen = 0;
893 Type *widestType;
894
895 // Go through each pair and find the widest bit to which we need
896 // to extend all of them.
897 for (Subscript *Pair : Pairs) {
898 const SCEV *Src = Pair->Src;
899 const SCEV *Dst = Pair->Dst;
900 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
901 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
902 if (SrcTy == nullptr || DstTy == nullptr) {
903 assert(SrcTy == DstTy && "This function only unify integer types and "
904 "expect Src and Dst share the same type "
905 "otherwise.");
906 continue;
907 }
908 if (SrcTy->getBitWidth() > widestWidthSeen) {
909 widestWidthSeen = SrcTy->getBitWidth();
910 widestType = SrcTy;
911 }
912 if (DstTy->getBitWidth() > widestWidthSeen) {
913 widestWidthSeen = DstTy->getBitWidth();
914 widestType = DstTy;
915 }
916 }
917
918
919 assert(widestWidthSeen > 0);
920
921 // Now extend each pair to the widest seen.
922 for (Subscript *Pair : Pairs) {
923 const SCEV *Src = Pair->Src;
924 const SCEV *Dst = Pair->Dst;
925 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
926 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
927 if (SrcTy == nullptr || DstTy == nullptr) {
928 assert(SrcTy == DstTy && "This function only unify integer types and "
929 "expect Src and Dst share the same type "
930 "otherwise.");
931 continue;
932 }
933 if (SrcTy->getBitWidth() < widestWidthSeen)
934 // Sign-extend Src to widestType
935 Pair->Src = SE->getSignExtendExpr(Src, widestType);
936 if (DstTy->getBitWidth() < widestWidthSeen) {
937 // Sign-extend Dst to widestType
938 Pair->Dst = SE->getSignExtendExpr(Dst, widestType);
939 }
940 }
941}
942
943// removeMatchingExtensions - Examines a subscript pair.
944// If the source and destination are identically sign (or zero)
945// extended, it strips off the extension in an effect to simplify
946// the actual analysis.
947void DependenceInfo::removeMatchingExtensions(Subscript *Pair) {
948 const SCEV *Src = Pair->Src;
949 const SCEV *Dst = Pair->Dst;
950 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) ||
951 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) {
952 const SCEVIntegralCastExpr *SrcCast = cast<SCEVIntegralCastExpr>(Src);
953 const SCEVIntegralCastExpr *DstCast = cast<SCEVIntegralCastExpr>(Dst);
954 const SCEV *SrcCastOp = SrcCast->getOperand();
955 const SCEV *DstCastOp = DstCast->getOperand();
956 if (SrcCastOp->getType() == DstCastOp->getType()) {
957 Pair->Src = SrcCastOp;
958 Pair->Dst = DstCastOp;
959 }
960 }
961}
962
963// Examine the scev and return true iff it's affine.
964// Collect any loops mentioned in the set of "Loops".
965bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,
966 SmallBitVector &Loops, bool IsSrc) {
967 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
968 if (!AddRec)
969 return isLoopInvariant(Expr, LoopNest);
970
971 // The AddRec must depend on one of the containing loops. Otherwise,
972 // mapSrcLoop and mapDstLoop return indices outside the intended range. This
973 // can happen when a subscript in one loop references an IV from a sibling
974 // loop that could not be replaced with a concrete exit value by
975 // getSCEVAtScope.
976 const Loop *L = LoopNest;
977 while (L && AddRec->getLoop() != L)
978 L = L->getParentLoop();
979 if (!L)
980 return false;
981
982 const SCEV *Start = AddRec->getStart();
983 const SCEV *Step = AddRec->getStepRecurrence(*SE);
984 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());
985 if (!isa<SCEVCouldNotCompute>(UB)) {
986 if (SE->getTypeSizeInBits(Start->getType()) <
987 SE->getTypeSizeInBits(UB->getType())) {
988 if (!AddRec->getNoWrapFlags())
989 return false;
990 }
991 }
992 if (!isLoopInvariant(Step, LoopNest))
993 return false;
994 if (IsSrc)
995 Loops.set(mapSrcLoop(AddRec->getLoop()));
996 else
997 Loops.set(mapDstLoop(AddRec->getLoop()));
998 return checkSubscript(Start, LoopNest, Loops, IsSrc);
999}
1000
1001// Examine the scev and return true iff it's linear.
1002// Collect any loops mentioned in the set of "Loops".
1003bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest,
1005 return checkSubscript(Src, LoopNest, Loops, true);
1006}
1007
1008// Examine the scev and return true iff it's linear.
1009// Collect any loops mentioned in the set of "Loops".
1010bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest,
1012 return checkSubscript(Dst, LoopNest, Loops, false);
1013}
1014
1015
1016// Examines the subscript pair (the Src and Dst SCEVs)
1017// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
1018// Collects the associated loops in a set.
1019DependenceInfo::Subscript::ClassificationKind
1020DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,
1021 const SCEV *Dst, const Loop *DstLoopNest,
1023 SmallBitVector SrcLoops(MaxLevels + 1);
1024 SmallBitVector DstLoops(MaxLevels + 1);
1025 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops))
1026 return Subscript::NonLinear;
1027 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops))
1028 return Subscript::NonLinear;
1029 Loops = SrcLoops;
1030 Loops |= DstLoops;
1031 unsigned N = Loops.count();
1032 if (N == 0)
1033 return Subscript::ZIV;
1034 if (N == 1)
1035 return Subscript::SIV;
1036 if (N == 2 && (SrcLoops.count() == 0 ||
1037 DstLoops.count() == 0 ||
1038 (SrcLoops.count() == 1 && DstLoops.count() == 1)))
1039 return Subscript::RDIV;
1040 return Subscript::MIV;
1041}
1042
1043
1044// A wrapper around SCEV::isKnownPredicate.
1045// Looks for cases where we're interested in comparing for equality.
1046// If both X and Y have been identically sign or zero extended,
1047// it strips off the (confusing) extensions before invoking
1048// SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package
1049// will be similarly updated.
1050//
1051// If SCEV::isKnownPredicate can't prove the predicate,
1052// we try simple subtraction, which seems to help in some cases
1053// involving symbolics.
1054bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X,
1055 const SCEV *Y) const {
1056 if (Pred == CmpInst::ICMP_EQ ||
1057 Pred == CmpInst::ICMP_NE) {
1058 if ((isa<SCEVSignExtendExpr>(X) &&
1059 isa<SCEVSignExtendExpr>(Y)) ||
1060 (isa<SCEVZeroExtendExpr>(X) &&
1061 isa<SCEVZeroExtendExpr>(Y))) {
1062 const SCEVIntegralCastExpr *CX = cast<SCEVIntegralCastExpr>(X);
1063 const SCEVIntegralCastExpr *CY = cast<SCEVIntegralCastExpr>(Y);
1064 const SCEV *Xop = CX->getOperand();
1065 const SCEV *Yop = CY->getOperand();
1066 if (Xop->getType() == Yop->getType()) {
1067 X = Xop;
1068 Y = Yop;
1069 }
1070 }
1071 }
1072 if (SE->isKnownPredicate(Pred, X, Y))
1073 return true;
1074 // If SE->isKnownPredicate can't prove the condition,
1075 // we try the brute-force approach of subtracting
1076 // and testing the difference.
1077 // By testing with SE->isKnownPredicate first, we avoid
1078 // the possibility of overflow when the arguments are constants.
1079 const SCEV *Delta = SE->getMinusSCEV(X, Y);
1080 switch (Pred) {
1081 case CmpInst::ICMP_EQ:
1082 return Delta->isZero();
1083 case CmpInst::ICMP_NE:
1084 return SE->isKnownNonZero(Delta);
1085 case CmpInst::ICMP_SGE:
1086 return SE->isKnownNonNegative(Delta);
1087 case CmpInst::ICMP_SLE:
1088 return SE->isKnownNonPositive(Delta);
1089 case CmpInst::ICMP_SGT:
1090 return SE->isKnownPositive(Delta);
1091 case CmpInst::ICMP_SLT:
1092 return SE->isKnownNegative(Delta);
1093 default:
1094 llvm_unreachable("unexpected predicate in isKnownPredicate");
1095 }
1096}
1097
1098/// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1))
1099/// with some extra checking if S is an AddRec and we can prove less-than using
1100/// the loop bounds.
1101bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const {
1102 // First unify to the same type
1103 auto *SType = dyn_cast<IntegerType>(S->getType());
1104 auto *SizeType = dyn_cast<IntegerType>(Size->getType());
1105 if (!SType || !SizeType)
1106 return false;
1107 Type *MaxType =
1108 (SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType;
1109 S = SE->getTruncateOrZeroExtend(S, MaxType);
1110 Size = SE->getTruncateOrZeroExtend(Size, MaxType);
1111
1112 // Special check for addrecs using BE taken count
1113 const SCEV *Bound = SE->getMinusSCEV(S, Size);
1114 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) {
1115 if (AddRec->isAffine()) {
1116 const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop());
1117 if (!isa<SCEVCouldNotCompute>(BECount)) {
1118 const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE);
1119 if (SE->isKnownNegative(Limit))
1120 return true;
1121 }
1122 }
1123 }
1124
1125 // Check using normal isKnownNegative
1126 const SCEV *LimitedBound =
1127 SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType())));
1128 return SE->isKnownNegative(LimitedBound);
1129}
1130
1131bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const {
1132 bool Inbounds = false;
1133 if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr))
1134 Inbounds = SrcGEP->isInBounds();
1135 if (Inbounds) {
1136 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
1137 if (AddRec->isAffine()) {
1138 // We know S is for Ptr, the operand on a load/store, so doesn't wrap.
1139 // If both parts are NonNegative, the end result will be NonNegative
1140 if (SE->isKnownNonNegative(AddRec->getStart()) &&
1141 SE->isKnownNonNegative(AddRec->getOperand(1)))
1142 return true;
1143 }
1144 }
1145 }
1146
1147 return SE->isKnownNonNegative(S);
1148}
1149
1150// All subscripts are all the same type.
1151// Loop bound may be smaller (e.g., a char).
1152// Should zero extend loop bound, since it's always >= 0.
1153// This routine collects upper bound and extends or truncates if needed.
1154// Truncating is safe when subscripts are known not to wrap. Cases without
1155// nowrap flags should have been rejected earlier.
1156// Return null if no bound available.
1157const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const {
1159 const SCEV *UB = SE->getBackedgeTakenCount(L);
1160 return SE->getTruncateOrZeroExtend(UB, T);
1161 }
1162 return nullptr;
1163}
1164
1165
1166// Calls collectUpperBound(), then attempts to cast it to SCEVConstant.
1167// If the cast fails, returns NULL.
1168const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L,
1169 Type *T) const {
1170 if (const SCEV *UB = collectUpperBound(L, T))
1171 return dyn_cast<SCEVConstant>(UB);
1172 return nullptr;
1173}
1174
1175
1176// testZIV -
1177// When we have a pair of subscripts of the form [c1] and [c2],
1178// where c1 and c2 are both loop invariant, we attack it using
1179// the ZIV test. Basically, we test by comparing the two values,
1180// but there are actually three possible results:
1181// 1) the values are equal, so there's a dependence
1182// 2) the values are different, so there's no dependence
1183// 3) the values might be equal, so we have to assume a dependence.
1184//
1185// Return true if dependence disproved.
1186bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst,
1187 FullDependence &Result) const {
1188 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
1189 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
1190 ++ZIVapplications;
1191 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) {
1192 LLVM_DEBUG(dbgs() << " provably dependent\n");
1193 return false; // provably dependent
1194 }
1195 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) {
1196 LLVM_DEBUG(dbgs() << " provably independent\n");
1197 ++ZIVindependence;
1198 return true; // provably independent
1199 }
1200 LLVM_DEBUG(dbgs() << " possibly dependent\n");
1201 Result.Consistent = false;
1202 return false; // possibly dependent
1203}
1204
1205
1206// strongSIVtest -
1207// From the paper, Practical Dependence Testing, Section 4.2.1
1208//
1209// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
1210// where i is an induction variable, c1 and c2 are loop invariant,
1211// and a is a constant, we can solve it exactly using the Strong SIV test.
1212//
1213// Can prove independence. Failing that, can compute distance (and direction).
1214// In the presence of symbolic terms, we can sometimes make progress.
1215//
1216// If there's a dependence,
1217//
1218// c1 + a*i = c2 + a*i'
1219//
1220// The dependence distance is
1221//
1222// d = i' - i = (c1 - c2)/a
1223//
1224// A dependence only exists if d is an integer and abs(d) <= U, where U is the
1225// loop's upper bound. If a dependence exists, the dependence direction is
1226// defined as
1227//
1228// { < if d > 0
1229// direction = { = if d = 0
1230// { > if d < 0
1231//
1232// Return true if dependence disproved.
1233bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
1234 const SCEV *DstConst, const Loop *CurLoop,
1235 unsigned Level, FullDependence &Result,
1236 Constraint &NewConstraint) const {
1237 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");
1238 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff);
1239 LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
1240 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst);
1241 LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
1242 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst);
1243 LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
1244 ++StrongSIVapplications;
1245 assert(0 < Level && Level <= CommonLevels && "level out of range");
1246 Level--;
1247
1248 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
1249 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta);
1250 LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
1251
1252 // check that |Delta| < iteration count
1253 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1254 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound);
1255 LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
1256 const SCEV *AbsDelta =
1257 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta);
1258 const SCEV *AbsCoeff =
1259 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff);
1260 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff);
1261 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) {
1262 // Distance greater than trip count - no dependence
1263 ++StrongSIVindependence;
1264 ++StrongSIVsuccesses;
1265 return true;
1266 }
1267 }
1268
1269 // Can we compute distance?
1270 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) {
1271 APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt();
1272 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt();
1273 APInt Distance = ConstDelta; // these need to be initialized
1274 APInt Remainder = ConstDelta;
1275 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder);
1276 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
1277 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1278 // Make sure Coeff divides Delta exactly
1279 if (Remainder != 0) {
1280 // Coeff doesn't divide Distance, no dependence
1281 ++StrongSIVindependence;
1282 ++StrongSIVsuccesses;
1283 return true;
1284 }
1285 Result.DV[Level].Distance = SE->getConstant(Distance);
1286 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop);
1287 if (Distance.sgt(0))
1288 Result.DV[Level].Direction &= Dependence::DVEntry::LT;
1289 else if (Distance.slt(0))
1290 Result.DV[Level].Direction &= Dependence::DVEntry::GT;
1291 else
1292 Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1293 ++StrongSIVsuccesses;
1294 }
1295 else if (Delta->isZero()) {
1296 // since 0/X == 0
1297 Result.DV[Level].Distance = Delta;
1298 NewConstraint.setDistance(Delta, CurLoop);
1299 Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1300 ++StrongSIVsuccesses;
1301 }
1302 else {
1303 if (Coeff->isOne()) {
1304 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n");
1305 Result.DV[Level].Distance = Delta; // since X/1 == X
1306 NewConstraint.setDistance(Delta, CurLoop);
1307 }
1308 else {
1309 Result.Consistent = false;
1310 NewConstraint.setLine(Coeff,
1311 SE->getNegativeSCEV(Coeff),
1312 SE->getNegativeSCEV(Delta), CurLoop);
1313 }
1314
1315 // maybe we can get a useful direction
1316 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta);
1317 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta);
1318 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta);
1319 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff);
1320 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff);
1321 // The double negatives above are confusing.
1322 // It helps to read !SE->isKnownNonZero(Delta)
1323 // as "Delta might be Zero"
1324 unsigned NewDirection = Dependence::DVEntry::NONE;
1325 if ((DeltaMaybePositive && CoeffMaybePositive) ||
1326 (DeltaMaybeNegative && CoeffMaybeNegative))
1327 NewDirection = Dependence::DVEntry::LT;
1328 if (DeltaMaybeZero)
1329 NewDirection |= Dependence::DVEntry::EQ;
1330 if ((DeltaMaybeNegative && CoeffMaybePositive) ||
1331 (DeltaMaybePositive && CoeffMaybeNegative))
1332 NewDirection |= Dependence::DVEntry::GT;
1333 if (NewDirection < Result.DV[Level].Direction)
1334 ++StrongSIVsuccesses;
1335 Result.DV[Level].Direction &= NewDirection;
1336 }
1337 return false;
1338}
1339
1340
1341// weakCrossingSIVtest -
1342// From the paper, Practical Dependence Testing, Section 4.2.2
1343//
1344// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
1345// where i is an induction variable, c1 and c2 are loop invariant,
1346// and a is a constant, we can solve it exactly using the
1347// Weak-Crossing SIV test.
1348//
1349// Given c1 + a*i = c2 - a*i', we can look for the intersection of
1350// the two lines, where i = i', yielding
1351//
1352// c1 + a*i = c2 - a*i
1353// 2a*i = c2 - c1
1354// i = (c2 - c1)/2a
1355//
1356// If i < 0, there is no dependence.
1357// If i > upperbound, there is no dependence.
1358// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
1359// If i = upperbound, there's a dependence with distance = 0.
1360// If i is integral, there's a dependence (all directions).
1361// If the non-integer part = 1/2, there's a dependence (<> directions).
1362// Otherwise, there's no dependence.
1363//
1364// Can prove independence. Failing that,
1365// can sometimes refine the directions.
1366// Can determine iteration for splitting.
1367//
1368// Return true if dependence disproved.
1369bool DependenceInfo::weakCrossingSIVtest(
1370 const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst,
1371 const Loop *CurLoop, unsigned Level, FullDependence &Result,
1372 Constraint &NewConstraint, const SCEV *&SplitIter) const {
1373 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
1374 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n");
1375 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1376 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1377 ++WeakCrossingSIVapplications;
1378 assert(0 < Level && Level <= CommonLevels && "Level out of range");
1379 Level--;
1380 Result.Consistent = false;
1381 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1382 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1383 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
1384 if (Delta->isZero()) {
1385 Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;
1386 Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;
1387 ++WeakCrossingSIVsuccesses;
1388 if (!Result.DV[Level].Direction) {
1389 ++WeakCrossingSIVindependence;
1390 return true;
1391 }
1392 Result.DV[Level].Distance = Delta; // = 0
1393 return false;
1394 }
1395 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff);
1396 if (!ConstCoeff)
1397 return false;
1398
1399 Result.DV[Level].Splitable = true;
1400 if (SE->isKnownNegative(ConstCoeff)) {
1401 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff));
1402 assert(ConstCoeff &&
1403 "dynamic cast of negative of ConstCoeff should yield constant");
1404 Delta = SE->getNegativeSCEV(Delta);
1405 }
1406 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
1407
1408 // compute SplitIter for use by DependenceInfo::getSplitIteration()
1409 SplitIter = SE->getUDivExpr(
1410 SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta),
1411 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff));
1412 LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n");
1413
1414 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
1415 if (!ConstDelta)
1416 return false;
1417
1418 // We're certain that ConstCoeff > 0; therefore,
1419 // if Delta < 0, then no dependence.
1420 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1421 LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n");
1422 if (SE->isKnownNegative(Delta)) {
1423 // No dependence, Delta < 0
1424 ++WeakCrossingSIVindependence;
1425 ++WeakCrossingSIVsuccesses;
1426 return true;
1427 }
1428
1429 // We're certain that Delta > 0 and ConstCoeff > 0.
1430 // Check Delta/(2*ConstCoeff) against upper loop bound
1431 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1432 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
1433 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2);
1434 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound),
1435 ConstantTwo);
1436 LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n");
1437 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) {
1438 // Delta too big, no dependence
1439 ++WeakCrossingSIVindependence;
1440 ++WeakCrossingSIVsuccesses;
1441 return true;
1442 }
1443 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) {
1444 // i = i' = UB
1445 Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;
1446 Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;
1447 ++WeakCrossingSIVsuccesses;
1448 if (!Result.DV[Level].Direction) {
1449 ++WeakCrossingSIVindependence;
1450 return true;
1451 }
1452 Result.DV[Level].Splitable = false;
1453 Result.DV[Level].Distance = SE->getZero(Delta->getType());
1454 return false;
1455 }
1456 }
1457
1458 // check that Coeff divides Delta
1459 APInt APDelta = ConstDelta->getAPInt();
1460 APInt APCoeff = ConstCoeff->getAPInt();
1461 APInt Distance = APDelta; // these need to be initialzed
1462 APInt Remainder = APDelta;
1463 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder);
1464 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1465 if (Remainder != 0) {
1466 // Coeff doesn't divide Delta, no dependence
1467 ++WeakCrossingSIVindependence;
1468 ++WeakCrossingSIVsuccesses;
1469 return true;
1470 }
1471 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
1472
1473 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
1474 APInt Two = APInt(Distance.getBitWidth(), 2, true);
1475 Remainder = Distance.srem(Two);
1476 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1477 if (Remainder != 0) {
1478 // Equal direction isn't possible
1479 Result.DV[Level].Direction &= ~Dependence::DVEntry::EQ;
1480 ++WeakCrossingSIVsuccesses;
1481 }
1482 return false;
1483}
1484
1485
1486// Kirch's algorithm, from
1487//
1488// Optimizing Supercompilers for Supercomputers
1489// Michael Wolfe
1490// MIT Press, 1989
1491//
1492// Program 2.1, page 29.
1493// Computes the GCD of AM and BM.
1494// Also finds a solution to the equation ax - by = gcd(a, b).
1495// Returns true if dependence disproved; i.e., gcd does not divide Delta.
1496static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM,
1497 const APInt &Delta, APInt &G, APInt &X, APInt &Y) {
1498 APInt A0(Bits, 1, true), A1(Bits, 0, true);
1499 APInt B0(Bits, 0, true), B1(Bits, 1, true);
1500 APInt G0 = AM.abs();
1501 APInt G1 = BM.abs();
1502 APInt Q = G0; // these need to be initialized
1503 APInt R = G0;
1504 APInt::sdivrem(G0, G1, Q, R);
1505 while (R != 0) {
1506 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;
1507 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;
1508 G0 = G1; G1 = R;
1509 APInt::sdivrem(G0, G1, Q, R);
1510 }
1511 G = G1;
1512 LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n");
1513 X = AM.slt(0) ? -A1 : A1;
1514 Y = BM.slt(0) ? B1 : -B1;
1515
1516 // make sure gcd divides Delta
1517 R = Delta.srem(G);
1518 if (R != 0)
1519 return true; // gcd doesn't divide Delta, no dependence
1520 Q = Delta.sdiv(G);
1521 return false;
1522}
1523
1524static APInt floorOfQuotient(const APInt &A, const APInt &B) {
1525 APInt Q = A; // these need to be initialized
1526 APInt R = A;
1527 APInt::sdivrem(A, B, Q, R);
1528 if (R == 0)
1529 return Q;
1530 if ((A.sgt(0) && B.sgt(0)) ||
1531 (A.slt(0) && B.slt(0)))
1532 return Q;
1533 else
1534 return Q - 1;
1535}
1536
1537static APInt ceilingOfQuotient(const APInt &A, const APInt &B) {
1538 APInt Q = A; // these need to be initialized
1539 APInt R = A;
1540 APInt::sdivrem(A, B, Q, R);
1541 if (R == 0)
1542 return Q;
1543 if ((A.sgt(0) && B.sgt(0)) ||
1544 (A.slt(0) && B.slt(0)))
1545 return Q + 1;
1546 else
1547 return Q;
1548}
1549
1550// exactSIVtest -
1551// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
1552// where i is an induction variable, c1 and c2 are loop invariant, and a1
1553// and a2 are constant, we can solve it exactly using an algorithm developed
1554// by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in:
1555//
1556// Dependence Analysis for Supercomputing
1557// Utpal Banerjee
1558// Kluwer Academic Publishers, 1988
1559//
1560// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
1561// so use them if possible. They're also a bit better with symbolics and,
1562// in the case of the strong SIV test, can compute Distances.
1563//
1564// Return true if dependence disproved.
1565//
1566// This is a modified version of the original Banerjee algorithm. The original
1567// only tested whether Dst depends on Src. This algorithm extends that and
1568// returns all the dependencies that exist between Dst and Src.
1569bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
1570 const SCEV *SrcConst, const SCEV *DstConst,
1571 const Loop *CurLoop, unsigned Level,
1572 FullDependence &Result,
1573 Constraint &NewConstraint) const {
1574 LLVM_DEBUG(dbgs() << "\tExact SIV test\n");
1575 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
1576 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
1577 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1578 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1579 ++ExactSIVapplications;
1580 assert(0 < Level && Level <= CommonLevels && "Level out of range");
1581 Level--;
1582 Result.Consistent = false;
1583 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1584 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1585 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), Delta,
1586 CurLoop);
1587 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
1588 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
1589 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
1590 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
1591 return false;
1592
1593 // find gcd
1594 APInt G, X, Y;
1595 APInt AM = ConstSrcCoeff->getAPInt();
1596 APInt BM = ConstDstCoeff->getAPInt();
1597 APInt CM = ConstDelta->getAPInt();
1598 unsigned Bits = AM.getBitWidth();
1599 if (findGCD(Bits, AM, BM, CM, G, X, Y)) {
1600 // gcd doesn't divide Delta, no dependence
1601 ++ExactSIVindependence;
1602 ++ExactSIVsuccesses;
1603 return true;
1604 }
1605
1606 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
1607
1608 // since SCEV construction normalizes, LM = 0
1609 APInt UM(Bits, 1, true);
1610 bool UMValid = false;
1611 // UM is perhaps unavailable, let's check
1612 if (const SCEVConstant *CUB =
1613 collectConstantUpperBound(CurLoop, Delta->getType())) {
1614 UM = CUB->getAPInt();
1615 LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n");
1616 UMValid = true;
1617 }
1618
1621 APInt TC = CM.sdiv(G);
1622 APInt TX = X * TC;
1623 APInt TY = Y * TC;
1624 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n");
1625 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n");
1626 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n");
1627
1628 SmallVector<APInt, 2> TLVec, TUVec;
1629 APInt TB = BM.sdiv(G);
1630 if (TB.sgt(0)) {
1631 TLVec.push_back(ceilingOfQuotient(-TX, TB));
1632 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
1633 // New bound check - modification to Banerjee's e3 check
1634 if (UMValid) {
1635 TUVec.push_back(floorOfQuotient(UM - TX, TB));
1636 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
1637 }
1638 } else {
1639 TUVec.push_back(floorOfQuotient(-TX, TB));
1640 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
1641 // New bound check - modification to Banerjee's e3 check
1642 if (UMValid) {
1643 TLVec.push_back(ceilingOfQuotient(UM - TX, TB));
1644 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
1645 }
1646 }
1647
1648 APInt TA = AM.sdiv(G);
1649 if (TA.sgt(0)) {
1650 if (UMValid) {
1651 TUVec.push_back(floorOfQuotient(UM - TY, TA));
1652 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
1653 }
1654 // New bound check - modification to Banerjee's e3 check
1655 TLVec.push_back(ceilingOfQuotient(-TY, TA));
1656 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
1657 } else {
1658 if (UMValid) {
1659 TLVec.push_back(ceilingOfQuotient(UM - TY, TA));
1660 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
1661 }
1662 // New bound check - modification to Banerjee's e3 check
1663 TUVec.push_back(floorOfQuotient(-TY, TA));
1664 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
1665 }
1666
1667 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");
1668 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");
1669
1670 if (TLVec.empty() || TUVec.empty())
1671 return false;
1672 TL = APIntOps::smax(TLVec.front(), TLVec.back());
1673 TU = APIntOps::smin(TUVec.front(), TUVec.back());
1674 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
1675 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
1676
1677 if (TL.sgt(TU)) {
1678 ++ExactSIVindependence;
1679 ++ExactSIVsuccesses;
1680 return true;
1681 }
1682
1683 // explore directions
1684 unsigned NewDirection = Dependence::DVEntry::NONE;
1685 APInt LowerDistance, UpperDistance;
1686 if (TA.sgt(TB)) {
1687 LowerDistance = (TY - TX) + (TA - TB) * TL;
1688 UpperDistance = (TY - TX) + (TA - TB) * TU;
1689 } else {
1690 LowerDistance = (TY - TX) + (TA - TB) * TU;
1691 UpperDistance = (TY - TX) + (TA - TB) * TL;
1692 }
1693
1694 LLVM_DEBUG(dbgs() << "\t LowerDistance = " << LowerDistance << "\n");
1695 LLVM_DEBUG(dbgs() << "\t UpperDistance = " << UpperDistance << "\n");
1696
1697 APInt Zero(Bits, 0, true);
1698 if (LowerDistance.sle(Zero) && UpperDistance.sge(Zero)) {
1699 NewDirection |= Dependence::DVEntry::EQ;
1700 ++ExactSIVsuccesses;
1701 }
1702 if (LowerDistance.slt(0)) {
1703 NewDirection |= Dependence::DVEntry::GT;
1704 ++ExactSIVsuccesses;
1705 }
1706 if (UpperDistance.sgt(0)) {
1707 NewDirection |= Dependence::DVEntry::LT;
1708 ++ExactSIVsuccesses;
1709 }
1710
1711 // finished
1712 Result.DV[Level].Direction &= NewDirection;
1713 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)
1714 ++ExactSIVindependence;
1715 LLVM_DEBUG(dbgs() << "\t Result = ");
1716 LLVM_DEBUG(Result.dump(dbgs()));
1717 return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
1718}
1719
1720
1721// Return true if the divisor evenly divides the dividend.
1722static
1723bool isRemainderZero(const SCEVConstant *Dividend,
1724 const SCEVConstant *Divisor) {
1725 const APInt &ConstDividend = Dividend->getAPInt();
1726 const APInt &ConstDivisor = Divisor->getAPInt();
1727 return ConstDividend.srem(ConstDivisor) == 0;
1728}
1729
1730
1731// weakZeroSrcSIVtest -
1732// From the paper, Practical Dependence Testing, Section 4.2.2
1733//
1734// When we have a pair of subscripts of the form [c1] and [c2 + a*i],
1735// where i is an induction variable, c1 and c2 are loop invariant,
1736// and a is a constant, we can solve it exactly using the
1737// Weak-Zero SIV test.
1738//
1739// Given
1740//
1741// c1 = c2 + a*i
1742//
1743// we get
1744//
1745// (c1 - c2)/a = i
1746//
1747// If i is not an integer, there's no dependence.
1748// If i < 0 or > UB, there's no dependence.
1749// If i = 0, the direction is >= and peeling the
1750// 1st iteration will break the dependence.
1751// If i = UB, the direction is <= and peeling the
1752// last iteration will break the dependence.
1753// Otherwise, the direction is *.
1754//
1755// Can prove independence. Failing that, we can sometimes refine
1756// the directions. Can sometimes show that first or last
1757// iteration carries all the dependences (so worth peeling).
1758//
1759// (see also weakZeroDstSIVtest)
1760//
1761// Return true if dependence disproved.
1762bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
1763 const SCEV *SrcConst,
1764 const SCEV *DstConst,
1765 const Loop *CurLoop, unsigned Level,
1766 FullDependence &Result,
1767 Constraint &NewConstraint) const {
1768 // For the WeakSIV test, it's possible the loop isn't common to
1769 // the Src and Dst loops. If it isn't, then there's no need to
1770 // record a direction.
1771 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
1772 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n");
1773 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1774 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1775 ++WeakZeroSIVapplications;
1776 assert(0 < Level && Level <= MaxLevels && "Level out of range");
1777 Level--;
1778 Result.Consistent = false;
1779 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
1780 NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta,
1781 CurLoop);
1782 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1783 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
1784 if (Level < CommonLevels) {
1785 Result.DV[Level].Direction &= Dependence::DVEntry::GE;
1786 Result.DV[Level].PeelFirst = true;
1787 ++WeakZeroSIVsuccesses;
1788 }
1789 return false; // dependences caused by first iteration
1790 }
1791 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
1792 if (!ConstCoeff)
1793 return false;
1794 const SCEV *AbsCoeff =
1795 SE->isKnownNegative(ConstCoeff) ?
1796 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
1797 const SCEV *NewDelta =
1798 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
1799
1800 // check that Delta/SrcCoeff < iteration count
1801 // really check NewDelta < count*AbsCoeff
1802 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1803 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
1804 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
1805 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
1806 ++WeakZeroSIVindependence;
1807 ++WeakZeroSIVsuccesses;
1808 return true;
1809 }
1810 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
1811 // dependences caused by last iteration
1812 if (Level < CommonLevels) {
1813 Result.DV[Level].Direction &= Dependence::DVEntry::LE;
1814 Result.DV[Level].PeelLast = true;
1815 ++WeakZeroSIVsuccesses;
1816 }
1817 return false;
1818 }
1819 }
1820
1821 // check that Delta/SrcCoeff >= 0
1822 // really check that NewDelta >= 0
1823 if (SE->isKnownNegative(NewDelta)) {
1824 // No dependence, newDelta < 0
1825 ++WeakZeroSIVindependence;
1826 ++WeakZeroSIVsuccesses;
1827 return true;
1828 }
1829
1830 // if SrcCoeff doesn't divide Delta, then no dependence
1831 if (isa<SCEVConstant>(Delta) &&
1832 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
1833 ++WeakZeroSIVindependence;
1834 ++WeakZeroSIVsuccesses;
1835 return true;
1836 }
1837 return false;
1838}
1839
1840
1841// weakZeroDstSIVtest -
1842// From the paper, Practical Dependence Testing, Section 4.2.2
1843//
1844// When we have a pair of subscripts of the form [c1 + a*i] and [c2],
1845// where i is an induction variable, c1 and c2 are loop invariant,
1846// and a is a constant, we can solve it exactly using the
1847// Weak-Zero SIV test.
1848//
1849// Given
1850//
1851// c1 + a*i = c2
1852//
1853// we get
1854//
1855// i = (c2 - c1)/a
1856//
1857// If i is not an integer, there's no dependence.
1858// If i < 0 or > UB, there's no dependence.
1859// If i = 0, the direction is <= and peeling the
1860// 1st iteration will break the dependence.
1861// If i = UB, the direction is >= and peeling the
1862// last iteration will break the dependence.
1863// Otherwise, the direction is *.
1864//
1865// Can prove independence. Failing that, we can sometimes refine
1866// the directions. Can sometimes show that first or last
1867// iteration carries all the dependences (so worth peeling).
1868//
1869// (see also weakZeroSrcSIVtest)
1870//
1871// Return true if dependence disproved.
1872bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
1873 const SCEV *SrcConst,
1874 const SCEV *DstConst,
1875 const Loop *CurLoop, unsigned Level,
1876 FullDependence &Result,
1877 Constraint &NewConstraint) const {
1878 // For the WeakSIV test, it's possible the loop isn't common to the
1879 // Src and Dst loops. If it isn't, then there's no need to record a direction.
1880 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
1881 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n");
1882 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1883 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1884 ++WeakZeroSIVapplications;
1885 assert(0 < Level && Level <= SrcLevels && "Level out of range");
1886 Level--;
1887 Result.Consistent = false;
1888 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1889 NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta,
1890 CurLoop);
1891 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1892 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {
1893 if (Level < CommonLevels) {
1894 Result.DV[Level].Direction &= Dependence::DVEntry::LE;
1895 Result.DV[Level].PeelFirst = true;
1896 ++WeakZeroSIVsuccesses;
1897 }
1898 return false; // dependences caused by first iteration
1899 }
1900 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
1901 if (!ConstCoeff)
1902 return false;
1903 const SCEV *AbsCoeff =
1904 SE->isKnownNegative(ConstCoeff) ?
1905 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
1906 const SCEV *NewDelta =
1907 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
1908
1909 // check that Delta/SrcCoeff < iteration count
1910 // really check NewDelta < count*AbsCoeff
1911 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1912 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
1913 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
1914 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
1915 ++WeakZeroSIVindependence;
1916 ++WeakZeroSIVsuccesses;
1917 return true;
1918 }
1919 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
1920 // dependences caused by last iteration
1921 if (Level < CommonLevels) {
1922 Result.DV[Level].Direction &= Dependence::DVEntry::GE;
1923 Result.DV[Level].PeelLast = true;
1924 ++WeakZeroSIVsuccesses;
1925 }
1926 return false;
1927 }
1928 }
1929
1930 // check that Delta/SrcCoeff >= 0
1931 // really check that NewDelta >= 0
1932 if (SE->isKnownNegative(NewDelta)) {
1933 // No dependence, newDelta < 0
1934 ++WeakZeroSIVindependence;
1935 ++WeakZeroSIVsuccesses;
1936 return true;
1937 }
1938
1939 // if SrcCoeff doesn't divide Delta, then no dependence
1940 if (isa<SCEVConstant>(Delta) &&
1941 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
1942 ++WeakZeroSIVindependence;
1943 ++WeakZeroSIVsuccesses;
1944 return true;
1945 }
1946 return false;
1947}
1948
1949
1950// exactRDIVtest - Tests the RDIV subscript pair for dependence.
1951// Things of the form [c1 + a*i] and [c2 + b*j],
1952// where i and j are induction variable, c1 and c2 are loop invariant,
1953// and a and b are constants.
1954// Returns true if any possible dependence is disproved.
1955// Marks the result as inconsistent.
1956// Works in some cases that symbolicRDIVtest doesn't, and vice versa.
1957bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
1958 const SCEV *SrcConst, const SCEV *DstConst,
1959 const Loop *SrcLoop, const Loop *DstLoop,
1960 FullDependence &Result) const {
1961 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n");
1962 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
1963 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
1964 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1965 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1966 ++ExactRDIVapplications;
1967 Result.Consistent = false;
1968 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1969 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1970 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
1971 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
1972 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
1973 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
1974 return false;
1975
1976 // find gcd
1977 APInt G, X, Y;
1978 APInt AM = ConstSrcCoeff->getAPInt();
1979 APInt BM = ConstDstCoeff->getAPInt();
1980 APInt CM = ConstDelta->getAPInt();
1981 unsigned Bits = AM.getBitWidth();
1982 if (findGCD(Bits, AM, BM, CM, G, X, Y)) {
1983 // gcd doesn't divide Delta, no dependence
1984 ++ExactRDIVindependence;
1985 return true;
1986 }
1987
1988 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
1989
1990 // since SCEV construction seems to normalize, LM = 0
1991 APInt SrcUM(Bits, 1, true);
1992 bool SrcUMvalid = false;
1993 // SrcUM is perhaps unavailable, let's check
1994 if (const SCEVConstant *UpperBound =
1995 collectConstantUpperBound(SrcLoop, Delta->getType())) {
1996 SrcUM = UpperBound->getAPInt();
1997 LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n");
1998 SrcUMvalid = true;
1999 }
2000
2001 APInt DstUM(Bits, 1, true);
2002 bool DstUMvalid = false;
2003 // UM is perhaps unavailable, let's check
2004 if (const SCEVConstant *UpperBound =
2005 collectConstantUpperBound(DstLoop, Delta->getType())) {
2006 DstUM = UpperBound->getAPInt();
2007 LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n");
2008 DstUMvalid = true;
2009 }
2010
2013 APInt TC = CM.sdiv(G);
2014 APInt TX = X * TC;
2015 APInt TY = Y * TC;
2016 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n");
2017 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n");
2018 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n");
2019
2020 SmallVector<APInt, 2> TLVec, TUVec;
2021 APInt TB = BM.sdiv(G);
2022 if (TB.sgt(0)) {
2023 TLVec.push_back(ceilingOfQuotient(-TX, TB));
2024 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
2025 if (SrcUMvalid) {
2026 TUVec.push_back(floorOfQuotient(SrcUM - TX, TB));
2027 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
2028 }
2029 } else {
2030 TUVec.push_back(floorOfQuotient(-TX, TB));
2031 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
2032 if (SrcUMvalid) {
2033 TLVec.push_back(ceilingOfQuotient(SrcUM - TX, TB));
2034 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
2035 }
2036 }
2037
2038 APInt TA = AM.sdiv(G);
2039 if (TA.sgt(0)) {
2040 TLVec.push_back(ceilingOfQuotient(-TY, TA));
2041 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
2042 if (DstUMvalid) {
2043 TUVec.push_back(floorOfQuotient(DstUM - TY, TA));
2044 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
2045 }
2046 } else {
2047 TUVec.push_back(floorOfQuotient(-TY, TA));
2048 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");
2049 if (DstUMvalid) {
2050 TLVec.push_back(ceilingOfQuotient(DstUM - TY, TA));
2051 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");
2052 }
2053 }
2054
2055 if (TLVec.empty() || TUVec.empty())
2056 return false;
2057
2058 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");
2059 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");
2060
2061 TL = APIntOps::smax(TLVec.front(), TLVec.back());
2062 TU = APIntOps::smin(TUVec.front(), TUVec.back());
2063 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
2064 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
2065
2066 if (TL.sgt(TU))
2067 ++ExactRDIVindependence;
2068 return TL.sgt(TU);
2069}
2070
2071
2072// symbolicRDIVtest -
2073// In Section 4.5 of the Practical Dependence Testing paper,the authors
2074// introduce a special case of Banerjee's Inequalities (also called the
2075// Extreme-Value Test) that can handle some of the SIV and RDIV cases,
2076// particularly cases with symbolics. Since it's only able to disprove
2077// dependence (not compute distances or directions), we'll use it as a
2078// fall back for the other tests.
2079//
2080// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
2081// where i and j are induction variables and c1 and c2 are loop invariants,
2082// we can use the symbolic tests to disprove some dependences, serving as a
2083// backup for the RDIV test. Note that i and j can be the same variable,
2084// letting this test serve as a backup for the various SIV tests.
2085//
2086// For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some
2087// 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)
2088// loop bounds for the i and j loops, respectively. So, ...
2089//
2090// c1 + a1*i = c2 + a2*j
2091// a1*i - a2*j = c2 - c1
2092//
2093// To test for a dependence, we compute c2 - c1 and make sure it's in the
2094// range of the maximum and minimum possible values of a1*i - a2*j.
2095// Considering the signs of a1 and a2, we have 4 possible cases:
2096//
2097// 1) If a1 >= 0 and a2 >= 0, then
2098// a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0
2099// -a2*N2 <= c2 - c1 <= a1*N1
2100//
2101// 2) If a1 >= 0 and a2 <= 0, then
2102// a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2
2103// 0 <= c2 - c1 <= a1*N1 - a2*N2
2104//
2105// 3) If a1 <= 0 and a2 >= 0, then
2106// a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0
2107// a1*N1 - a2*N2 <= c2 - c1 <= 0
2108//
2109// 4) If a1 <= 0 and a2 <= 0, then
2110// a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2
2111// a1*N1 <= c2 - c1 <= -a2*N2
2112//
2113// return true if dependence disproved
2114bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2,
2115 const SCEV *C1, const SCEV *C2,
2116 const Loop *Loop1,
2117 const Loop *Loop2) const {
2118 ++SymbolicRDIVapplications;
2119 LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
2120 LLVM_DEBUG(dbgs() << "\t A1 = " << *A1);
2121 LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
2122 LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n");
2123 LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n");
2124 LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n");
2125 const SCEV *N1 = collectUpperBound(Loop1, A1->getType());
2126 const SCEV *N2 = collectUpperBound(Loop2, A1->getType());
2127 LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n");
2128 LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n");
2129 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1);
2130 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2);
2131 LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n");
2132 LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n");
2133 if (SE->isKnownNonNegative(A1)) {
2134 if (SE->isKnownNonNegative(A2)) {
2135 // A1 >= 0 && A2 >= 0
2136 if (N1) {
2137 // make sure that c2 - c1 <= a1*N1
2138 const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2139 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
2140 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) {
2141 ++SymbolicRDIVindependence;
2142 return true;
2143 }
2144 }
2145 if (N2) {
2146 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2
2147 const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2148 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
2149 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) {
2150 ++SymbolicRDIVindependence;
2151 return true;
2152 }
2153 }
2154 }
2155 else if (SE->isKnownNonPositive(A2)) {
2156 // a1 >= 0 && a2 <= 0
2157 if (N1 && N2) {
2158 // make sure that c2 - c1 <= a1*N1 - a2*N2
2159 const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2160 const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2161 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
2162 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
2163 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) {
2164 ++SymbolicRDIVindependence;
2165 return true;
2166 }
2167 }
2168 // make sure that 0 <= c2 - c1
2169 if (SE->isKnownNegative(C2_C1)) {
2170 ++SymbolicRDIVindependence;
2171 return true;
2172 }
2173 }
2174 }
2175 else if (SE->isKnownNonPositive(A1)) {
2176 if (SE->isKnownNonNegative(A2)) {
2177 // a1 <= 0 && a2 >= 0
2178 if (N1 && N2) {
2179 // make sure that a1*N1 - a2*N2 <= c2 - c1
2180 const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2181 const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2182 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
2183 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
2184 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) {
2185 ++SymbolicRDIVindependence;
2186 return true;
2187 }
2188 }
2189 // make sure that c2 - c1 <= 0
2190 if (SE->isKnownPositive(C2_C1)) {
2191 ++SymbolicRDIVindependence;
2192 return true;
2193 }
2194 }
2195 else if (SE->isKnownNonPositive(A2)) {
2196 // a1 <= 0 && a2 <= 0
2197 if (N1) {
2198 // make sure that a1*N1 <= c2 - c1
2199 const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2200 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
2201 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) {
2202 ++SymbolicRDIVindependence;
2203 return true;
2204 }
2205 }
2206 if (N2) {
2207 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2
2208 const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2209 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
2210 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) {
2211 ++SymbolicRDIVindependence;
2212 return true;
2213 }
2214 }
2215 }
2216 }
2217 return false;
2218}
2219
2220
2221// testSIV -
2222// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]
2223// where i is an induction variable, c1 and c2 are loop invariant, and a1 and
2224// a2 are constant, we attack it with an SIV test. While they can all be
2225// solved with the Exact SIV test, it's worthwhile to use simpler tests when
2226// they apply; they're cheaper and sometimes more precise.
2227//
2228// Return true if dependence disproved.
2229bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,
2230 FullDependence &Result, Constraint &NewConstraint,
2231 const SCEV *&SplitIter) const {
2232 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2233 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2234 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
2235 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
2236 if (SrcAddRec && DstAddRec) {
2237 const SCEV *SrcConst = SrcAddRec->getStart();
2238 const SCEV *DstConst = DstAddRec->getStart();
2239 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
2240 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
2241 const Loop *CurLoop = SrcAddRec->getLoop();
2242 assert(CurLoop == DstAddRec->getLoop() &&
2243 "both loops in SIV should be same");
2244 Level = mapSrcLoop(CurLoop);
2245 bool disproven;
2246 if (SrcCoeff == DstCoeff)
2247 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
2248 Level, Result, NewConstraint);
2249 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff))
2250 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
2251 Level, Result, NewConstraint, SplitIter);
2252 else
2253 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
2254 Level, Result, NewConstraint);
2255 return disproven ||
2256 gcdMIVtest(Src, Dst, Result) ||
2257 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop);
2258 }
2259 if (SrcAddRec) {
2260 const SCEV *SrcConst = SrcAddRec->getStart();
2261 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
2262 const SCEV *DstConst = Dst;
2263 const Loop *CurLoop = SrcAddRec->getLoop();
2264 Level = mapSrcLoop(CurLoop);
2265 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
2266 Level, Result, NewConstraint) ||
2267 gcdMIVtest(Src, Dst, Result);
2268 }
2269 if (DstAddRec) {
2270 const SCEV *DstConst = DstAddRec->getStart();
2271 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
2272 const SCEV *SrcConst = Src;
2273 const Loop *CurLoop = DstAddRec->getLoop();
2274 Level = mapDstLoop(CurLoop);
2275 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst,
2276 CurLoop, Level, Result, NewConstraint) ||
2277 gcdMIVtest(Src, Dst, Result);
2278 }
2279 llvm_unreachable("SIV test expected at least one AddRec");
2280 return false;
2281}
2282
2283
2284// testRDIV -
2285// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
2286// where i and j are induction variables, c1 and c2 are loop invariant,
2287// and a1 and a2 are constant, we can solve it exactly with an easy adaptation
2288// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
2289// It doesn't make sense to talk about distance or direction in this case,
2290// so there's no point in making special versions of the Strong SIV test or
2291// the Weak-crossing SIV test.
2292//
2293// With minor algebra, this test can also be used for things like
2294// [c1 + a1*i + a2*j][c2].
2295//
2296// Return true if dependence disproved.
2297bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst,
2298 FullDependence &Result) const {
2299 // we have 3 possible situations here:
2300 // 1) [a*i + b] and [c*j + d]
2301 // 2) [a*i + c*j + b] and [d]
2302 // 3) [b] and [a*i + c*j + d]
2303 // We need to find what we've got and get organized
2304
2305 const SCEV *SrcConst, *DstConst;
2306 const SCEV *SrcCoeff, *DstCoeff;
2307 const Loop *SrcLoop, *DstLoop;
2308
2309 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2310 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2311 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
2312 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
2313 if (SrcAddRec && DstAddRec) {
2314 SrcConst = SrcAddRec->getStart();
2315 SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
2316 SrcLoop = SrcAddRec->getLoop();
2317 DstConst = DstAddRec->getStart();
2318 DstCoeff = DstAddRec->getStepRecurrence(*SE);
2319 DstLoop = DstAddRec->getLoop();
2320 }
2321 else if (SrcAddRec) {
2322 if (const SCEVAddRecExpr *tmpAddRec =
2323 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) {
2324 SrcConst = tmpAddRec->getStart();
2325 SrcCoeff = tmpAddRec->getStepRecurrence(*SE);
2326 SrcLoop = tmpAddRec->getLoop();
2327 DstConst = Dst;
2328 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE));
2329 DstLoop = SrcAddRec->getLoop();
2330 }
2331 else
2332 llvm_unreachable("RDIV reached by surprising SCEVs");
2333 }
2334 else if (DstAddRec) {
2335 if (const SCEVAddRecExpr *tmpAddRec =
2336 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) {
2337 DstConst = tmpAddRec->getStart();
2338 DstCoeff = tmpAddRec->getStepRecurrence(*SE);
2339 DstLoop = tmpAddRec->getLoop();
2340 SrcConst = Src;
2341 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE));
2342 SrcLoop = DstAddRec->getLoop();
2343 }
2344 else
2345 llvm_unreachable("RDIV reached by surprising SCEVs");
2346 }
2347 else
2348 llvm_unreachable("RDIV expected at least one AddRec");
2349 return exactRDIVtest(SrcCoeff, DstCoeff,
2350 SrcConst, DstConst,
2351 SrcLoop, DstLoop,
2352 Result) ||
2353 gcdMIVtest(Src, Dst, Result) ||
2354 symbolicRDIVtest(SrcCoeff, DstCoeff,
2355 SrcConst, DstConst,
2356 SrcLoop, DstLoop);
2357}
2358
2359
2360// Tests the single-subscript MIV pair (Src and Dst) for dependence.
2361// Return true if dependence disproved.
2362// Can sometimes refine direction vectors.
2363bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst,
2364 const SmallBitVector &Loops,
2365 FullDependence &Result) const {
2366 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2367 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2368 Result.Consistent = false;
2369 return gcdMIVtest(Src, Dst, Result) ||
2370 banerjeeMIVtest(Src, Dst, Loops, Result);
2371}
2372
2373
2374// Given a product, e.g., 10*X*Y, returns the first constant operand,
2375// in this case 10. If there is no constant part, returns NULL.
2376static
2377const SCEVConstant *getConstantPart(const SCEV *Expr) {
2378 if (const auto *Constant = dyn_cast<SCEVConstant>(Expr))
2379 return Constant;
2380 else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr))
2381 if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0)))
2382 return Constant;
2383 return nullptr;
2384}
2385
2386
2387//===----------------------------------------------------------------------===//
2388// gcdMIVtest -
2389// Tests an MIV subscript pair for dependence.
2390// Returns true if any possible dependence is disproved.
2391// Marks the result as inconsistent.
2392// Can sometimes disprove the equal direction for 1 or more loops,
2393// as discussed in Michael Wolfe's book,
2394// High Performance Compilers for Parallel Computing, page 235.
2395//
2396// We spend some effort (code!) to handle cases like
2397// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
2398// but M and N are just loop-invariant variables.
2399// This should help us handle linearized subscripts;
2400// also makes this test a useful backup to the various SIV tests.
2401//
2402// It occurs to me that the presence of loop-invariant variables
2403// changes the nature of the test from "greatest common divisor"
2404// to "a common divisor".
2405bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
2406 FullDependence &Result) const {
2407 LLVM_DEBUG(dbgs() << "starting gcd\n");
2408 ++GCDapplications;
2409 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType());
2410 APInt RunningGCD = APInt::getZero(BitWidth);
2411
2412 // Examine Src coefficients.
2413 // Compute running GCD and record source constant.
2414 // Because we're looking for the constant at the end of the chain,
2415 // we can't quit the loop just because the GCD == 1.
2416 const SCEV *Coefficients = Src;
2417 while (const SCEVAddRecExpr *AddRec =
2418 dyn_cast<SCEVAddRecExpr>(Coefficients)) {
2419 const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2420 // If the coefficient is the product of a constant and other stuff,
2421 // we can use the constant in the GCD computation.
2422 const auto *Constant = getConstantPart(Coeff);
2423 if (!Constant)
2424 return false;
2425 APInt ConstCoeff = Constant->getAPInt();
2426 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2427 Coefficients = AddRec->getStart();
2428 }
2429 const SCEV *SrcConst = Coefficients;
2430
2431 // Examine Dst coefficients.
2432 // Compute running GCD and record destination constant.
2433 // Because we're looking for the constant at the end of the chain,
2434 // we can't quit the loop just because the GCD == 1.
2435 Coefficients = Dst;
2436 while (const SCEVAddRecExpr *AddRec =
2437 dyn_cast<SCEVAddRecExpr>(Coefficients)) {
2438 const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2439 // If the coefficient is the product of a constant and other stuff,
2440 // we can use the constant in the GCD computation.
2441 const auto *Constant = getConstantPart(Coeff);
2442 if (!Constant)
2443 return false;
2444 APInt ConstCoeff = Constant->getAPInt();
2445 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2446 Coefficients = AddRec->getStart();
2447 }
2448 const SCEV *DstConst = Coefficients;
2449
2450 APInt ExtraGCD = APInt::getZero(BitWidth);
2451 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
2452 LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n");
2453 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta);
2454 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) {
2455 // If Delta is a sum of products, we may be able to make further progress.
2456 for (const SCEV *Operand : Sum->operands()) {
2457 if (isa<SCEVConstant>(Operand)) {
2458 assert(!Constant && "Surprised to find multiple constants");
2459 Constant = cast<SCEVConstant>(Operand);
2460 }
2461 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) {
2462 // Search for constant operand to participate in GCD;
2463 // If none found; return false.
2464 const SCEVConstant *ConstOp = getConstantPart(Product);
2465 if (!ConstOp)
2466 return false;
2467 APInt ConstOpValue = ConstOp->getAPInt();
2468 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD,
2469 ConstOpValue.abs());
2470 }
2471 else
2472 return false;
2473 }
2474 }
2475 if (!Constant)
2476 return false;
2477 APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt();
2478 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n");
2479 if (ConstDelta == 0)
2480 return false;
2481 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD);
2482 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n");
2483 APInt Remainder = ConstDelta.srem(RunningGCD);
2484 if (Remainder != 0) {
2485 ++GCDindependence;
2486 return true;
2487 }
2488
2489 // Try to disprove equal directions.
2490 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
2491 // the code above can't disprove the dependence because the GCD = 1.
2492 // So we consider what happen if i = i' and what happens if j = j'.
2493 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
2494 // which is infeasible, so we can disallow the = direction for the i level.
2495 // Setting j = j' doesn't help matters, so we end up with a direction vector
2496 // of [<>, *]
2497 //
2498 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],
2499 // we need to remember that the constant part is 5 and the RunningGCD should
2500 // be initialized to ExtraGCD = 30.
2501 LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n');
2502
2503 bool Improved = false;
2504 Coefficients = Src;
2505 while (const SCEVAddRecExpr *AddRec =
2506 dyn_cast<SCEVAddRecExpr>(Coefficients)) {
2507 Coefficients = AddRec->getStart();
2508 const Loop *CurLoop = AddRec->getLoop();
2509 RunningGCD = ExtraGCD;
2510 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE);
2511 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff);
2512 const SCEV *Inner = Src;
2513 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
2514 AddRec = cast<SCEVAddRecExpr>(Inner);
2515 const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2516 if (CurLoop == AddRec->getLoop())
2517 ; // SrcCoeff == Coeff
2518 else {
2519 // If the coefficient is the product of a constant and other stuff,
2520 // we can use the constant in the GCD computation.
2521 Constant = getConstantPart(Coeff);
2522 if (!Constant)
2523 return false;
2524 APInt ConstCoeff = Constant->getAPInt();
2525 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2526 }
2527 Inner = AddRec->getStart();
2528 }
2529 Inner = Dst;
2530 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
2531 AddRec = cast<SCEVAddRecExpr>(Inner);
2532 const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2533 if (CurLoop == AddRec->getLoop())
2534 DstCoeff = Coeff;
2535 else {
2536 // If the coefficient is the product of a constant and other stuff,
2537 // we can use the constant in the GCD computation.
2538 Constant = getConstantPart(Coeff);
2539 if (!Constant)
2540 return false;
2541 APInt ConstCoeff = Constant->getAPInt();
2542 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2543 }
2544 Inner = AddRec->getStart();
2545 }
2546 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff);
2547 // If the coefficient is the product of a constant and other stuff,
2548 // we can use the constant in the GCD computation.
2549 Constant = getConstantPart(Delta);
2550 if (!Constant)
2551 // The difference of the two coefficients might not be a product
2552 // or constant, in which case we give up on this direction.
2553 continue;
2554 APInt ConstCoeff = Constant->getAPInt();
2555 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2556 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
2557 if (RunningGCD != 0) {
2558 Remainder = ConstDelta.srem(RunningGCD);
2559 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
2560 if (Remainder != 0) {
2561 unsigned Level = mapSrcLoop(CurLoop);
2562 Result.DV[Level - 1].Direction &= ~Dependence::DVEntry::EQ;
2563 Improved = true;
2564 }
2565 }
2566 }
2567 if (Improved)
2568 ++GCDsuccesses;
2569 LLVM_DEBUG(dbgs() << "all done\n");
2570 return false;
2571}
2572
2573
2574//===----------------------------------------------------------------------===//
2575// banerjeeMIVtest -
2576// Use Banerjee's Inequalities to test an MIV subscript pair.
2577// (Wolfe, in the race-car book, calls this the Extreme Value Test.)
2578// Generally follows the discussion in Section 2.5.2 of
2579//
2580// Optimizing Supercompilers for Supercomputers
2581// Michael Wolfe
2582//
2583// The inequalities given on page 25 are simplified in that loops are
2584// normalized so that the lower bound is always 0 and the stride is always 1.
2585// For example, Wolfe gives
2586//
2587// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2588//
2589// where A_k is the coefficient of the kth index in the source subscript,
2590// B_k is the coefficient of the kth index in the destination subscript,
2591// U_k is the upper bound of the kth index, L_k is the lower bound of the Kth
2592// index, and N_k is the stride of the kth index. Since all loops are normalized
2593// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
2594// equation to
2595//
2596// LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
2597// = (A^-_k - B_k)^- (U_k - 1) - B_k
2598//
2599// Similar simplifications are possible for the other equations.
2600//
2601// When we can't determine the number of iterations for a loop,
2602// we use NULL as an indicator for the worst case, infinity.
2603// When computing the upper bound, NULL denotes +inf;
2604// for the lower bound, NULL denotes -inf.
2605//
2606// Return true if dependence disproved.
2607bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst,
2608 const SmallBitVector &Loops,
2609 FullDependence &Result) const {
2610 LLVM_DEBUG(dbgs() << "starting Banerjee\n");
2611 ++BanerjeeApplications;
2612 LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n');
2613 const SCEV *A0;
2614 CoefficientInfo *A = collectCoeffInfo(Src, true, A0);
2615 LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n');
2616 const SCEV *B0;
2617 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0);
2618 BoundInfo *Bound = new BoundInfo[MaxLevels + 1];
2619 const SCEV *Delta = SE->getMinusSCEV(B0, A0);
2620 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
2621
2622 // Compute bounds for all the * directions.
2623 LLVM_DEBUG(dbgs() << "\tBounds[*]\n");
2624 for (unsigned K = 1; K <= MaxLevels; ++K) {
2625 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;
2626 Bound[K].Direction = Dependence::DVEntry::ALL;
2627 Bound[K].DirSet = Dependence::DVEntry::NONE;
2628 findBoundsALL(A, B, Bound, K);
2629#ifndef NDEBUG
2630 LLVM_DEBUG(dbgs() << "\t " << K << '\t');
2631 if (Bound[K].Lower[Dependence::DVEntry::ALL])
2632 LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
2633 else
2634 LLVM_DEBUG(dbgs() << "-inf\t");
2635 if (Bound[K].Upper[Dependence::DVEntry::ALL])
2636 LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
2637 else
2638 LLVM_DEBUG(dbgs() << "+inf\n");
2639#endif
2640 }
2641
2642 // Test the *, *, *, ... case.
2643 bool Disproved = false;
2644 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) {
2645 // Explore the direction vector hierarchy.
2646 unsigned DepthExpanded = 0;
2647 unsigned NewDeps = exploreDirections(1, A, B, Bound,
2648 Loops, DepthExpanded, Delta);
2649 if (NewDeps > 0) {
2650 bool Improved = false;
2651 for (unsigned K = 1; K <= CommonLevels; ++K) {
2652 if (Loops[K]) {
2653 unsigned Old = Result.DV[K - 1].Direction;
2654 Result.DV[K - 1].Direction = Old & Bound[K].DirSet;
2655 Improved |= Old != Result.DV[K - 1].Direction;
2656 if (!Result.DV[K - 1].Direction) {
2657 Improved = false;
2658 Disproved = true;
2659 break;
2660 }
2661 }
2662 }
2663 if (Improved)
2664 ++BanerjeeSuccesses;
2665 }
2666 else {
2667 ++BanerjeeIndependence;
2668 Disproved = true;
2669 }
2670 }
2671 else {
2672 ++BanerjeeIndependence;
2673 Disproved = true;
2674 }
2675 delete [] Bound;
2676 delete [] A;
2677 delete [] B;
2678 return Disproved;
2679}
2680
2681
2682// Hierarchically expands the direction vector
2683// search space, combining the directions of discovered dependences
2684// in the DirSet field of Bound. Returns the number of distinct
2685// dependences discovered. If the dependence is disproved,
2686// it will return 0.
2687unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A,
2688 CoefficientInfo *B, BoundInfo *Bound,
2689 const SmallBitVector &Loops,
2690 unsigned &DepthExpanded,
2691 const SCEV *Delta) const {
2692 // This algorithm has worst case complexity of O(3^n), where 'n' is the number
2693 // of common loop levels. To avoid excessive compile-time, pessimize all the
2694 // results and immediately return when the number of common levels is beyond
2695 // the given threshold.
2696 if (CommonLevels > MIVMaxLevelThreshold) {
2697 LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV "
2698 "direction exploration is terminated.\n");
2699 for (unsigned K = 1; K <= CommonLevels; ++K)
2700 if (Loops[K])
2701 Bound[K].DirSet = Dependence::DVEntry::ALL;
2702 return 1;
2703 }
2704
2705 if (Level > CommonLevels) {
2706 // record result
2707 LLVM_DEBUG(dbgs() << "\t[");
2708 for (unsigned K = 1; K <= CommonLevels; ++K) {
2709 if (Loops[K]) {
2710 Bound[K].DirSet |= Bound[K].Direction;
2711#ifndef NDEBUG
2712 switch (Bound[K].Direction) {
2714 LLVM_DEBUG(dbgs() << " <");
2715 break;
2717 LLVM_DEBUG(dbgs() << " =");
2718 break;
2720 LLVM_DEBUG(dbgs() << " >");
2721 break;
2723 LLVM_DEBUG(dbgs() << " *");
2724 break;
2725 default:
2726 llvm_unreachable("unexpected Bound[K].Direction");
2727 }
2728#endif
2729 }
2730 }
2731 LLVM_DEBUG(dbgs() << " ]\n");
2732 return 1;
2733 }
2734 if (Loops[Level]) {
2735 if (Level > DepthExpanded) {
2736 DepthExpanded = Level;
2737 // compute bounds for <, =, > at current level
2738 findBoundsLT(A, B, Bound, Level);
2739 findBoundsGT(A, B, Bound, Level);
2740 findBoundsEQ(A, B, Bound, Level);
2741#ifndef NDEBUG
2742 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
2743 LLVM_DEBUG(dbgs() << "\t <\t");
2744 if (Bound[Level].Lower[Dependence::DVEntry::LT])
2745 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT]
2746 << '\t');
2747 else
2748 LLVM_DEBUG(dbgs() << "-inf\t");
2749 if (Bound[Level].Upper[Dependence::DVEntry::LT])
2750 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT]
2751 << '\n');
2752 else
2753 LLVM_DEBUG(dbgs() << "+inf\n");
2754 LLVM_DEBUG(dbgs() << "\t =\t");
2755 if (Bound[Level].Lower[Dependence::DVEntry::EQ])
2756 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ]
2757 << '\t');
2758 else
2759 LLVM_DEBUG(dbgs() << "-inf\t");
2760 if (Bound[Level].Upper[Dependence::DVEntry::EQ])
2761 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ]
2762 << '\n');
2763 else
2764 LLVM_DEBUG(dbgs() << "+inf\n");
2765 LLVM_DEBUG(dbgs() << "\t >\t");
2766 if (Bound[Level].Lower[Dependence::DVEntry::GT])
2767 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT]
2768 << '\t');
2769 else
2770 LLVM_DEBUG(dbgs() << "-inf\t");
2771 if (Bound[Level].Upper[Dependence::DVEntry::GT])
2772 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT]
2773 << '\n');
2774 else
2775 LLVM_DEBUG(dbgs() << "+inf\n");
2776#endif
2777 }
2778
2779 unsigned NewDeps = 0;
2780
2781 // test bounds for <, *, *, ...
2782 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta))
2783 NewDeps += exploreDirections(Level + 1, A, B, Bound,
2784 Loops, DepthExpanded, Delta);
2785
2786 // Test bounds for =, *, *, ...
2787 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta))
2788 NewDeps += exploreDirections(Level + 1, A, B, Bound,
2789 Loops, DepthExpanded, Delta);
2790
2791 // test bounds for >, *, *, ...
2792 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta))
2793 NewDeps += exploreDirections(Level + 1, A, B, Bound,
2794 Loops, DepthExpanded, Delta);
2795
2796 Bound[Level].Direction = Dependence::DVEntry::ALL;
2797 return NewDeps;
2798 }
2799 else
2800 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta);
2801}
2802
2803
2804// Returns true iff the current bounds are plausible.
2805bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level,
2806 BoundInfo *Bound, const SCEV *Delta) const {
2807 Bound[Level].Direction = DirKind;
2808 if (const SCEV *LowerBound = getLowerBound(Bound))
2809 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta))
2810 return false;
2811 if (const SCEV *UpperBound = getUpperBound(Bound))
2812 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound))
2813 return false;
2814 return true;
2815}
2816
2817
2818// Computes the upper and lower bounds for level K
2819// using the * direction. Records them in Bound.
2820// Wolfe gives the equations
2821//
2822// LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
2823// UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
2824//
2825// Since we normalize loops, we can simplify these equations to
2826//
2827// LB^*_k = (A^-_k - B^+_k)U_k
2828// UB^*_k = (A^+_k - B^-_k)U_k
2829//
2830// We must be careful to handle the case where the upper bound is unknown.
2831// Note that the lower bound is always <= 0
2832// and the upper bound is always >= 0.
2833void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B,
2834 BoundInfo *Bound, unsigned K) const {
2835 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity.
2836 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity.
2837 if (Bound[K].Iterations) {
2838 Bound[K].Lower[Dependence::DVEntry::ALL] =
2839 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart),
2840 Bound[K].Iterations);
2841 Bound[K].Upper[Dependence::DVEntry::ALL] =
2842 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart),
2843 Bound[K].Iterations);
2844 }
2845 else {
2846 // If the difference is 0, we won't need to know the number of iterations.
2847 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart))
2848 Bound[K].Lower[Dependence::DVEntry::ALL] =
2849 SE->getZero(A[K].Coeff->getType());
2850 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart))
2851 Bound[K].Upper[Dependence::DVEntry::ALL] =
2852 SE->getZero(A[K].Coeff->getType());
2853 }
2854}
2855
2856
2857// Computes the upper and lower bounds for level K
2858// using the = direction. Records them in Bound.
2859// Wolfe gives the equations
2860//
2861// LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
2862// UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
2863//
2864// Since we normalize loops, we can simplify these equations to
2865//
2866// LB^=_k = (A_k - B_k)^- U_k
2867// UB^=_k = (A_k - B_k)^+ U_k
2868//
2869// We must be careful to handle the case where the upper bound is unknown.
2870// Note that the lower bound is always <= 0
2871// and the upper bound is always >= 0.
2872void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B,
2873 BoundInfo *Bound, unsigned K) const {
2874 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity.
2875 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity.
2876 if (Bound[K].Iterations) {
2877 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
2878 const SCEV *NegativePart = getNegativePart(Delta);
2879 Bound[K].Lower[Dependence::DVEntry::EQ] =
2880 SE->getMulExpr(NegativePart, Bound[K].Iterations);
2881 const SCEV *PositivePart = getPositivePart(Delta);
2882 Bound[K].Upper[Dependence::DVEntry::EQ] =
2883 SE->getMulExpr(PositivePart, Bound[K].Iterations);
2884 }
2885 else {
2886 // If the positive/negative part of the difference is 0,
2887 // we won't need to know the number of iterations.
2888 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
2889 const SCEV *NegativePart = getNegativePart(Delta);
2890 if (NegativePart->isZero())
2891 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
2892 const SCEV *PositivePart = getPositivePart(Delta);
2893 if (PositivePart->isZero())
2894 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
2895 }
2896}
2897
2898
2899// Computes the upper and lower bounds for level K
2900// using the < direction. Records them in Bound.
2901// Wolfe gives the equations
2902//
2903// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2904// UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2905//
2906// Since we normalize loops, we can simplify these equations to
2907//
2908// LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
2909// UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
2910//
2911// We must be careful to handle the case where the upper bound is unknown.
2912void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B,
2913 BoundInfo *Bound, unsigned K) const {
2914 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity.
2915 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity.
2916 if (Bound[K].Iterations) {
2917 const SCEV *Iter_1 = SE->getMinusSCEV(
2918 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
2919 const SCEV *NegPart =
2920 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
2921 Bound[K].Lower[Dependence::DVEntry::LT] =
2922 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff);
2923 const SCEV *PosPart =
2924 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
2925 Bound[K].Upper[Dependence::DVEntry::LT] =
2926 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff);
2927 }
2928 else {
2929 // If the positive/negative part of the difference is 0,
2930 // we won't need to know the number of iterations.
2931 const SCEV *NegPart =
2932 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
2933 if (NegPart->isZero())
2934 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
2935 const SCEV *PosPart =
2936 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
2937 if (PosPart->isZero())
2938 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
2939 }
2940}
2941
2942
2943// Computes the upper and lower bounds for level K
2944// using the > direction. Records them in Bound.
2945// Wolfe gives the equations
2946//
2947// LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2948// UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2949//
2950// Since we normalize loops, we can simplify these equations to
2951//
2952// LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
2953// UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
2954//
2955// We must be careful to handle the case where the upper bound is unknown.
2956void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B,
2957 BoundInfo *Bound, unsigned K) const {
2958 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity.
2959 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity.
2960 if (Bound[K].Iterations) {
2961 const SCEV *Iter_1 = SE->getMinusSCEV(
2962 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
2963 const SCEV *NegPart =
2964 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
2965 Bound[K].Lower[Dependence::DVEntry::GT] =
2966 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff);
2967 const SCEV *PosPart =
2968 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
2969 Bound[K].Upper[Dependence::DVEntry::GT] =
2970 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff);
2971 }
2972 else {
2973 // If the positive/negative part of the difference is 0,
2974 // we won't need to know the number of iterations.
2975 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
2976 if (NegPart->isZero())
2977 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
2978 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
2979 if (PosPart->isZero())
2980 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
2981 }
2982}
2983
2984
2985// X^+ = max(X, 0)
2986const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const {
2987 return SE->getSMaxExpr(X, SE->getZero(X->getType()));
2988}
2989
2990
2991// X^- = min(X, 0)
2992const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const {
2993 return SE->getSMinExpr(X, SE->getZero(X->getType()));
2994}
2995
2996
2997// Walks through the subscript,
2998// collecting each coefficient, the associated loop bounds,
2999// and recording its positive and negative parts for later use.
3000DependenceInfo::CoefficientInfo *
3001DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag,
3002 const SCEV *&Constant) const {
3003 const SCEV *Zero = SE->getZero(Subscript->getType());
3004 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
3005 for (unsigned K = 1; K <= MaxLevels; ++K) {
3006 CI[K].Coeff = Zero;
3007 CI[K].PosPart = Zero;
3008 CI[K].NegPart = Zero;
3009 CI[K].Iterations = nullptr;
3010 }
3011 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) {
3012 const Loop *L = AddRec->getLoop();
3013 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L);
3014 CI[K].Coeff = AddRec->getStepRecurrence(*SE);
3015 CI[K].PosPart = getPositivePart(CI[K].Coeff);
3016 CI[K].NegPart = getNegativePart(CI[K].Coeff);
3017 CI[K].Iterations = collectUpperBound(L, Subscript->getType());
3018 Subscript = AddRec->getStart();
3019 }
3020 Constant = Subscript;
3021#ifndef NDEBUG
3022 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n");
3023 for (unsigned K = 1; K <= MaxLevels; ++K) {
3024 LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff);
3025 LLVM_DEBUG(dbgs() << "\tPos Part = ");
3026 LLVM_DEBUG(dbgs() << *CI[K].PosPart);
3027 LLVM_DEBUG(dbgs() << "\tNeg Part = ");
3028 LLVM_DEBUG(dbgs() << *CI[K].NegPart);
3029 LLVM_DEBUG(dbgs() << "\tUpper Bound = ");
3030 if (CI[K].Iterations)
3031 LLVM_DEBUG(dbgs() << *CI[K].Iterations);
3032 else
3033 LLVM_DEBUG(dbgs() << "+inf");
3034 LLVM_DEBUG(dbgs() << '\n');
3035 }
3036 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n');
3037#endif
3038 return CI;
3039}
3040
3041
3042// Looks through all the bounds info and
3043// computes the lower bound given the current direction settings
3044// at each level. If the lower bound for any level is -inf,
3045// the result is -inf.
3046const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const {
3047 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];
3048 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
3049 if (Bound[K].Lower[Bound[K].Direction])
3050 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]);
3051 else
3052 Sum = nullptr;
3053 }
3054 return Sum;
3055}
3056
3057
3058// Looks through all the bounds info and
3059// computes the upper bound given the current direction settings
3060// at each level. If the upper bound at any level is +inf,
3061// the result is +inf.
3062const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const {
3063 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];
3064 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
3065 if (Bound[K].Upper[Bound[K].Direction])
3066 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]);
3067 else
3068 Sum = nullptr;
3069 }
3070 return Sum;
3071}
3072
3073
3074//===----------------------------------------------------------------------===//
3075// Constraint manipulation for Delta test.
3076
3077// Given a linear SCEV,
3078// return the coefficient (the step)
3079// corresponding to the specified loop.
3080// If there isn't one, return 0.
3081// For example, given a*i + b*j + c*k, finding the coefficient
3082// corresponding to the j loop would yield b.
3083const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr,
3084 const Loop *TargetLoop) const {
3085 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
3086 if (!AddRec)
3087 return SE->getZero(Expr->getType());
3088 if (AddRec->getLoop() == TargetLoop)
3089 return AddRec->getStepRecurrence(*SE);
3090 return findCoefficient(AddRec->getStart(), TargetLoop);
3091}
3092
3093
3094// Given a linear SCEV,
3095// return the SCEV given by zeroing out the coefficient
3096// corresponding to the specified loop.
3097// For example, given a*i + b*j + c*k, zeroing the coefficient
3098// corresponding to the j loop would yield a*i + c*k.
3099const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr,
3100 const Loop *TargetLoop) const {
3101 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
3102 if (!AddRec)
3103 return Expr; // ignore
3104 if (AddRec->getLoop() == TargetLoop)
3105 return AddRec->getStart();
3106 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop),
3107 AddRec->getStepRecurrence(*SE),
3108 AddRec->getLoop(),
3109 AddRec->getNoWrapFlags());
3110}
3111
3112
3113// Given a linear SCEV Expr,
3114// return the SCEV given by adding some Value to the
3115// coefficient corresponding to the specified TargetLoop.
3116// For example, given a*i + b*j + c*k, adding 1 to the coefficient
3117// corresponding to the j loop would yield a*i + (b+1)*j + c*k.
3118const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr,
3119 const Loop *TargetLoop,
3120 const SCEV *Value) const {
3121 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
3122 if (!AddRec) // create a new addRec
3123 return SE->getAddRecExpr(Expr,
3124 Value,
3125 TargetLoop,
3126 SCEV::FlagAnyWrap); // Worst case, with no info.
3127 if (AddRec->getLoop() == TargetLoop) {
3128 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value);
3129 if (Sum->isZero())
3130 return AddRec->getStart();
3131 return SE->getAddRecExpr(AddRec->getStart(),
3132 Sum,
3133 AddRec->getLoop(),
3134 AddRec->getNoWrapFlags());
3135 }
3136 if (SE->isLoopInvariant(AddRec, TargetLoop))
3137 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap);
3138 return SE->getAddRecExpr(
3139 addToCoefficient(AddRec->getStart(), TargetLoop, Value),
3140 AddRec->getStepRecurrence(*SE), AddRec->getLoop(),
3141 AddRec->getNoWrapFlags());
3142}
3143
3144
3145// Review the constraints, looking for opportunities
3146// to simplify a subscript pair (Src and Dst).
3147// Return true if some simplification occurs.
3148// If the simplification isn't exact (that is, if it is conservative
3149// in terms of dependence), set consistent to false.
3150// Corresponds to Figure 5 from the paper
3151//
3152// Practical Dependence Testing
3153// Goff, Kennedy, Tseng
3154// PLDI 1991
3155bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst,
3157 SmallVectorImpl<Constraint> &Constraints,
3158 bool &Consistent) {
3159 bool Result = false;
3160 for (unsigned LI : Loops.set_bits()) {
3161 LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is");
3162 LLVM_DEBUG(Constraints[LI].dump(dbgs()));
3163 if (Constraints[LI].isDistance())
3164 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent);
3165 else if (Constraints[LI].isLine())
3166 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent);
3167 else if (Constraints[LI].isPoint())
3168 Result |= propagatePoint(Src, Dst, Constraints[LI]);
3169 }
3170 return Result;
3171}
3172
3173
3174// Attempt to propagate a distance
3175// constraint into a subscript pair (Src and Dst).
3176// Return true if some simplification occurs.
3177// If the simplification isn't exact (that is, if it is conservative
3178// in terms of dependence), set consistent to false.
3179bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst,
3180 Constraint &CurConstraint,
3181 bool &Consistent) {
3182 const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3183 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
3184 const SCEV *A_K = findCoefficient(Src, CurLoop);
3185 if (A_K->isZero())
3186 return false;
3187 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD());
3188 Src = SE->getMinusSCEV(Src, DA_K);
3189 Src = zeroCoefficient(Src, CurLoop);
3190 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
3191 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
3192 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K));
3193 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
3194 if (!findCoefficient(Dst, CurLoop)->isZero())
3195 Consistent = false;
3196 return true;
3197}
3198
3199
3200// Attempt to propagate a line
3201// constraint into a subscript pair (Src and Dst).
3202// Return true if some simplification occurs.
3203// If the simplification isn't exact (that is, if it is conservative
3204// in terms of dependence), set consistent to false.
3205bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
3206 Constraint &CurConstraint,
3207 bool &Consistent) {
3208 const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3209 const SCEV *A = CurConstraint.getA();
3210 const SCEV *B = CurConstraint.getB();
3211 const SCEV *C = CurConstraint.getC();
3212 LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C
3213 << "\n");
3214 LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n");
3215 LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n");
3216 if (A->isZero()) {
3217 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B);
3218 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
3219 if (!Bconst || !Cconst) return false;
3220 APInt Beta = Bconst->getAPInt();
3221 APInt Charlie = Cconst->getAPInt();
3222 APInt CdivB = Charlie.sdiv(Beta);
3223 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");
3224 const SCEV *AP_K = findCoefficient(Dst, CurLoop);
3225 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
3226 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
3227 Dst = zeroCoefficient(Dst, CurLoop);
3228 if (!findCoefficient(Src, CurLoop)->isZero())
3229 Consistent = false;
3230 }
3231 else if (B->isZero()) {
3232 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
3233 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
3234 if (!Aconst || !Cconst) return false;
3235 APInt Alpha = Aconst->getAPInt();
3236 APInt Charlie = Cconst->getAPInt();
3237 APInt CdivA = Charlie.sdiv(Alpha);
3238 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
3239 const SCEV *A_K = findCoefficient(Src, CurLoop);
3240 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
3241 Src = zeroCoefficient(Src, CurLoop);
3242 if (!findCoefficient(Dst, CurLoop)->isZero())
3243 Consistent = false;
3244 }
3245 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) {
3246 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
3247 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
3248 if (!Aconst || !Cconst) return false;
3249 APInt Alpha = Aconst->getAPInt();
3250 APInt Charlie = Cconst->getAPInt();
3251 APInt CdivA = Charlie.sdiv(Alpha);
3252 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
3253 const SCEV *A_K = findCoefficient(Src, CurLoop);
3254 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
3255 Src = zeroCoefficient(Src, CurLoop);
3256 Dst = addToCoefficient(Dst, CurLoop, A_K);
3257 if (!findCoefficient(Dst, CurLoop)->isZero())
3258 Consistent = false;
3259 }
3260 else {
3261 // paper is incorrect here, or perhaps just misleading
3262 const SCEV *A_K = findCoefficient(Src, CurLoop);
3263 Src = SE->getMulExpr(Src, A);
3264 Dst = SE->getMulExpr(Dst, A);
3265 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C));
3266 Src = zeroCoefficient(Src, CurLoop);
3267 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B));
3268 if (!findCoefficient(Dst, CurLoop)->isZero())
3269 Consistent = false;
3270 }
3271 LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");
3272 LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n");
3273 return true;
3274}
3275
3276
3277// Attempt to propagate a point
3278// constraint into a subscript pair (Src and Dst).
3279// Return true if some simplification occurs.
3280bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst,
3281 Constraint &CurConstraint) {
3282 const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3283 const SCEV *A_K = findCoefficient(Src, CurLoop);
3284 const SCEV *AP_K = findCoefficient(Dst, CurLoop);
3285 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX());
3286 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY());
3287 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
3288 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K));
3289 Src = zeroCoefficient(Src, CurLoop);
3290 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
3291 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
3292 Dst = zeroCoefficient(Dst, CurLoop);
3293 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
3294 return true;
3295}
3296
3297
3298// Update direction vector entry based on the current constraint.
3299void DependenceInfo::updateDirection(Dependence::DVEntry &Level,
3300 const Constraint &CurConstraint) const {
3301 LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint =");
3302 LLVM_DEBUG(CurConstraint.dump(dbgs()));
3303 if (CurConstraint.isAny())
3304 ; // use defaults
3305 else if (CurConstraint.isDistance()) {
3306 // this one is consistent, the others aren't
3307 Level.Scalar = false;
3308 Level.Distance = CurConstraint.getD();
3309 unsigned NewDirection = Dependence::DVEntry::NONE;
3310 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero
3311 NewDirection = Dependence::DVEntry::EQ;
3312 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive
3313 NewDirection |= Dependence::DVEntry::LT;
3314 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative
3315 NewDirection |= Dependence::DVEntry::GT;
3316 Level.Direction &= NewDirection;
3317 }
3318 else if (CurConstraint.isLine()) {
3319 Level.Scalar = false;
3320 Level.Distance = nullptr;
3321 // direction should be accurate
3322 }
3323 else if (CurConstraint.isPoint()) {
3324 Level.Scalar = false;
3325 Level.Distance = nullptr;
3326 unsigned NewDirection = Dependence::DVEntry::NONE;
3327 if (!isKnownPredicate(CmpInst::ICMP_NE,
3328 CurConstraint.getY(),
3329 CurConstraint.getX()))
3330 // if X may be = Y
3331 NewDirection |= Dependence::DVEntry::EQ;
3332 if (!isKnownPredicate(CmpInst::ICMP_SLE,
3333 CurConstraint.getY(),
3334 CurConstraint.getX()))
3335 // if Y may be > X
3336 NewDirection |= Dependence::DVEntry::LT;
3337 if (!isKnownPredicate(CmpInst::ICMP_SGE,
3338 CurConstraint.getY(),
3339 CurConstraint.getX()))
3340 // if Y may be < X
3341 NewDirection |= Dependence::DVEntry::GT;
3342 Level.Direction &= NewDirection;
3343 }
3344 else
3345 llvm_unreachable("constraint has unexpected kind");
3346}
3347
3348/// Check if we can delinearize the subscripts. If the SCEVs representing the
3349/// source and destination array references are recurrences on a nested loop,
3350/// this function flattens the nested recurrences into separate recurrences
3351/// for each loop level.
3352bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
3354 assert(isLoadOrStore(Src) && "instruction is not load or store");
3355 assert(isLoadOrStore(Dst) && "instruction is not load or store");
3356 Value *SrcPtr = getLoadStorePointerOperand(Src);
3357 Value *DstPtr = getLoadStorePointerOperand(Dst);
3358 Loop *SrcLoop = LI->getLoopFor(Src->getParent());
3359 Loop *DstLoop = LI->getLoopFor(Dst->getParent());
3360 const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop);
3361 const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop);
3362 const SCEVUnknown *SrcBase =
3363 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
3364 const SCEVUnknown *DstBase =
3365 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
3366
3367 if (!SrcBase || !DstBase || SrcBase != DstBase)
3368 return false;
3369
3370 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
3371
3372 if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
3373 SrcSubscripts, DstSubscripts) &&
3374 !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
3375 SrcSubscripts, DstSubscripts))
3376 return false;
3377
3378 int Size = SrcSubscripts.size();
3379 LLVM_DEBUG({
3380 dbgs() << "\nSrcSubscripts: ";
3381 for (int I = 0; I < Size; I++)
3382 dbgs() << *SrcSubscripts[I];
3383 dbgs() << "\nDstSubscripts: ";
3384 for (int I = 0; I < Size; I++)
3385 dbgs() << *DstSubscripts[I];
3386 });
3387
3388 // The delinearization transforms a single-subscript MIV dependence test into
3389 // a multi-subscript SIV dependence test that is easier to compute. So we
3390 // resize Pair to contain as many pairs of subscripts as the delinearization
3391 // has found, and then initialize the pairs following the delinearization.
3392 Pair.resize(Size);
3393 for (int I = 0; I < Size; ++I) {
3394 Pair[I].Src = SrcSubscripts[I];
3395 Pair[I].Dst = DstSubscripts[I];
3396 unifySubscriptType(&Pair[I]);
3397 }
3398
3399 return true;
3400}
3401
3402/// Try to delinearize \p SrcAccessFn and \p DstAccessFn if the underlying
3403/// arrays accessed are fixed-size arrays. Return true if delinearization was
3404/// successful.
3405bool DependenceInfo::tryDelinearizeFixedSize(
3406 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
3407 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
3408 SmallVectorImpl<const SCEV *> &DstSubscripts) {
3409 LLVM_DEBUG({
3410 const SCEVUnknown *SrcBase =
3411 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
3412 const SCEVUnknown *DstBase =
3413 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
3414 assert(SrcBase && DstBase && SrcBase == DstBase &&
3415 "expected src and dst scev unknowns to be equal");
3416 });
3417
3418 SmallVector<int, 4> SrcSizes;
3419 SmallVector<int, 4> DstSizes;
3420 if (!tryDelinearizeFixedSizeImpl(SE, Src, SrcAccessFn, SrcSubscripts,
3421 SrcSizes) ||
3422 !tryDelinearizeFixedSizeImpl(SE, Dst, DstAccessFn, DstSubscripts,
3423 DstSizes))
3424 return false;
3425
3426 // Check that the two size arrays are non-empty and equal in length and
3427 // value.
3428 if (SrcSizes.size() != DstSizes.size() ||
3429 !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) {
3430 SrcSubscripts.clear();
3431 DstSubscripts.clear();
3432 return false;
3433 }
3434
3435 assert(SrcSubscripts.size() == DstSubscripts.size() &&
3436 "Expected equal number of entries in the list of SrcSubscripts and "
3437 "DstSubscripts.");
3438
3439 Value *SrcPtr = getLoadStorePointerOperand(Src);
3440 Value *DstPtr = getLoadStorePointerOperand(Dst);
3441
3442 // In general we cannot safely assume that the subscripts recovered from GEPs
3443 // are in the range of values defined for their corresponding array
3444 // dimensions. For example some C language usage/interpretation make it
3445 // impossible to verify this at compile-time. As such we can only delinearize
3446 // iff the subscripts are positive and are less than the range of the
3447 // dimension.
3449 auto AllIndicesInRange = [&](SmallVector<int, 4> &DimensionSizes,
3451 Value *Ptr) {
3452 size_t SSize = Subscripts.size();
3453 for (size_t I = 1; I < SSize; ++I) {
3454 const SCEV *S = Subscripts[I];
3455 if (!isKnownNonNegative(S, Ptr))
3456 return false;
3457 if (auto *SType = dyn_cast<IntegerType>(S->getType())) {
3458 const SCEV *Range = SE->getConstant(
3459 ConstantInt::get(SType, DimensionSizes[I - 1], false));
3460 if (!isKnownLessThan(S, Range))
3461 return false;
3462 }
3463 }
3464 return true;
3465 };
3466
3467 if (!AllIndicesInRange(SrcSizes, SrcSubscripts, SrcPtr) ||
3468 !AllIndicesInRange(DstSizes, DstSubscripts, DstPtr)) {
3469 SrcSubscripts.clear();
3470 DstSubscripts.clear();
3471 return false;
3472 }
3473 }
3474 LLVM_DEBUG({
3475 dbgs() << "Delinearized subscripts of fixed-size array\n"
3476 << "SrcGEP:" << *SrcPtr << "\n"
3477 << "DstGEP:" << *DstPtr << "\n";
3478 });
3479 return true;
3480}
3481
3482bool DependenceInfo::tryDelinearizeParametricSize(
3483 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
3484 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
3485 SmallVectorImpl<const SCEV *> &DstSubscripts) {
3486
3487 Value *SrcPtr = getLoadStorePointerOperand(Src);
3488 Value *DstPtr = getLoadStorePointerOperand(Dst);
3489 const SCEVUnknown *SrcBase =
3490 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
3491 const SCEVUnknown *DstBase =
3492 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
3493 assert(SrcBase && DstBase && SrcBase == DstBase &&
3494 "expected src and dst scev unknowns to be equal");
3495
3496 const SCEV *ElementSize = SE->getElementSize(Src);
3497 if (ElementSize != SE->getElementSize(Dst))
3498 return false;
3499
3500 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase);
3501 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase);
3502
3503 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);
3504 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV);
3505 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
3506 return false;
3507
3508 // First step: collect parametric terms in both array references.
3510 collectParametricTerms(*SE, SrcAR, Terms);
3511 collectParametricTerms(*SE, DstAR, Terms);
3512
3513 // Second step: find subscript sizes.
3515 findArrayDimensions(*SE, Terms, Sizes, ElementSize);
3516
3517 // Third step: compute the access functions for each subscript.
3518 computeAccessFunctions(*SE, SrcAR, SrcSubscripts, Sizes);
3519 computeAccessFunctions(*SE, DstAR, DstSubscripts, Sizes);
3520
3521 // Fail when there is only a subscript: that's a linearized access function.
3522 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||
3523 SrcSubscripts.size() != DstSubscripts.size())
3524 return false;
3525
3526 size_t Size = SrcSubscripts.size();
3527
3528 // Statically check that the array bounds are in-range. The first subscript we
3529 // don't have a size for and it cannot overflow into another subscript, so is
3530 // always safe. The others need to be 0 <= subscript[i] < bound, for both src
3531 // and dst.
3532 // FIXME: It may be better to record these sizes and add them as constraints
3533 // to the dependency checks.
3535 for (size_t I = 1; I < Size; ++I) {
3536 if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr))
3537 return false;
3538
3539 if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1]))
3540 return false;
3541
3542 if (!isKnownNonNegative(DstSubscripts[I], DstPtr))
3543 return false;
3544
3545 if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1]))
3546 return false;
3547 }
3548
3549 return true;
3550}
3551
3552//===----------------------------------------------------------------------===//
3553
3554#ifndef NDEBUG
3555// For debugging purposes, dump a small bit vector to dbgs().
3557 dbgs() << "{";
3558 for (unsigned VI : BV.set_bits()) {
3559 dbgs() << VI;
3560 if (BV.find_next(VI) >= 0)
3561 dbgs() << ' ';
3562 }
3563 dbgs() << "}\n";
3564}
3565#endif
3566
3569 // Check if the analysis itself has been invalidated.
3570 auto PAC = PA.getChecker<DependenceAnalysis>();
3571 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
3572 return true;
3573
3574 // Check transitive dependencies.
3575 return Inv.invalidate<AAManager>(F, PA) ||
3577 Inv.invalidate<LoopAnalysis>(F, PA);
3578}
3579
3580// depends -
3581// Returns NULL if there is no dependence.
3582// Otherwise, return a Dependence with as many details as possible.
3583// Corresponds to Section 3.1 in the paper
3584//
3585// Practical Dependence Testing
3586// Goff, Kennedy, Tseng
3587// PLDI 1991
3588//
3589// Care is required to keep the routine below, getSplitIteration(),
3590// up to date with respect to this routine.
3591std::unique_ptr<Dependence>
3593 bool PossiblyLoopIndependent = true;
3594 if (Src == Dst)
3595 PossiblyLoopIndependent = false;
3596
3597 if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory()))
3598 // if both instructions don't reference memory, there's no dependence
3599 return nullptr;
3600
3601 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
3602 // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
3603 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
3604 return std::make_unique<Dependence>(Src, Dst);
3605 }
3606
3607 assert(isLoadOrStore(Src) && "instruction is not load or store");
3608 assert(isLoadOrStore(Dst) && "instruction is not load or store");
3609 Value *SrcPtr = getLoadStorePointerOperand(Src);
3610 Value *DstPtr = getLoadStorePointerOperand(Dst);
3611
3612 switch (underlyingObjectsAlias(AA, F->getDataLayout(),
3614 MemoryLocation::get(Src))) {
3617 // cannot analyse objects if we don't understand their aliasing.
3618 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
3619 return std::make_unique<Dependence>(Src, Dst);
3621 // If the objects noalias, they are distinct, accesses are independent.
3622 LLVM_DEBUG(dbgs() << "no alias\n");
3623 return nullptr;
3625 break; // The underlying objects alias; test accesses for dependence.
3626 }
3627
3628 // establish loop nesting levels
3629 establishNestingLevels(Src, Dst);
3630 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
3631 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
3632
3633 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
3634 ++TotalArrayPairs;
3635
3636 unsigned Pairs = 1;
3637 SmallVector<Subscript, 2> Pair(Pairs);
3638 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
3639 const SCEV *DstSCEV = SE->getSCEV(DstPtr);
3640 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");
3641 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n");
3642 if (SE->getPointerBase(SrcSCEV) != SE->getPointerBase(DstSCEV)) {
3643 // If two pointers have different bases, trying to analyze indexes won't
3644 // work; we can't compare them to each other. This can happen, for example,
3645 // if one is produced by an LCSSA PHI node.
3646 //
3647 // We check this upfront so we don't crash in cases where getMinusSCEV()
3648 // returns a SCEVCouldNotCompute.
3649 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n");
3650 return std::make_unique<Dependence>(Src, Dst);
3651 }
3652 Pair[0].Src = SrcSCEV;
3653 Pair[0].Dst = DstSCEV;
3654
3655 if (Delinearize) {
3656 if (tryDelinearize(Src, Dst, Pair)) {
3657 LLVM_DEBUG(dbgs() << " delinearized\n");
3658 Pairs = Pair.size();
3659 }
3660 }
3661
3662 for (unsigned P = 0; P < Pairs; ++P) {
3663 Pair[P].Loops.resize(MaxLevels + 1);
3664 Pair[P].GroupLoops.resize(MaxLevels + 1);
3665 Pair[P].Group.resize(Pairs);
3666 removeMatchingExtensions(&Pair[P]);
3667 Pair[P].Classification =
3668 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
3669 Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
3670 Pair[P].Loops);
3671 Pair[P].GroupLoops = Pair[P].Loops;
3672 Pair[P].Group.set(P);
3673 LLVM_DEBUG(dbgs() << " subscript " << P << "\n");
3674 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
3675 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
3676 LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
3677 LLVM_DEBUG(dbgs() << "\tloops = ");
3679 }
3680
3681 SmallBitVector Separable(Pairs);
3682 SmallBitVector Coupled(Pairs);
3683
3684 // Partition subscripts into separable and minimally-coupled groups
3685 // Algorithm in paper is algorithmically better;
3686 // this may be faster in practice. Check someday.
3687 //
3688 // Here's an example of how it works. Consider this code:
3689 //
3690 // for (i = ...) {
3691 // for (j = ...) {
3692 // for (k = ...) {
3693 // for (l = ...) {
3694 // for (m = ...) {
3695 // A[i][j][k][m] = ...;
3696 // ... = A[0][j][l][i + j];
3697 // }
3698 // }
3699 // }
3700 // }
3701 // }
3702 //
3703 // There are 4 subscripts here:
3704 // 0 [i] and [0]
3705 // 1 [j] and [j]
3706 // 2 [k] and [l]
3707 // 3 [m] and [i + j]
3708 //
3709 // We've already classified each subscript pair as ZIV, SIV, etc.,
3710 // and collected all the loops mentioned by pair P in Pair[P].Loops.
3711 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops
3712 // and set Pair[P].Group = {P}.
3713 //
3714 // Src Dst Classification Loops GroupLoops Group
3715 // 0 [i] [0] SIV {1} {1} {0}
3716 // 1 [j] [j] SIV {2} {2} {1}
3717 // 2 [k] [l] RDIV {3,4} {3,4} {2}
3718 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3}
3719 //
3720 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ.
3721 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc.
3722 //
3723 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty.
3724 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty.
3725 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty,
3726 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added
3727 // to either Separable or Coupled).
3728 //
3729 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty.
3730 // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty,
3731 // so Pair[3].Group = {0, 1, 3} and Done = false.
3732 //
3733 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty.
3734 // Since Done remains true, we add 2 to the set of Separable pairs.
3735 //
3736 // Finally, we consider 3. There's nothing to compare it with,
3737 // so Done remains true and we add it to the Coupled set.
3738 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}.
3739 //
3740 // In the end, we've got 1 separable subscript and 1 coupled group.
3741 for (unsigned SI = 0; SI < Pairs; ++SI) {
3742 if (Pair[SI].Classification == Subscript::NonLinear) {
3743 // ignore these, but collect loops for later
3744 ++NonlinearSubscriptPairs;
3745 collectCommonLoops(Pair[SI].Src,
3746 LI->getLoopFor(Src->getParent()),
3747 Pair[SI].Loops);
3748 collectCommonLoops(Pair[SI].Dst,
3749 LI->getLoopFor(Dst->getParent()),
3750 Pair[SI].Loops);
3751 Result.Consistent = false;
3752 } else if (Pair[SI].Classification == Subscript::ZIV) {
3753 // always separable
3754 Separable.set(SI);
3755 }
3756 else {
3757 // SIV, RDIV, or MIV, so check for coupled group
3758 bool Done = true;
3759 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
3760 SmallBitVector Intersection = Pair[SI].GroupLoops;
3761 Intersection &= Pair[SJ].GroupLoops;
3762 if (Intersection.any()) {
3763 // accumulate set of all the loops in group
3764 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
3765 // accumulate set of all subscripts in group
3766 Pair[SJ].Group |= Pair[SI].Group;
3767 Done = false;
3768 }
3769 }
3770 if (Done) {
3771 if (Pair[SI].Group.count() == 1) {
3772 Separable.set(SI);
3773 ++SeparableSubscriptPairs;
3774 }
3775 else {
3776 Coupled.set(SI);
3777 ++CoupledSubscriptPairs;
3778 }
3779 }
3780 }
3781 }
3782
3783 LLVM_DEBUG(dbgs() << " Separable = ");
3784 LLVM_DEBUG(dumpSmallBitVector(Separable));
3785 LLVM_DEBUG(dbgs() << " Coupled = ");
3787
3788 Constraint NewConstraint;
3789 NewConstraint.setAny(SE);
3790
3791 // test separable subscripts
3792 for (unsigned SI : Separable.set_bits()) {
3793 LLVM_DEBUG(dbgs() << "testing subscript " << SI);
3794 switch (Pair[SI].Classification) {
3795 case Subscript::ZIV:
3796 LLVM_DEBUG(dbgs() << ", ZIV\n");
3797 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result))
3798 return nullptr;
3799 break;
3800 case Subscript::SIV: {
3801 LLVM_DEBUG(dbgs() << ", SIV\n");
3802 unsigned Level;
3803 const SCEV *SplitIter = nullptr;
3804 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint,
3805 SplitIter))
3806 return nullptr;
3807 break;
3808 }
3809 case Subscript::RDIV:
3810 LLVM_DEBUG(dbgs() << ", RDIV\n");
3811 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result))
3812 return nullptr;
3813 break;
3814 case Subscript::MIV:
3815 LLVM_DEBUG(dbgs() << ", MIV\n");
3816 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result))
3817 return nullptr;
3818 break;
3819 default:
3820 llvm_unreachable("subscript has unexpected classification");
3821 }
3822 }
3823
3824 if (Coupled.count()) {
3825 // test coupled subscript groups
3826 LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n");
3827 LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n");
3828 SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
3829 for (unsigned II = 0; II <= MaxLevels; ++II)
3830 Constraints[II].setAny(SE);
3831 for (unsigned SI : Coupled.set_bits()) {
3832 LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { ");
3833 SmallBitVector Group(Pair[SI].Group);
3834 SmallBitVector Sivs(Pairs);
3835 SmallBitVector Mivs(Pairs);
3836 SmallBitVector ConstrainedLevels(MaxLevels + 1);
3837 SmallVector<Subscript *, 4> PairsInGroup;
3838 for (unsigned SJ : Group.set_bits()) {
3839 LLVM_DEBUG(dbgs() << SJ << " ");
3840 if (Pair[SJ].Classification == Subscript::SIV)
3841 Sivs.set(SJ);
3842 else
3843 Mivs.set(SJ);
3844 PairsInGroup.push_back(&Pair[SJ]);
3845 }
3846 unifySubscriptType(PairsInGroup);
3847 LLVM_DEBUG(dbgs() << "}\n");
3848 while (Sivs.any()) {
3849 bool Changed = false;
3850 for (unsigned SJ : Sivs.set_bits()) {
3851 LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
3852 // SJ is an SIV subscript that's part of the current coupled group
3853 unsigned Level;
3854 const SCEV *SplitIter = nullptr;
3855 LLVM_DEBUG(dbgs() << "SIV\n");
3856 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint,
3857 SplitIter))
3858 return nullptr;
3859 ConstrainedLevels.set(Level);
3860 if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
3861 if (Constraints[Level].isEmpty()) {
3862 ++DeltaIndependence;
3863 return nullptr;
3864 }
3865 Changed = true;
3866 }
3867 Sivs.reset(SJ);
3868 }
3869 if (Changed) {
3870 // propagate, possibly creating new SIVs and ZIVs
3871 LLVM_DEBUG(dbgs() << " propagating\n");
3872 LLVM_DEBUG(dbgs() << "\tMivs = ");
3874 for (unsigned SJ : Mivs.set_bits()) {
3875 // SJ is an MIV subscript that's part of the current coupled group
3876 LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
3877 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops,
3878 Constraints, Result.Consistent)) {
3879 LLVM_DEBUG(dbgs() << "\t Changed\n");
3880 ++DeltaPropagations;
3881 Pair[SJ].Classification =
3882 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
3883 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
3884 Pair[SJ].Loops);
3885 switch (Pair[SJ].Classification) {
3886 case Subscript::ZIV:
3887 LLVM_DEBUG(dbgs() << "ZIV\n");
3888 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
3889 return nullptr;
3890 Mivs.reset(SJ);
3891 break;
3892 case Subscript::SIV:
3893 Sivs.set(SJ);
3894 Mivs.reset(SJ);
3895 break;
3896 case Subscript::RDIV:
3897 case Subscript::MIV:
3898 break;
3899 default:
3900 llvm_unreachable("bad subscript classification");
3901 }
3902 }
3903 }
3904 }
3905 }
3906
3907 // test & propagate remaining RDIVs
3908 for (unsigned SJ : Mivs.set_bits()) {
3909 if (Pair[SJ].Classification == Subscript::RDIV) {
3910 LLVM_DEBUG(dbgs() << "RDIV test\n");
3911 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
3912 return nullptr;
3913 // I don't yet understand how to propagate RDIV results
3914 Mivs.reset(SJ);
3915 }
3916 }
3917
3918 // test remaining MIVs
3919 // This code is temporary.
3920 // Better to somehow test all remaining subscripts simultaneously.
3921 for (unsigned SJ : Mivs.set_bits()) {
3922 if (Pair[SJ].Classification == Subscript::MIV) {
3923 LLVM_DEBUG(dbgs() << "MIV test\n");
3924 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
3925 return nullptr;
3926 }
3927 else
3928 llvm_unreachable("expected only MIV subscripts at this point");
3929 }
3930
3931 // update Result.DV from constraint vector
3932 LLVM_DEBUG(dbgs() << " updating\n");
3933 for (unsigned SJ : ConstrainedLevels.set_bits()) {
3934 if (SJ > CommonLevels)
3935 break;
3936 updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
3937 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
3938 return nullptr;
3939 }
3940 }
3941 }
3942
3943 // Make sure the Scalar flags are set correctly.
3944 SmallBitVector CompleteLoops(MaxLevels + 1);
3945 for (unsigned SI = 0; SI < Pairs; ++SI)
3946 CompleteLoops |= Pair[SI].Loops;
3947 for (unsigned II = 1; II <= CommonLevels; ++II)
3948 if (CompleteLoops[II])
3949 Result.DV[II - 1].Scalar = false;
3950
3951 if (PossiblyLoopIndependent) {
3952 // Make sure the LoopIndependent flag is set correctly.
3953 // All directions must include equal, otherwise no
3954 // loop-independent dependence is possible.
3955 for (unsigned II = 1; II <= CommonLevels; ++II) {
3956 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) {
3957 Result.LoopIndependent = false;
3958 break;
3959 }
3960 }
3961 }
3962 else {
3963 // On the other hand, if all directions are equal and there's no
3964 // loop-independent dependence possible, then no dependence exists.
3965 bool AllEqual = true;
3966 for (unsigned II = 1; II <= CommonLevels; ++II) {
3967 if (Result.getDirection(II) != Dependence::DVEntry::EQ) {
3968 AllEqual = false;
3969 break;
3970 }
3971 }
3972 if (AllEqual)
3973 return nullptr;
3974 }
3975
3976 return std::make_unique<FullDependence>(std::move(Result));
3977}
3978
3979//===----------------------------------------------------------------------===//
3980// getSplitIteration -
3981// Rather than spend rarely-used space recording the splitting iteration
3982// during the Weak-Crossing SIV test, we re-compute it on demand.
3983// The re-computation is basically a repeat of the entire dependence test,
3984// though simplified since we know that the dependence exists.
3985// It's tedious, since we must go through all propagations, etc.
3986//
3987// Care is required to keep this code up to date with respect to the routine
3988// above, depends().
3989//
3990// Generally, the dependence analyzer will be used to build
3991// a dependence graph for a function (basically a map from instructions
3992// to dependences). Looking for cycles in the graph shows us loops
3993// that cannot be trivially vectorized/parallelized.
3994//
3995// We can try to improve the situation by examining all the dependences
3996// that make up the cycle, looking for ones we can break.
3997// Sometimes, peeling the first or last iteration of a loop will break
3998// dependences, and we've got flags for those possibilities.
3999// Sometimes, splitting a loop at some other iteration will do the trick,
4000// and we've got a flag for that case. Rather than waste the space to
4001// record the exact iteration (since we rarely know), we provide
4002// a method that calculates the iteration. It's a drag that it must work
4003// from scratch, but wonderful in that it's possible.
4004//
4005// Here's an example:
4006//
4007// for (i = 0; i < 10; i++)
4008// A[i] = ...
4009// ... = A[11 - i]
4010//
4011// There's a loop-carried flow dependence from the store to the load,
4012// found by the weak-crossing SIV test. The dependence will have a flag,
4013// indicating that the dependence can be broken by splitting the loop.
4014// Calling getSplitIteration will return 5.
4015// Splitting the loop breaks the dependence, like so:
4016//
4017// for (i = 0; i <= 5; i++)
4018// A[i] = ...
4019// ... = A[11 - i]
4020// for (i = 6; i < 10; i++)
4021// A[i] = ...
4022// ... = A[11 - i]
4023//
4024// breaks the dependence and allows us to vectorize/parallelize
4025// both loops.
4027 unsigned SplitLevel) {
4028 assert(Dep.isSplitable(SplitLevel) &&
4029 "Dep should be splitable at SplitLevel");
4030 Instruction *Src = Dep.getSrc();
4031 Instruction *Dst = Dep.getDst();
4032 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());
4033 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
4034 assert(isLoadOrStore(Src));
4035 assert(isLoadOrStore(Dst));
4036 Value *SrcPtr = getLoadStorePointerOperand(Src);
4037 Value *DstPtr = getLoadStorePointerOperand(Dst);
4039 AA, F->getDataLayout(), MemoryLocation::get(Dst),
4041
4042 // establish loop nesting levels
4043 establishNestingLevels(Src, Dst);
4044
4045 FullDependence Result(Src, Dst, false, CommonLevels);
4046
4047 unsigned Pairs = 1;
4048 SmallVector<Subscript, 2> Pair(Pairs);
4049 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
4050 const SCEV *DstSCEV = SE->getSCEV(DstPtr);
4051 Pair[0].Src = SrcSCEV;
4052 Pair[0].Dst = DstSCEV;
4053
4054 if (Delinearize) {
4055 if (tryDelinearize(Src, Dst, Pair)) {
4056 LLVM_DEBUG(dbgs() << " delinearized\n");
4057 Pairs = Pair.size();
4058 }
4059 }
4060
4061 for (unsigned P = 0; P < Pairs; ++P) {
4062 Pair[P].Loops.resize(MaxLevels + 1);
4063 Pair[P].GroupLoops.resize(MaxLevels + 1);
4064 Pair[P].Group.resize(Pairs);
4065 removeMatchingExtensions(&Pair[P]);
4066 Pair[P].Classification =
4067 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
4068 Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
4069 Pair[P].Loops);
4070 Pair[P].GroupLoops = Pair[P].Loops;
4071 Pair[P].Group.set(P);
4072 }
4073
4074 SmallBitVector Separable(Pairs);
4075 SmallBitVector Coupled(Pairs);
4076
4077 // partition subscripts into separable and minimally-coupled groups
4078 for (unsigned SI = 0; SI < Pairs; ++SI) {
4079 if (Pair[SI].Classification == Subscript::NonLinear) {
4080 // ignore these, but collect loops for later
4081 collectCommonLoops(Pair[SI].Src,
4082 LI->getLoopFor(Src->getParent()),
4083 Pair[SI].Loops);
4084 collectCommonLoops(Pair[SI].Dst,
4085 LI->getLoopFor(Dst->getParent()),
4086 Pair[SI].Loops);
4087 Result.Consistent = false;
4088 }
4089 else if (Pair[SI].Classification == Subscript::ZIV)
4090 Separable.set(SI);
4091 else {
4092 // SIV, RDIV, or MIV, so check for coupled group
4093 bool Done = true;
4094 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
4095 SmallBitVector Intersection = Pair[SI].GroupLoops;
4096 Intersection &= Pair[SJ].GroupLoops;
4097 if (Intersection.any()) {
4098 // accumulate set of all the loops in group
4099 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
4100 // accumulate set of all subscripts in group
4101 Pair[SJ].Group |= Pair[SI].Group;
4102 Done = false;
4103 }
4104 }
4105 if (Done) {
4106 if (Pair[SI].Group.count() == 1)
4107 Separable.set(SI);
4108 else
4109 Coupled.set(SI);
4110 }
4111 }
4112 }
4113
4114 Constraint NewConstraint;
4115 NewConstraint.setAny(SE);
4116
4117 // test separable subscripts
4118 for (unsigned SI : Separable.set_bits()) {
4119 switch (Pair[SI].Classification) {
4120 case Subscript::SIV: {
4121 unsigned Level;
4122 const SCEV *SplitIter = nullptr;
4123 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
4124 Result, NewConstraint, SplitIter);
4125 if (Level == SplitLevel) {
4126 assert(SplitIter != nullptr);
4127 return SplitIter;
4128 }
4129 break;
4130 }
4131 case Subscript::ZIV:
4132 case Subscript::RDIV:
4133 case Subscript::MIV:
4134 break;
4135 default:
4136 llvm_unreachable("subscript has unexpected classification");
4137 }
4138 }
4139
4140 if (Coupled.count()) {
4141 // test coupled subscript groups
4142 SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
4143 for (unsigned II = 0; II <= MaxLevels; ++II)
4144 Constraints[II].setAny(SE);
4145 for (unsigned SI : Coupled.set_bits()) {
4146 SmallBitVector Group(Pair[SI].Group);
4147 SmallBitVector Sivs(Pairs);
4148 SmallBitVector Mivs(Pairs);
4149 SmallBitVector ConstrainedLevels(MaxLevels + 1);
4150 for (unsigned SJ : Group.set_bits()) {
4151 if (Pair[SJ].Classification == Subscript::SIV)
4152 Sivs.set(SJ);
4153 else
4154 Mivs.set(SJ);
4155 }
4156 while (Sivs.any()) {
4157 bool Changed = false;
4158 for (unsigned SJ : Sivs.set_bits()) {
4159 // SJ is an SIV subscript that's part of the current coupled group
4160 unsigned Level;
4161 const SCEV *SplitIter = nullptr;
4162 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
4163 Result, NewConstraint, SplitIter);
4164 if (Level == SplitLevel && SplitIter)
4165 return SplitIter;
4166 ConstrainedLevels.set(Level);
4167 if (intersectConstraints(&Constraints[Level], &NewConstraint))
4168 Changed = true;
4169 Sivs.reset(SJ);
4170 }
4171 if (Changed) {
4172 // propagate, possibly creating new SIVs and ZIVs
4173 for (unsigned SJ : Mivs.set_bits()) {
4174 // SJ is an MIV subscript that's part of the current coupled group
4175 if (propagate(Pair[SJ].Src, Pair[SJ].Dst,
4176 Pair[SJ].Loops, Constraints, Result.Consistent)) {
4177 Pair[SJ].Classification =
4178 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
4179 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
4180 Pair[SJ].Loops);
4181 switch (Pair[SJ].Classification) {
4182 case Subscript::ZIV:
4183 Mivs.reset(SJ);
4184 break;
4185 case Subscript::SIV:
4186 Sivs.set(SJ);
4187 Mivs.reset(SJ);
4188 break;
4189 case Subscript::RDIV:
4190 case Subscript::MIV:
4191 break;
4192 default:
4193 llvm_unreachable("bad subscript classification");
4194 }
4195 }
4196 }
4197 }
4198 }
4199 }
4200 }
4201 llvm_unreachable("somehow reached end of routine");
4202 return nullptr;
4203}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
basic Basic Alias true
block Block Frequency Analysis
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
#define LLVM_DEBUG(...)
Definition: Debug.h:106
static bool isLoadOrStore(const Instruction *I)
static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM, const APInt &Delta, APInt &G, APInt &X, APInt &Y)
static void dumpSmallBitVector(SmallBitVector &BV)
static const SCEVConstant * getConstantPart(const SCEV *Expr)
static APInt ceilingOfQuotient(const APInt &A, const APInt &B)
static APInt floorOfQuotient(const APInt &A, const APInt &B)
static AliasResult underlyingObjectsAlias(AAResults *AA, const DataLayout &DL, const MemoryLocation &LocA, const MemoryLocation &LocB)
static bool isRemainderZero(const SCEVConstant *Dividend, const SCEVConstant *Divisor)
static cl::opt< bool > Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::desc("Try to delinearize array references."))
static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA, ScalarEvolution &SE, bool NormalizeResults)
static cl::opt< unsigned > MIVMaxLevelThreshold("da-miv-max-level-threshold", cl::init(7), cl::Hidden, cl::desc("Maximum depth allowed for the recursive algorithm used to " "explore MIV direction vectors."))
static cl::opt< bool > DisableDelinearizationChecks("da-disable-delinearization-checks", cl::Hidden, cl::desc("Disable checks that try to statically verify validity of " "delinearized subscripts. Enabling this option may result in incorrect " "dependence vectors for languages that allow the subscript of one " "dimension to underflow or overflow into another dimension."))
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Hardware Loops
Module.h This file contains the declarations for the Module class.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:557
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:78
static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Definition: APInt.cpp:1864
APInt abs() const
Get the absolute value.
Definition: APInt.h:1773
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition: APInt.cpp:1618
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition: APInt.h:1166
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition: APInt.cpp:1710
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
Definition: APInt.h:1237
The possible results of an alias query.
Definition: AliasAnalysis.h:77
@ MayAlias
The two locations may or may not alias.
Definition: AliasAnalysis.h:98
@ NoAlias
The two locations do not alias at all.
Definition: AliasAnalysis.h:95
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:49
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:292
bool invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Trigger the invalidation of some other analysis pass if not already handled and return whether it was...
Definition: PassManager.h:310
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
void setPreservesAll()
Set by analyses that do not transform their input at all.
AnalysisUsage & addRequiredTransitive()
Definition: Any.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
void enableCrossIterationMode()
Assume that values may come from different cycle iterations.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:702
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:703
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:700
@ ICMP_EQ
equal
Definition: InstrTypes.h:694
@ ICMP_NE
not equal
Definition: InstrTypes.h:695
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:701
This is an important base class in LLVM.
Definition: Constant.h:42
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
Legacy pass manager pass to access dependence information.
void getAnalysisUsage(AnalysisUsage &) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
void print(raw_ostream &, const Module *=nullptr) const override
print - Print out the internal state of the pass.
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
AnalysisPass to compute dependence information in a function.
Result run(Function &F, FunctionAnalysisManager &FAM)
DependenceInfo - This class is the main dependence-analysis driver.
bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv)
Handle transitive invalidation when the cached analysis results go away.
const SCEV * getSplitIteration(const Dependence &Dep, unsigned Level)
getSplitIteration - Give a dependence that's splittable at some particular level, return the iteratio...
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst)
depends - Tests for a dependence between the Src and Dst instructions.
Dependence - This class represents a dependence between two memory memory references in a function.
virtual unsigned getDirection(unsigned Level) const
getDirection - Returns the direction associated with a particular level.
virtual bool isPeelFirst(unsigned Level) const
isPeelFirst - Returns true if peeling the first iteration from this loop will break this dependence.
Instruction * getDst() const
getDst - Returns the destination instruction for this dependence.
virtual bool isConfused() const
isConfused - Returns true if this dependence is confused (the compiler understands nothing and makes ...
virtual bool isSplitable(unsigned Level) const
isSplitable - Returns true if splitting this loop will break the dependence.
virtual const SCEV * getDistance(unsigned Level) const
getDistance - Returns the distance (or NULL) associated with a particular level.
virtual bool isScalar(unsigned Level) const
isScalar - Returns true if a particular level is scalar; that is, if no subscript in the source or de...
virtual bool isConsistent() const
isConsistent - Returns true if this dependence is consistent (occurs every time the source and destin...
virtual bool isPeelLast(unsigned Level) const
isPeelLast - Returns true if peeling the last iteration from this loop will break this dependence.
virtual unsigned getLevels() const
getLevels - Returns the number of common loops surrounding the source and destination of the dependen...
bool isFlow() const
isFlow - Returns true if this is a flow (aka true) dependence.
bool isInput() const
isInput - Returns true if this is an input dependence.
bool isAnti() const
isAnti - Returns true if this is an anti dependence.
Instruction * getSrc() const
getSrc - Returns the source instruction for this dependence.
virtual bool isLoopIndependent() const
isLoopIndependent - Returns true if this is a loop-independent dependence.
bool isOutput() const
isOutput - Returns true if this is an output dependence.
void dump(raw_ostream &OS) const
dump - For debugging purposes, dumps a dependence to OS.
Class representing an expression and its matching format.
FullDependence - This class represents a dependence between two memory references in a function.
unsigned getDirection(unsigned Level) const override
getDirection - Returns the direction associated with a particular level.
const SCEV * getDistance(unsigned Level) const override
getDistance - Returns the distance (or NULL) associated with a particular level.
bool isDirectionNegative() const override
Check if the direction vector is negative.
bool isSplitable(unsigned Level) const override
isSplitable - Returns true if splitting the loop will break the dependence.
FullDependence(Instruction *Src, Instruction *Dst, bool LoopIndependent, unsigned Levels)
bool isScalar(unsigned Level) const override
isScalar - Returns true if a particular level is scalar; that is, if no subscript in the source or de...
bool isPeelLast(unsigned Level) const override
isPeelLast - Returns true if peeling the last iteration from this loop will break this dependence.
bool isPeelFirst(unsigned Level) const override
isPeelFirst - Returns true if peeling the first iteration from this loop will break this dependence.
bool normalize(ScalarEvolution *SE) override
If the direction vector is negative, normalize the direction vector to make it non-negative.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition: Function.cpp:373
bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
Class to represent integer types.
Definition: DerivedTypes.h:42
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:74
An instruction for reading from memory.
Definition: Instructions.h:176
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
unsigned getLoopDepth() const
Return the nesting level of this loop.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:593
This class represents a loop nest and can be used to query its properties.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
const Value * Ptr
The address of the start of the location.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition: Analysis.h:264
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const SCEV * getOperand() const
This class represents a constant integer value.
const APInt & getAPInt() const
This is the base class for unary integral cast operator classes.
This node represents multiplication of some number of SCEVs.
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
const SCEV * getOperand(unsigned i) const
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
bool isOne() const
Return true if the expression is a constant one.
bool isZero() const
Return true if the expression is a constant zero.
Type * getType() const
Return the LLVM type of this SCEV expression.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
const SCEV * getSMaxExpr(const SCEV *LHS, const SCEV *RHS)
bool isKnownNonPositive(const SCEV *S)
Test if the given expression is known to be non-positive.
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
const SCEV * getSMinExpr(const SCEV *LHS, const SCEV *RHS)
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getElementSize(Instruction *Inst)
Return the size of an element read or written by Inst.
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
bool any() const
Returns true if any bit is set.
size_type count() const
Returns the number of bits which are set.
SmallBitVector & reset()
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:74
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2217
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2222
APInt GreatestCommonDivisor(APInt A, APInt B)
Compute GCD of two unsigned APInt values.
Definition: APInt.cpp:771
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ TB
TB - TwoByte - Set if this instruction has a two byte opcode, which starts with a 0x0F byte before th...
Definition: X86BaseInfo.h:735
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void collectParametricTerms(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl< const SCEV * > &Terms)
Collect parametric terms occurring in step expressions (first step of delinearization).
void findArrayDimensions(ScalarEvolution &SE, SmallVectorImpl< const SCEV * > &Terms, SmallVectorImpl< const SCEV * > &Sizes, const SCEV *ElementSize)
Compute the array dimensions Sizes from the set of Terms extracted from the memory access function of...
void initializeDependenceAnalysisWrapperPassPass(PassRegistry &)
@ Done
Definition: Threading.h:60
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
inst_iterator inst_begin(Function *F)
Definition: InstIterator.h:129
void computeAccessFunctions(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< const SCEV * > &Sizes)
Return in Subscripts the access functions for each dimension in Sizes (third step of delinearization)...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
inst_iterator inst_end(Function *F)
Definition: InstIterator.h:130
bool tryDelinearizeFixedSizeImpl(ScalarEvolution *SE, Instruction *Inst, const SCEV *AccessFn, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< int > &Sizes)
Implementation of fixed size array delinearization.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
FunctionPass * createDependenceAnalysisWrapperPass()
createDependenceAnalysisPass - This creates an instance of the DependenceAnalysis wrapper pass.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:28
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Dependence::DVEntry - Each level in the distance/direction vector has a direction (or perhaps a union...
Direction
An enum for the direction of the loop.
Definition: LoopInfo.h:215