LLVM 22.0.0git
DependenceAnalysis.cpp
Go to the documentation of this file.
1//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// DependenceAnalysis is an LLVM pass that analyses dependences between memory
10// accesses. Currently, it is an (incomplete) implementation of the approach
11// described in
12//
13// Practical Dependence Testing
14// Goff, Kennedy, Tseng
15// PLDI 1991
16//
17// There's a single entry point that analyzes the dependence between a pair
18// of memory references in a function, returning either NULL, for no dependence,
19// or a more-or-less detailed description of the dependence between them.
20//
21// Currently, the implementation cannot propagate constraints between
22// coupled RDIV subscripts and lacks a multi-subscript MIV test.
23// Both of these are conservative weaknesses;
24// that is, not a source of correctness problems.
25//
26// Since Clang linearizes some array subscripts, the dependence
27// analysis is using SCEV->delinearize to recover the representation of multiple
28// subscripts, and thus avoid the more expensive and less precise MIV tests. The
29// delinearization is controlled by the flag -da-delinearize.
30//
31// We should pay some careful attention to the possibility of integer overflow
32// in the implementation of the various tests. This could happen with Add,
33// Subtract, or Multiply, with both APInt's and SCEV's.
34//
35// Some non-linear subscript pairs can be handled by the GCD test
36// (and perhaps other tests).
37// Should explore how often these things occur.
38//
39// Finally, it seems like certain test cases expose weaknesses in the SCEV
40// simplification, especially in the handling of sign and zero extensions.
41// It could be useful to spend time exploring these.
42//
43// Please note that this is work in progress and the interface is subject to
44// change.
45//
46//===----------------------------------------------------------------------===//
47// //
48// In memory of Ken Kennedy, 1945 - 2007 //
49// //
50//===----------------------------------------------------------------------===//
51
53#include "llvm/ADT/Statistic.h"
61#include "llvm/IR/Module.h"
64#include "llvm/Support/Debug.h"
67
68using namespace llvm;
69
70#define DEBUG_TYPE "da"
71
72//===----------------------------------------------------------------------===//
73// statistics
74
75STATISTIC(TotalArrayPairs, "Array pairs tested");
76STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs");
77STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs");
78STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
79STATISTIC(ZIVapplications, "ZIV applications");
80STATISTIC(ZIVindependence, "ZIV independence");
81STATISTIC(StrongSIVapplications, "Strong SIV applications");
82STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
83STATISTIC(StrongSIVindependence, "Strong SIV independence");
84STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
85STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
86STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
87STATISTIC(ExactSIVapplications, "Exact SIV applications");
88STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
89STATISTIC(ExactSIVindependence, "Exact SIV independence");
90STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
91STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
92STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
93STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
94STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
95STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications");
96STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence");
97STATISTIC(DeltaApplications, "Delta applications");
98STATISTIC(DeltaSuccesses, "Delta successes");
99STATISTIC(DeltaIndependence, "Delta independence");
100STATISTIC(DeltaPropagations, "Delta propagations");
101STATISTIC(GCDapplications, "GCD applications");
102STATISTIC(GCDsuccesses, "GCD successes");
103STATISTIC(GCDindependence, "GCD independence");
104STATISTIC(BanerjeeApplications, "Banerjee applications");
105STATISTIC(BanerjeeIndependence, "Banerjee independence");
106STATISTIC(BanerjeeSuccesses, "Banerjee successes");
107
108static cl::opt<bool>
109 Delinearize("da-delinearize", cl::init(true), cl::Hidden,
110 cl::desc("Try to delinearize array references."));
112 "da-disable-delinearization-checks", cl::Hidden,
113 cl::desc(
114 "Disable checks that try to statically verify validity of "
115 "delinearized subscripts. Enabling this option may result in incorrect "
116 "dependence vectors for languages that allow the subscript of one "
117 "dimension to underflow or overflow into another dimension."));
118
120 "da-miv-max-level-threshold", cl::init(7), cl::Hidden,
121 cl::desc("Maximum depth allowed for the recursive algorithm used to "
122 "explore MIV direction vectors."));
123
125 "da-run-siv-routines-only", cl::init(false), cl::ReallyHidden,
126 cl::desc("Run only SIV routines and disable others (ZIV, RDIV, and MIV). "
127 "The purpose is mainly to exclude the influence of those routines "
128 "in regression tests for SIV routines."));
129
130//===----------------------------------------------------------------------===//
131// basics
132
135 auto &AA = FAM.getResult<AAManager>(F);
136 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
137 auto &LI = FAM.getResult<LoopAnalysis>(F);
138 return DependenceInfo(&F, &AA, &SE, &LI);
139}
140
141AnalysisKey DependenceAnalysis::Key;
142
144 "Dependence Analysis", true, true)
150
152
155
159
161 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
162 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
163 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
164 info.reset(new DependenceInfo(&F, &AA, &SE, &LI));
165 return false;
166}
167
169
171
178
179// Used to test the dependence analyzer.
180// Looks through the function, noting instructions that may access memory.
181// Calls depends() on every possible pair and prints out the result.
182// Ignores all other instructions.
184 ScalarEvolution &SE, bool NormalizeResults) {
185 auto *F = DA->getFunction();
186 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE;
187 ++SrcI) {
188 if (SrcI->mayReadOrWriteMemory()) {
189 for (inst_iterator DstI = SrcI, DstE = inst_end(F); DstI != DstE;
190 ++DstI) {
191 if (DstI->mayReadOrWriteMemory()) {
192 OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n";
193 OS << " da analyze - ";
194 if (auto D = DA->depends(&*SrcI, &*DstI,
195 /*UnderRuntimeAssumptions=*/true)) {
196
197#ifndef NDEBUG
198 // Verify that the distance being zero is equivalent to the
199 // direction being EQ.
200 for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
201 const SCEV *Distance = D->getDistance(Level);
202 bool IsDistanceZero = Distance && Distance->isZero();
203 bool IsDirectionEQ =
204 D->getDirection(Level) == Dependence::DVEntry::EQ;
205 assert(IsDistanceZero == IsDirectionEQ &&
206 "Inconsistent distance and direction.");
207 }
208#endif
209
210 // Normalize negative direction vectors if required by clients.
211 if (NormalizeResults && D->normalize(&SE))
212 OS << "normalized - ";
213 D->dump(OS);
214 for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
215 if (D->isSplitable(Level)) {
216 OS << " da analyze - split level = " << Level;
217 OS << ", iteration = " << *DA->getSplitIteration(*D, Level);
218 OS << "!\n";
219 }
220 }
221 } else
222 OS << "none!\n";
223 }
224 }
225 }
226 }
227 SCEVUnionPredicate Assumptions = DA->getRuntimeAssumptions();
228 if (!Assumptions.isAlwaysTrue()) {
229 OS << "Runtime Assumptions:\n";
230 Assumptions.print(OS, 0);
231 }
232}
233
235 const Module *) const {
237 OS, info.get(), getAnalysis<ScalarEvolutionWrapperPass>().getSE(), false);
238}
239
242 OS << "Printing analysis 'Dependence Analysis' for function '" << F.getName()
243 << "':\n";
245 FAM.getResult<ScalarEvolutionAnalysis>(F),
246 NormalizeResults);
247 return PreservedAnalyses::all();
248}
249
250//===----------------------------------------------------------------------===//
251// Dependence methods
252
253// Returns true if this is an input dependence.
255 return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
256}
257
258// Returns true if this is an output dependence.
260 return Src->mayWriteToMemory() && Dst->mayWriteToMemory();
261}
262
263// Returns true if this is an flow (aka true) dependence.
264bool Dependence::isFlow() const {
265 return Src->mayWriteToMemory() && Dst->mayReadFromMemory();
266}
267
268// Returns true if this is an anti dependence.
269bool Dependence::isAnti() const {
270 return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
271}
272
273// Returns true if a particular level is scalar; that is,
274// if no subscript in the source or destination mention the induction
275// variable associated with the loop at this level.
276// Leave this out of line, so it will serve as a virtual method anchor
277bool Dependence::isScalar(unsigned level) const { return false; }
278
279//===----------------------------------------------------------------------===//
280// FullDependence methods
281
283 const SCEVUnionPredicate &Assumes,
284 bool PossiblyLoopIndependent,
285 unsigned CommonLevels)
286 : Dependence(Source, Destination, Assumes), Levels(CommonLevels),
287 LoopIndependent(PossiblyLoopIndependent) {
288 Consistent = true;
289 if (CommonLevels)
290 DV = std::make_unique<DVEntry[]>(CommonLevels);
291}
292
293// FIXME: in some cases the meaning of a negative direction vector
294// may not be straightforward, e.g.,
295// for (int i = 0; i < 32; ++i) {
296// Src: A[i] = ...;
297// Dst: use(A[31 - i]);
298// }
299// The dependency is
300// flow { Src[i] -> Dst[31 - i] : when i >= 16 } and
301// anti { Dst[i] -> Src[31 - i] : when i < 16 },
302// -- hence a [<>].
303// As long as a dependence result contains '>' ('<>', '<=>', "*"), it
304// means that a reversed/normalized dependence needs to be considered
305// as well. Nevertheless, current isDirectionNegative() only returns
306// true with a '>' or '>=' dependency for ease of canonicalizing the
307// dependency vector, since the reverse of '<>', '<=>' and "*" is itself.
309 for (unsigned Level = 1; Level <= Levels; ++Level) {
310 unsigned char Direction = DV[Level - 1].Direction;
311 if (Direction == Dependence::DVEntry::EQ)
312 continue;
313 if (Direction == Dependence::DVEntry::GT ||
314 Direction == Dependence::DVEntry::GE)
315 return true;
316 return false;
317 }
318 return false;
319}
320
322 if (!isDirectionNegative())
323 return false;
324
325 LLVM_DEBUG(dbgs() << "Before normalizing negative direction vectors:\n";
326 dump(dbgs()););
327 std::swap(Src, Dst);
328 for (unsigned Level = 1; Level <= Levels; ++Level) {
329 unsigned char Direction = DV[Level - 1].Direction;
330 // Reverse the direction vector, this means LT becomes GT
331 // and GT becomes LT.
332 unsigned char RevDirection = Direction & Dependence::DVEntry::EQ;
333 if (Direction & Dependence::DVEntry::LT)
334 RevDirection |= Dependence::DVEntry::GT;
335 if (Direction & Dependence::DVEntry::GT)
336 RevDirection |= Dependence::DVEntry::LT;
337 DV[Level - 1].Direction = RevDirection;
338 // Reverse the dependence distance as well.
339 if (DV[Level - 1].Distance != nullptr)
340 DV[Level - 1].Distance = SE->getNegativeSCEV(DV[Level - 1].Distance);
341 }
342
343 LLVM_DEBUG(dbgs() << "After normalizing negative direction vectors:\n";
344 dump(dbgs()););
345 return true;
346}
347
348// The rest are simple getters that hide the implementation.
349
350// getDirection - Returns the direction associated with a particular level.
351unsigned FullDependence::getDirection(unsigned Level) const {
352 assert(0 < Level && Level <= Levels && "Level out of range");
353 return DV[Level - 1].Direction;
354}
355
356// Returns the distance (or NULL) associated with a particular level.
357const SCEV *FullDependence::getDistance(unsigned Level) const {
358 assert(0 < Level && Level <= Levels && "Level out of range");
359 return DV[Level - 1].Distance;
360}
361
362// Returns true if a particular level is scalar; that is,
363// if no subscript in the source or destination mention the induction
364// variable associated with the loop at this level.
365bool FullDependence::isScalar(unsigned Level) const {
366 assert(0 < Level && Level <= Levels && "Level out of range");
367 return DV[Level - 1].Scalar;
368}
369
370// Returns true if peeling the first iteration from this loop
371// will break this dependence.
372bool FullDependence::isPeelFirst(unsigned Level) const {
373 assert(0 < Level && Level <= Levels && "Level out of range");
374 return DV[Level - 1].PeelFirst;
375}
376
377// Returns true if peeling the last iteration from this loop
378// will break this dependence.
379bool FullDependence::isPeelLast(unsigned Level) const {
380 assert(0 < Level && Level <= Levels && "Level out of range");
381 return DV[Level - 1].PeelLast;
382}
383
384// Returns true if splitting this loop will break the dependence.
385bool FullDependence::isSplitable(unsigned Level) const {
386 assert(0 < Level && Level <= Levels && "Level out of range");
387 return DV[Level - 1].Splitable;
388}
389
390//===----------------------------------------------------------------------===//
391// DependenceInfo::Constraint methods
392
393// If constraint is a point <X, Y>, returns X.
394// Otherwise assert.
395const SCEV *DependenceInfo::Constraint::getX() const {
396 assert(Kind == Point && "Kind should be Point");
397 return A;
398}
399
400// If constraint is a point <X, Y>, returns Y.
401// Otherwise assert.
402const SCEV *DependenceInfo::Constraint::getY() const {
403 assert(Kind == Point && "Kind should be Point");
404 return B;
405}
406
407// If constraint is a line AX + BY = C, returns A.
408// Otherwise assert.
409const SCEV *DependenceInfo::Constraint::getA() const {
410 assert((Kind == Line || Kind == Distance) &&
411 "Kind should be Line (or Distance)");
412 return A;
413}
414
415// If constraint is a line AX + BY = C, returns B.
416// Otherwise assert.
417const SCEV *DependenceInfo::Constraint::getB() const {
418 assert((Kind == Line || Kind == Distance) &&
419 "Kind should be Line (or Distance)");
420 return B;
421}
422
423// If constraint is a line AX + BY = C, returns C.
424// Otherwise assert.
425const SCEV *DependenceInfo::Constraint::getC() const {
426 assert((Kind == Line || Kind == Distance) &&
427 "Kind should be Line (or Distance)");
428 return C;
429}
430
431// If constraint is a distance, returns D.
432// Otherwise assert.
433const SCEV *DependenceInfo::Constraint::getD() const {
434 assert(Kind == Distance && "Kind should be Distance");
435 return SE->getNegativeSCEV(C);
436}
437
438// Returns the loop associated with this constraint.
439const Loop *DependenceInfo::Constraint::getAssociatedLoop() const {
440 assert((Kind == Distance || Kind == Line || Kind == Point) &&
441 "Kind should be Distance, Line, or Point");
442 return AssociatedLoop;
443}
444
445void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y,
446 const Loop *CurLoop) {
447 Kind = Point;
448 A = X;
449 B = Y;
450 AssociatedLoop = CurLoop;
451}
452
453void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB,
454 const SCEV *CC, const Loop *CurLoop) {
455 Kind = Line;
456 A = AA;
457 B = BB;
458 C = CC;
459 AssociatedLoop = CurLoop;
460}
461
462void DependenceInfo::Constraint::setDistance(const SCEV *D,
463 const Loop *CurLoop) {
464 Kind = Distance;
465 A = SE->getOne(D->getType());
466 B = SE->getNegativeSCEV(A);
467 C = SE->getNegativeSCEV(D);
468 AssociatedLoop = CurLoop;
469}
470
471void DependenceInfo::Constraint::setEmpty() { Kind = Empty; }
472
473void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) {
474 SE = NewSE;
475 Kind = Any;
476}
477
478#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
479// For debugging purposes. Dumps the constraint out to OS.
480LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const {
481 if (isEmpty())
482 OS << " Empty\n";
483 else if (isAny())
484 OS << " Any\n";
485 else if (isPoint())
486 OS << " Point is <" << *getX() << ", " << *getY() << ">\n";
487 else if (isDistance())
488 OS << " Distance is " << *getD() << " (" << *getA() << "*X + " << *getB()
489 << "*Y = " << *getC() << ")\n";
490 else if (isLine())
491 OS << " Line is " << *getA() << "*X + " << *getB() << "*Y = " << *getC()
492 << "\n";
493 else
494 llvm_unreachable("unknown constraint type in Constraint::dump");
495}
496#endif
497
498// Updates X with the intersection
499// of the Constraints X and Y. Returns true if X has changed.
500// Corresponds to Figure 4 from the paper
501//
502// Practical Dependence Testing
503// Goff, Kennedy, Tseng
504// PLDI 1991
505bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
506 ++DeltaApplications;
507 LLVM_DEBUG(dbgs() << "\tintersect constraints\n");
508 LLVM_DEBUG(dbgs() << "\t X ="; X->dump(dbgs()));
509 LLVM_DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs()));
510 assert(!Y->isPoint() && "Y must not be a Point");
511 if (X->isAny()) {
512 if (Y->isAny())
513 return false;
514 *X = *Y;
515 return true;
516 }
517 if (X->isEmpty())
518 return false;
519 if (Y->isEmpty()) {
520 X->setEmpty();
521 return true;
522 }
523
524 if (X->isDistance() && Y->isDistance()) {
525 LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n");
526 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD()))
527 return false;
528 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) {
529 X->setEmpty();
530 ++DeltaSuccesses;
531 return true;
532 }
533 // Hmmm, interesting situation.
534 // I guess if either is constant, keep it and ignore the other.
535 if (isa<SCEVConstant>(Y->getD())) {
536 *X = *Y;
537 return true;
538 }
539 return false;
540 }
541
542 // At this point, the pseudo-code in Figure 4 of the paper
543 // checks if (X->isPoint() && Y->isPoint()).
544 // This case can't occur in our implementation,
545 // since a Point can only arise as the result of intersecting
546 // two Line constraints, and the right-hand value, Y, is never
547 // the result of an intersection.
548 assert(!(X->isPoint() && Y->isPoint()) &&
549 "We shouldn't ever see X->isPoint() && Y->isPoint()");
550
551 if (X->isLine() && Y->isLine()) {
552 LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n");
553 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB());
554 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA());
555 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) {
556 // slopes are equal, so lines are parallel
557 LLVM_DEBUG(dbgs() << "\t\tsame slope\n");
558 Prod1 = SE->getMulExpr(X->getC(), Y->getB());
559 Prod2 = SE->getMulExpr(X->getB(), Y->getC());
560 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2))
561 return false;
562 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
563 X->setEmpty();
564 ++DeltaSuccesses;
565 return true;
566 }
567 return false;
568 }
569 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
570 // slopes differ, so lines intersect
571 LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n");
572 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB());
573 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA());
574 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB());
575 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA());
576 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB());
577 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB());
578 const SCEVConstant *C1A2_C2A1 =
579 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1));
580 const SCEVConstant *C1B2_C2B1 =
581 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1));
582 const SCEVConstant *A1B2_A2B1 =
583 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1));
584 const SCEVConstant *A2B1_A1B2 =
585 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2));
586 if (!C1B2_C2B1 || !C1A2_C2A1 || !A1B2_A2B1 || !A2B1_A1B2)
587 return false;
588 APInt Xtop = C1B2_C2B1->getAPInt();
589 APInt Xbot = A1B2_A2B1->getAPInt();
590 APInt Ytop = C1A2_C2A1->getAPInt();
591 APInt Ybot = A2B1_A1B2->getAPInt();
592 LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");
593 LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");
594 LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");
595 LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n");
596 APInt Xq = Xtop; // these need to be initialized, even
597 APInt Xr = Xtop; // though they're just going to be overwritten
598 APInt::sdivrem(Xtop, Xbot, Xq, Xr);
599 APInt Yq = Ytop;
600 APInt Yr = Ytop;
601 APInt::sdivrem(Ytop, Ybot, Yq, Yr);
602 if (Xr != 0 || Yr != 0) {
603 X->setEmpty();
604 ++DeltaSuccesses;
605 return true;
606 }
607 LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n");
608 if (Xq.slt(0) || Yq.slt(0)) {
609 X->setEmpty();
610 ++DeltaSuccesses;
611 return true;
612 }
613 if (const SCEVConstant *CUB = collectConstantUpperBound(
614 X->getAssociatedLoop(), Prod1->getType())) {
615 const APInt &UpperBound = CUB->getAPInt();
616 LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
617 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
618 X->setEmpty();
619 ++DeltaSuccesses;
620 return true;
621 }
622 }
623 X->setPoint(SE->getConstant(Xq), SE->getConstant(Yq),
624 X->getAssociatedLoop());
625 ++DeltaSuccesses;
626 return true;
627 }
628 return false;
629 }
630
631 // if (X->isLine() && Y->isPoint()) This case can't occur.
632 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur");
633
634 if (X->isPoint() && Y->isLine()) {
635 LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n");
636 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX());
637 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY());
638 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1);
639 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC()))
640 return false;
641 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) {
642 X->setEmpty();
643 ++DeltaSuccesses;
644 return true;
645 }
646 return false;
647 }
648
649 llvm_unreachable("shouldn't reach the end of Constraint intersection");
650 return false;
651}
652
653//===----------------------------------------------------------------------===//
654// DependenceInfo methods
655
656// For debugging purposes. Dumps a dependence to OS.
658 bool Splitable = false;
659 if (isConfused())
660 OS << "confused";
661 else {
662 if (isConsistent())
663 OS << "consistent ";
664 if (isFlow())
665 OS << "flow";
666 else if (isOutput())
667 OS << "output";
668 else if (isAnti())
669 OS << "anti";
670 else if (isInput())
671 OS << "input";
672 unsigned Levels = getLevels();
673 OS << " [";
674 for (unsigned II = 1; II <= Levels; ++II) {
675 if (isSplitable(II))
676 Splitable = true;
677 if (isPeelFirst(II))
678 OS << 'p';
679 const SCEV *Distance = getDistance(II);
680 if (Distance)
681 OS << *Distance;
682 else if (isScalar(II))
683 OS << "S";
684 else {
685 unsigned Direction = getDirection(II);
686 if (Direction == DVEntry::ALL)
687 OS << "*";
688 else {
689 if (Direction & DVEntry::LT)
690 OS << "<";
691 if (Direction & DVEntry::EQ)
692 OS << "=";
693 if (Direction & DVEntry::GT)
694 OS << ">";
695 }
696 }
697 if (isPeelLast(II))
698 OS << 'p';
699 if (II < Levels)
700 OS << " ";
701 }
702 if (isLoopIndependent())
703 OS << "|<";
704 OS << "]";
705 if (Splitable)
706 OS << " splitable";
707 }
708 OS << "!\n";
709
711 if (!Assumptions.isAlwaysTrue()) {
712 OS << " Runtime Assumptions:\n";
713 Assumptions.print(OS, 2);
714 }
715}
716
717// Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their
718// underlaying objects. If LocA and LocB are known to not alias (for any reason:
719// tbaa, non-overlapping regions etc), then it is known there is no dependecy.
720// Otherwise the underlying objects are checked to see if they point to
721// different identifiable objects.
723 const MemoryLocation &LocA,
724 const MemoryLocation &LocB) {
725 // Check the original locations (minus size) for noalias, which can happen for
726 // tbaa, incompatible underlying object locations, etc.
727 MemoryLocation LocAS =
729 MemoryLocation LocBS =
731 BatchAAResults BAA(*AA);
733
734 if (BAA.isNoAlias(LocAS, LocBS))
736
737 // Check the underlying objects are the same
738 const Value *AObj = getUnderlyingObject(LocA.Ptr);
739 const Value *BObj = getUnderlyingObject(LocB.Ptr);
740
741 // If the underlying objects are the same, they must alias
742 if (AObj == BObj)
744
745 // We may have hit the recursion limit for underlying objects, or have
746 // underlying objects where we don't know they will alias.
747 if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj))
749
750 // Otherwise we know the objects are different and both identified objects so
751 // must not alias.
753}
754
755// Returns true if the load or store can be analyzed. Atomic and volatile
756// operations have properties which this analysis does not understand.
757static bool isLoadOrStore(const Instruction *I) {
758 if (const LoadInst *LI = dyn_cast<LoadInst>(I))
759 return LI->isUnordered();
760 else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
761 return SI->isUnordered();
762 return false;
763}
764
765// Examines the loop nesting of the Src and Dst
766// instructions and establishes their shared loops. Sets the variables
767// CommonLevels, SrcLevels, and MaxLevels.
768// The source and destination instructions needn't be contained in the same
769// loop. The routine establishNestingLevels finds the level of most deeply
770// nested loop that contains them both, CommonLevels. An instruction that's
771// not contained in a loop is at level = 0. MaxLevels is equal to the level
772// of the source plus the level of the destination, minus CommonLevels.
773// This lets us allocate vectors MaxLevels in length, with room for every
774// distinct loop referenced in both the source and destination subscripts.
775// The variable SrcLevels is the nesting depth of the source instruction.
776// It's used to help calculate distinct loops referenced by the destination.
777// Here's the map from loops to levels:
778// 0 - unused
779// 1 - outermost common loop
780// ... - other common loops
781// CommonLevels - innermost common loop
782// ... - loops containing Src but not Dst
783// SrcLevels - innermost loop containing Src but not Dst
784// ... - loops containing Dst but not Src
785// MaxLevels - innermost loops containing Dst but not Src
786// Consider the follow code fragment:
787// for (a = ...) {
788// for (b = ...) {
789// for (c = ...) {
790// for (d = ...) {
791// A[] = ...;
792// }
793// }
794// for (e = ...) {
795// for (f = ...) {
796// for (g = ...) {
797// ... = A[];
798// }
799// }
800// }
801// }
802// }
803// If we're looking at the possibility of a dependence between the store
804// to A (the Src) and the load from A (the Dst), we'll note that they
805// have 2 loops in common, so CommonLevels will equal 2 and the direction
806// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
807// A map from loop names to loop numbers would look like
808// a - 1
809// b - 2 = CommonLevels
810// c - 3
811// d - 4 = SrcLevels
812// e - 5
813// f - 6
814// g - 7 = MaxLevels
815void DependenceInfo::establishNestingLevels(const Instruction *Src,
816 const Instruction *Dst) {
817 const BasicBlock *SrcBlock = Src->getParent();
818 const BasicBlock *DstBlock = Dst->getParent();
819 unsigned SrcLevel = LI->getLoopDepth(SrcBlock);
820 unsigned DstLevel = LI->getLoopDepth(DstBlock);
821 const Loop *SrcLoop = LI->getLoopFor(SrcBlock);
822 const Loop *DstLoop = LI->getLoopFor(DstBlock);
823 SrcLevels = SrcLevel;
824 MaxLevels = SrcLevel + DstLevel;
825 while (SrcLevel > DstLevel) {
826 SrcLoop = SrcLoop->getParentLoop();
827 SrcLevel--;
828 }
829 while (DstLevel > SrcLevel) {
830 DstLoop = DstLoop->getParentLoop();
831 DstLevel--;
832 }
833 while (SrcLoop != DstLoop) {
834 SrcLoop = SrcLoop->getParentLoop();
835 DstLoop = DstLoop->getParentLoop();
836 SrcLevel--;
837 }
838 CommonLevels = SrcLevel;
839 MaxLevels -= CommonLevels;
840}
841
842// Given one of the loops containing the source, return
843// its level index in our numbering scheme.
844unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const {
845 return SrcLoop->getLoopDepth();
846}
847
848// Given one of the loops containing the destination,
849// return its level index in our numbering scheme.
850unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const {
851 unsigned D = DstLoop->getLoopDepth();
852 if (D > CommonLevels)
853 // This tries to make sure that we assign unique numbers to src and dst when
854 // the memory accesses reside in different loops that have the same depth.
855 return D - CommonLevels + SrcLevels;
856 else
857 return D;
858}
859
860// Returns true if Expression is loop invariant in LoopNest.
861bool DependenceInfo::isLoopInvariant(const SCEV *Expression,
862 const Loop *LoopNest) const {
863 // Unlike ScalarEvolution::isLoopInvariant() we consider an access outside of
864 // any loop as invariant, because we only consier expression evaluation at a
865 // specific position (where the array access takes place), and not across the
866 // entire function.
867 if (!LoopNest)
868 return true;
869
870 // If the expression is invariant in the outermost loop of the loop nest, it
871 // is invariant anywhere in the loop nest.
872 return SE->isLoopInvariant(Expression, LoopNest->getOutermostLoop());
873}
874
875// Finds the set of loops from the LoopNest that
876// have a level <= CommonLevels and are referred to by the SCEV Expression.
877void DependenceInfo::collectCommonLoops(const SCEV *Expression,
878 const Loop *LoopNest,
879 SmallBitVector &Loops) const {
880 while (LoopNest) {
881 unsigned Level = LoopNest->getLoopDepth();
882 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest))
883 Loops.set(Level);
884 LoopNest = LoopNest->getParentLoop();
885 }
886}
887
888void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) {
889
890 unsigned widestWidthSeen = 0;
891 Type *widestType;
892
893 // Go through each pair and find the widest bit to which we need
894 // to extend all of them.
895 for (Subscript *Pair : Pairs) {
896 const SCEV *Src = Pair->Src;
897 const SCEV *Dst = Pair->Dst;
898 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
899 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
900 if (SrcTy == nullptr || DstTy == nullptr) {
901 assert(SrcTy == DstTy &&
902 "This function only unify integer types and "
903 "expect Src and Dst share the same type otherwise.");
904 continue;
905 }
906 if (SrcTy->getBitWidth() > widestWidthSeen) {
907 widestWidthSeen = SrcTy->getBitWidth();
908 widestType = SrcTy;
909 }
910 if (DstTy->getBitWidth() > widestWidthSeen) {
911 widestWidthSeen = DstTy->getBitWidth();
912 widestType = DstTy;
913 }
914 }
915
916 assert(widestWidthSeen > 0);
917
918 // Now extend each pair to the widest seen.
919 for (Subscript *Pair : Pairs) {
920 const SCEV *Src = Pair->Src;
921 const SCEV *Dst = Pair->Dst;
922 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
923 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
924 if (SrcTy == nullptr || DstTy == nullptr) {
925 assert(SrcTy == DstTy &&
926 "This function only unify integer types and "
927 "expect Src and Dst share the same type otherwise.");
928 continue;
929 }
930 if (SrcTy->getBitWidth() < widestWidthSeen)
931 // Sign-extend Src to widestType
932 Pair->Src = SE->getSignExtendExpr(Src, widestType);
933 if (DstTy->getBitWidth() < widestWidthSeen) {
934 // Sign-extend Dst to widestType
935 Pair->Dst = SE->getSignExtendExpr(Dst, widestType);
936 }
937 }
938}
939
940// removeMatchingExtensions - Examines a subscript pair.
941// If the source and destination are identically sign (or zero)
942// extended, it strips off the extension in an effect to simplify
943// the actual analysis.
944void DependenceInfo::removeMatchingExtensions(Subscript *Pair) {
945 const SCEV *Src = Pair->Src;
946 const SCEV *Dst = Pair->Dst;
949 const SCEVIntegralCastExpr *SrcCast = cast<SCEVIntegralCastExpr>(Src);
950 const SCEVIntegralCastExpr *DstCast = cast<SCEVIntegralCastExpr>(Dst);
951 const SCEV *SrcCastOp = SrcCast->getOperand();
952 const SCEV *DstCastOp = DstCast->getOperand();
953 if (SrcCastOp->getType() == DstCastOp->getType()) {
954 Pair->Src = SrcCastOp;
955 Pair->Dst = DstCastOp;
956 }
957 }
958}
959
960// Examine the scev and return true iff it's affine.
961// Collect any loops mentioned in the set of "Loops".
962bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,
963 SmallBitVector &Loops, bool IsSrc) {
964 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
965 if (!AddRec)
966 return isLoopInvariant(Expr, LoopNest);
967
968 // The AddRec must depend on one of the containing loops. Otherwise,
969 // mapSrcLoop and mapDstLoop return indices outside the intended range. This
970 // can happen when a subscript in one loop references an IV from a sibling
971 // loop that could not be replaced with a concrete exit value by
972 // getSCEVAtScope.
973 const Loop *L = LoopNest;
974 while (L && AddRec->getLoop() != L)
975 L = L->getParentLoop();
976 if (!L)
977 return false;
978
979 const SCEV *Start = AddRec->getStart();
980 const SCEV *Step = AddRec->getStepRecurrence(*SE);
981 if (!isLoopInvariant(Step, LoopNest))
982 return false;
983 if (IsSrc)
984 Loops.set(mapSrcLoop(AddRec->getLoop()));
985 else
986 Loops.set(mapDstLoop(AddRec->getLoop()));
987 return checkSubscript(Start, LoopNest, Loops, IsSrc);
988}
989
990// Examine the scev and return true iff it's linear.
991// Collect any loops mentioned in the set of "Loops".
992bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest,
993 SmallBitVector &Loops) {
994 return checkSubscript(Src, LoopNest, Loops, true);
995}
996
997// Examine the scev and return true iff it's linear.
998// Collect any loops mentioned in the set of "Loops".
999bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest,
1000 SmallBitVector &Loops) {
1001 return checkSubscript(Dst, LoopNest, Loops, false);
1002}
1003
1004// Examines the subscript pair (the Src and Dst SCEVs)
1005// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
1006// Collects the associated loops in a set.
1007DependenceInfo::Subscript::ClassificationKind
1008DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,
1009 const SCEV *Dst, const Loop *DstLoopNest,
1010 SmallBitVector &Loops) {
1011 SmallBitVector SrcLoops(MaxLevels + 1);
1012 SmallBitVector DstLoops(MaxLevels + 1);
1013 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops))
1014 return Subscript::NonLinear;
1015 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops))
1016 return Subscript::NonLinear;
1017 Loops = SrcLoops;
1018 Loops |= DstLoops;
1019 unsigned N = Loops.count();
1020 if (N == 0)
1021 return Subscript::ZIV;
1022 if (N == 1)
1023 return Subscript::SIV;
1024 if (N == 2 && (SrcLoops.count() == 0 || DstLoops.count() == 0 ||
1025 (SrcLoops.count() == 1 && DstLoops.count() == 1)))
1026 return Subscript::RDIV;
1027 return Subscript::MIV;
1028}
1029
1030// A wrapper around SCEV::isKnownPredicate.
1031// Looks for cases where we're interested in comparing for equality.
1032// If both X and Y have been identically sign or zero extended,
1033// it strips off the (confusing) extensions before invoking
1034// SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package
1035// will be similarly updated.
1036//
1037// If SCEV::isKnownPredicate can't prove the predicate,
1038// we try simple subtraction, which seems to help in some cases
1039// involving symbolics.
1040bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X,
1041 const SCEV *Y) const {
1042 if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
1045 const SCEVIntegralCastExpr *CX = cast<SCEVIntegralCastExpr>(X);
1046 const SCEVIntegralCastExpr *CY = cast<SCEVIntegralCastExpr>(Y);
1047 const SCEV *Xop = CX->getOperand();
1048 const SCEV *Yop = CY->getOperand();
1049 if (Xop->getType() == Yop->getType()) {
1050 X = Xop;
1051 Y = Yop;
1052 }
1053 }
1054 }
1055 if (SE->isKnownPredicate(Pred, X, Y))
1056 return true;
1057 // If SE->isKnownPredicate can't prove the condition,
1058 // we try the brute-force approach of subtracting
1059 // and testing the difference.
1060 // By testing with SE->isKnownPredicate first, we avoid
1061 // the possibility of overflow when the arguments are constants.
1062 const SCEV *Delta = SE->getMinusSCEV(X, Y);
1063 switch (Pred) {
1064 case CmpInst::ICMP_EQ:
1065 return Delta->isZero();
1066 case CmpInst::ICMP_NE:
1067 return SE->isKnownNonZero(Delta);
1068 case CmpInst::ICMP_SGE:
1069 return SE->isKnownNonNegative(Delta);
1070 case CmpInst::ICMP_SLE:
1071 return SE->isKnownNonPositive(Delta);
1072 case CmpInst::ICMP_SGT:
1073 return SE->isKnownPositive(Delta);
1074 case CmpInst::ICMP_SLT:
1075 return SE->isKnownNegative(Delta);
1076 default:
1077 llvm_unreachable("unexpected predicate in isKnownPredicate");
1078 }
1079}
1080
1081/// Compare to see if S is less than Size, using
1082///
1083/// isKnownNegative(S - Size)
1084///
1085/// with some extra checking if S is an AddRec and we can prove less-than using
1086/// the loop bounds.
1087bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const {
1088 // First unify to the same type
1089 auto *SType = dyn_cast<IntegerType>(S->getType());
1090 auto *SizeType = dyn_cast<IntegerType>(Size->getType());
1091 if (!SType || !SizeType)
1092 return false;
1093 Type *MaxType =
1094 (SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType;
1095 S = SE->getTruncateOrZeroExtend(S, MaxType);
1096 Size = SE->getTruncateOrZeroExtend(Size, MaxType);
1097
1098 // Special check for addrecs using BE taken count
1099 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S))
1100 if (AddRec->isAffine() && AddRec->hasNoSignedWrap()) {
1101 const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop());
1102 const SCEV *Start = AddRec->getStart();
1103 const SCEV *Step = AddRec->getStepRecurrence(*SE);
1104 const SCEV *End = AddRec->evaluateAtIteration(BECount, *SE);
1105 const SCEV *Diff0 = SE->getMinusSCEV(Start, Size);
1106 const SCEV *Diff1 = SE->getMinusSCEV(End, Size);
1107
1108 // If the value of Step is non-negative and the AddRec is non-wrap, it
1109 // reaches its maximum at the last iteration. So it's enouth to check
1110 // whether End - Size is negative.
1111 if (SE->isKnownNonNegative(Step) && SE->isKnownNegative(Diff1))
1112 return true;
1113
1114 // If the value of Step is non-positive and the AddRec is non-wrap, the
1115 // initial value is its maximum.
1116 if (SE->isKnownNonPositive(Step) && SE->isKnownNegative(Diff0))
1117 return true;
1118
1119 // Even if we don't know the sign of Step, either Start or End must be
1120 // the maximum value of the AddRec since it is non-wrap.
1121 if (SE->isKnownNegative(Diff0) && SE->isKnownNegative(Diff1))
1122 return true;
1123 }
1124
1125 // Check using normal isKnownNegative
1126 const SCEV *LimitedBound = SE->getMinusSCEV(S, Size);
1127 return SE->isKnownNegative(LimitedBound);
1128}
1129
1130bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const {
1131 bool Inbounds = false;
1132 if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr))
1133 Inbounds = SrcGEP->isInBounds();
1134 if (Inbounds) {
1135 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
1136 if (AddRec->isAffine()) {
1137 // We know S is for Ptr, the operand on a load/store, so doesn't wrap.
1138 // If both parts are NonNegative, the end result will be NonNegative
1139 if (SE->isKnownNonNegative(AddRec->getStart()) &&
1140 SE->isKnownNonNegative(AddRec->getOperand(1)))
1141 return true;
1142 }
1143 }
1144 }
1145
1146 return SE->isKnownNonNegative(S);
1147}
1148
1149// All subscripts are all the same type.
1150// Loop bound may be smaller (e.g., a char).
1151// Should zero extend loop bound, since it's always >= 0.
1152// This routine collects upper bound and extends or truncates if needed.
1153// Truncating is safe when subscripts are known not to wrap. Cases without
1154// nowrap flags should have been rejected earlier.
1155// Return null if no bound available.
1156const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const {
1157 if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
1158 const SCEV *UB = SE->getBackedgeTakenCount(L);
1159 return SE->getTruncateOrZeroExtend(UB, T);
1160 }
1161 return nullptr;
1162}
1163
1164// Calls collectUpperBound(), then attempts to cast it to SCEVConstant.
1165// If the cast fails, returns NULL.
1166const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L,
1167 Type *T) const {
1168 if (const SCEV *UB = collectUpperBound(L, T))
1169 return dyn_cast<SCEVConstant>(UB);
1170 return nullptr;
1171}
1172
1173// testZIV -
1174// When we have a pair of subscripts of the form [c1] and [c2],
1175// where c1 and c2 are both loop invariant, we attack it using
1176// the ZIV test. Basically, we test by comparing the two values,
1177// but there are actually three possible results:
1178// 1) the values are equal, so there's a dependence
1179// 2) the values are different, so there's no dependence
1180// 3) the values might be equal, so we have to assume a dependence.
1181//
1182// Return true if dependence disproved.
1183bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst,
1184 FullDependence &Result) const {
1185 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
1186 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
1187 ++ZIVapplications;
1188 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) {
1189 LLVM_DEBUG(dbgs() << " provably dependent\n");
1190 return false; // provably dependent
1191 }
1192 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) {
1193 LLVM_DEBUG(dbgs() << " provably independent\n");
1194 ++ZIVindependence;
1195 return true; // provably independent
1196 }
1197 LLVM_DEBUG(dbgs() << " possibly dependent\n");
1198 Result.Consistent = false;
1199 return false; // possibly dependent
1200}
1201
1202// strongSIVtest -
1203// From the paper, Practical Dependence Testing, Section 4.2.1
1204//
1205// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
1206// where i is an induction variable, c1 and c2 are loop invariant,
1207// and a is a constant, we can solve it exactly using the Strong SIV test.
1208//
1209// Can prove independence. Failing that, can compute distance (and direction).
1210// In the presence of symbolic terms, we can sometimes make progress.
1211//
1212// If there's a dependence,
1213//
1214// c1 + a*i = c2 + a*i'
1215//
1216// The dependence distance is
1217//
1218// d = i' - i = (c1 - c2)/a
1219//
1220// A dependence only exists if d is an integer and abs(d) <= U, where U is the
1221// loop's upper bound. If a dependence exists, the dependence direction is
1222// defined as
1223//
1224// { < if d > 0
1225// direction = { = if d = 0
1226// { > if d < 0
1227//
1228// Return true if dependence disproved.
1229bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
1230 const SCEV *DstConst, const Loop *CurLoop,
1231 unsigned Level, FullDependence &Result,
1232 Constraint &NewConstraint) const {
1233 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");
1234 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff);
1235 LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
1236 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst);
1237 LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
1238 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst);
1239 LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
1240 ++StrongSIVapplications;
1241 assert(0 < Level && Level <= CommonLevels && "level out of range");
1242 Level--;
1243
1244 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
1245 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta);
1246 LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
1247
1248 // check that |Delta| < iteration count
1249 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1250 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound);
1251 LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
1252 const SCEV *AbsDelta =
1253 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta);
1254 const SCEV *AbsCoeff =
1255 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff);
1256 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff);
1257 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) {
1258 // Distance greater than trip count - no dependence
1259 ++StrongSIVindependence;
1260 ++StrongSIVsuccesses;
1261 return true;
1262 }
1263 }
1264
1265 // Can we compute distance?
1266 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) {
1267 APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt();
1268 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt();
1269 APInt Distance = ConstDelta; // these need to be initialized
1270 APInt Remainder = ConstDelta;
1271 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder);
1272 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
1273 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1274 // Make sure Coeff divides Delta exactly
1275 if (Remainder != 0) {
1276 // Coeff doesn't divide Distance, no dependence
1277 ++StrongSIVindependence;
1278 ++StrongSIVsuccesses;
1279 return true;
1280 }
1281 Result.DV[Level].Distance = SE->getConstant(Distance);
1282 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop);
1283 if (Distance.sgt(0))
1284 Result.DV[Level].Direction &= Dependence::DVEntry::LT;
1285 else if (Distance.slt(0))
1286 Result.DV[Level].Direction &= Dependence::DVEntry::GT;
1287 else
1288 Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1289 ++StrongSIVsuccesses;
1290 } else if (Delta->isZero()) {
1291 // since 0/X == 0
1292 Result.DV[Level].Distance = Delta;
1293 NewConstraint.setDistance(Delta, CurLoop);
1294 Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1295 ++StrongSIVsuccesses;
1296 } else {
1297 if (Coeff->isOne()) {
1298 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n");
1299 Result.DV[Level].Distance = Delta; // since X/1 == X
1300 NewConstraint.setDistance(Delta, CurLoop);
1301 } else {
1302 Result.Consistent = false;
1303 NewConstraint.setLine(Coeff, SE->getNegativeSCEV(Coeff),
1304 SE->getNegativeSCEV(Delta), CurLoop);
1305 }
1306
1307 // maybe we can get a useful direction
1308 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta);
1309 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta);
1310 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta);
1311 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff);
1312 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff);
1313 // The double negatives above are confusing.
1314 // It helps to read !SE->isKnownNonZero(Delta)
1315 // as "Delta might be Zero"
1316 unsigned NewDirection = Dependence::DVEntry::NONE;
1317 if ((DeltaMaybePositive && CoeffMaybePositive) ||
1318 (DeltaMaybeNegative && CoeffMaybeNegative))
1319 NewDirection = Dependence::DVEntry::LT;
1320 if (DeltaMaybeZero)
1321 NewDirection |= Dependence::DVEntry::EQ;
1322 if ((DeltaMaybeNegative && CoeffMaybePositive) ||
1323 (DeltaMaybePositive && CoeffMaybeNegative))
1324 NewDirection |= Dependence::DVEntry::GT;
1325 if (NewDirection < Result.DV[Level].Direction)
1326 ++StrongSIVsuccesses;
1327 Result.DV[Level].Direction &= NewDirection;
1328 }
1329 return false;
1330}
1331
1332// weakCrossingSIVtest -
1333// From the paper, Practical Dependence Testing, Section 4.2.2
1334//
1335// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
1336// where i is an induction variable, c1 and c2 are loop invariant,
1337// and a is a constant, we can solve it exactly using the
1338// Weak-Crossing SIV test.
1339//
1340// Given c1 + a*i = c2 - a*i', we can look for the intersection of
1341// the two lines, where i = i', yielding
1342//
1343// c1 + a*i = c2 - a*i
1344// 2a*i = c2 - c1
1345// i = (c2 - c1)/2a
1346//
1347// If i < 0, there is no dependence.
1348// If i > upperbound, there is no dependence.
1349// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
1350// If i = upperbound, there's a dependence with distance = 0.
1351// If i is integral, there's a dependence (all directions).
1352// If the non-integer part = 1/2, there's a dependence (<> directions).
1353// Otherwise, there's no dependence.
1354//
1355// Can prove independence. Failing that,
1356// can sometimes refine the directions.
1357// Can determine iteration for splitting.
1358//
1359// Return true if dependence disproved.
1360bool DependenceInfo::weakCrossingSIVtest(
1361 const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst,
1362 const Loop *CurLoop, unsigned Level, FullDependence &Result,
1363 Constraint &NewConstraint, const SCEV *&SplitIter) const {
1364 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
1365 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n");
1366 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1367 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1368 ++WeakCrossingSIVapplications;
1369 assert(0 < Level && Level <= CommonLevels && "Level out of range");
1370 Level--;
1371 Result.Consistent = false;
1372 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1373 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1374 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
1375 if (Delta->isZero()) {
1376 Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;
1377 Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;
1378 ++WeakCrossingSIVsuccesses;
1379 if (!Result.DV[Level].Direction) {
1380 ++WeakCrossingSIVindependence;
1381 return true;
1382 }
1383 Result.DV[Level].Distance = Delta; // = 0
1384 return false;
1385 }
1386 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff);
1387 if (!ConstCoeff)
1388 return false;
1389
1390 Result.DV[Level].Splitable = true;
1391 if (SE->isKnownNegative(ConstCoeff)) {
1392 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff));
1393 assert(ConstCoeff &&
1394 "dynamic cast of negative of ConstCoeff should yield constant");
1395 Delta = SE->getNegativeSCEV(Delta);
1396 }
1397 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
1398
1399 // compute SplitIter for use by DependenceInfo::getSplitIteration()
1400 SplitIter = SE->getUDivExpr(
1401 SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta),
1402 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff));
1403 LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n");
1404
1405 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
1406 if (!ConstDelta)
1407 return false;
1408
1409 // We're certain that ConstCoeff > 0; therefore,
1410 // if Delta < 0, then no dependence.
1411 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1412 LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n");
1413 if (SE->isKnownNegative(Delta)) {
1414 // No dependence, Delta < 0
1415 ++WeakCrossingSIVindependence;
1416 ++WeakCrossingSIVsuccesses;
1417 return true;
1418 }
1419
1420 // We're certain that Delta > 0 and ConstCoeff > 0.
1421 // Check Delta/(2*ConstCoeff) against upper loop bound
1422 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1423 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
1424 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2);
1425 const SCEV *ML =
1426 SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), ConstantTwo);
1427 LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n");
1428 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) {
1429 // Delta too big, no dependence
1430 ++WeakCrossingSIVindependence;
1431 ++WeakCrossingSIVsuccesses;
1432 return true;
1433 }
1434 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) {
1435 // i = i' = UB
1436 Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;
1437 Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;
1438 ++WeakCrossingSIVsuccesses;
1439 if (!Result.DV[Level].Direction) {
1440 ++WeakCrossingSIVindependence;
1441 return true;
1442 }
1443 Result.DV[Level].Splitable = false;
1444 Result.DV[Level].Distance = SE->getZero(Delta->getType());
1445 return false;
1446 }
1447 }
1448
1449 // check that Coeff divides Delta
1450 APInt APDelta = ConstDelta->getAPInt();
1451 APInt APCoeff = ConstCoeff->getAPInt();
1452 APInt Distance = APDelta; // these need to be initialzed
1453 APInt Remainder = APDelta;
1454 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder);
1455 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1456 if (Remainder != 0) {
1457 // Coeff doesn't divide Delta, no dependence
1458 ++WeakCrossingSIVindependence;
1459 ++WeakCrossingSIVsuccesses;
1460 return true;
1461 }
1462 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");
1463
1464 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
1465 APInt Two = APInt(Distance.getBitWidth(), 2, true);
1466 Remainder = Distance.srem(Two);
1467 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");
1468 if (Remainder != 0) {
1469 // Equal direction isn't possible
1470 Result.DV[Level].Direction &= ~Dependence::DVEntry::EQ;
1471 ++WeakCrossingSIVsuccesses;
1472 }
1473 return false;
1474}
1475
1476// Kirch's algorithm, from
1477//
1478// Optimizing Supercompilers for Supercomputers
1479// Michael Wolfe
1480// MIT Press, 1989
1481//
1482// Program 2.1, page 29.
1483// Computes the GCD of AM and BM.
1484// Also finds a solution to the equation ax - by = gcd(a, b).
1485// Returns true if dependence disproved; i.e., gcd does not divide Delta.
1486static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM,
1487 const APInt &Delta, APInt &G, APInt &X, APInt &Y) {
1488 APInt A0(Bits, 1, true), A1(Bits, 0, true);
1489 APInt B0(Bits, 0, true), B1(Bits, 1, true);
1490 APInt G0 = AM.abs();
1491 APInt G1 = BM.abs();
1492 APInt Q = G0; // these need to be initialized
1493 APInt R = G0;
1494 APInt::sdivrem(G0, G1, Q, R);
1495 while (R != 0) {
1496 // clang-format off
1497 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;
1498 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;
1499 G0 = G1; G1 = R;
1500 // clang-format on
1501 APInt::sdivrem(G0, G1, Q, R);
1502 }
1503 G = G1;
1504 LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n");
1505 X = AM.slt(0) ? -A1 : A1;
1506 Y = BM.slt(0) ? B1 : -B1;
1507
1508 // make sure gcd divides Delta
1509 R = Delta.srem(G);
1510 if (R != 0)
1511 return true; // gcd doesn't divide Delta, no dependence
1512 Q = Delta.sdiv(G);
1513 return false;
1514}
1515
1516static APInt floorOfQuotient(const APInt &A, const APInt &B) {
1517 APInt Q = A; // these need to be initialized
1518 APInt R = A;
1519 APInt::sdivrem(A, B, Q, R);
1520 if (R == 0)
1521 return Q;
1522 if ((A.sgt(0) && B.sgt(0)) || (A.slt(0) && B.slt(0)))
1523 return Q;
1524 else
1525 return Q - 1;
1526}
1527
1528static APInt ceilingOfQuotient(const APInt &A, const APInt &B) {
1529 APInt Q = A; // these need to be initialized
1530 APInt R = A;
1531 APInt::sdivrem(A, B, Q, R);
1532 if (R == 0)
1533 return Q;
1534 if ((A.sgt(0) && B.sgt(0)) || (A.slt(0) && B.slt(0)))
1535 return Q + 1;
1536 else
1537 return Q;
1538}
1539
1540/// Given an affine expression of the form A*k + B, where k is an arbitrary
1541/// integer, infer the possible range of k based on the known range of the
1542/// affine expression. If we know A*k + B is non-negative, i.e.,
1543///
1544/// A*k + B >= 0
1545///
1546/// we can derive the following inequalities for k when A is positive:
1547///
1548/// k >= -B / A
1549///
1550/// Since k is an integer, it means k is greater than or equal to the
1551/// ceil(-B / A).
1552///
1553/// If the upper bound of the affine expression \p UB is passed, the following
1554/// inequality can be derived as well:
1555///
1556/// A*k + B <= UB
1557///
1558/// which leads to:
1559///
1560/// k <= (UB - B) / A
1561///
1562/// Again, as k is an integer, it means k is less than or equal to the
1563/// floor((UB - B) / A).
1564///
1565/// The similar logic applies when A is negative, but the inequalities sign flip
1566/// while working with them.
1567///
1568/// Preconditions: \p A is non-zero, and we know A*k + B is non-negative.
1569static std::pair<std::optional<APInt>, std::optional<APInt>>
1571 const std::optional<APInt> &UB) {
1572 assert(A != 0 && "A must be non-zero");
1573 std::optional<APInt> TL, TU;
1574 if (A.sgt(0)) {
1575 TL = ceilingOfQuotient(-B, A);
1576 LLVM_DEBUG(dbgs() << "\t Possible TL = " << *TL << "\n");
1577 // New bound check - modification to Banerjee's e3 check
1578 if (UB) {
1579 // TODO?: Overflow check for UB - B
1580 TU = floorOfQuotient(*UB - B, A);
1581 LLVM_DEBUG(dbgs() << "\t Possible TU = " << *TU << "\n");
1582 }
1583 } else {
1584 TU = floorOfQuotient(-B, A);
1585 LLVM_DEBUG(dbgs() << "\t Possible TU = " << *TU << "\n");
1586 // New bound check - modification to Banerjee's e3 check
1587 if (UB) {
1588 // TODO?: Overflow check for UB - B
1589 TL = ceilingOfQuotient(*UB - B, A);
1590 LLVM_DEBUG(dbgs() << "\t Possible TL = " << *TL << "\n");
1591 }
1592 }
1593 return std::make_pair(TL, TU);
1594}
1595
1596// exactSIVtest -
1597// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
1598// where i is an induction variable, c1 and c2 are loop invariant, and a1
1599// and a2 are constant, we can solve it exactly using an algorithm developed
1600// by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in:
1601//
1602// Dependence Analysis for Supercomputing
1603// Utpal Banerjee
1604// Kluwer Academic Publishers, 1988
1605//
1606// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
1607// so use them if possible. They're also a bit better with symbolics and,
1608// in the case of the strong SIV test, can compute Distances.
1609//
1610// Return true if dependence disproved.
1611//
1612// This is a modified version of the original Banerjee algorithm. The original
1613// only tested whether Dst depends on Src. This algorithm extends that and
1614// returns all the dependencies that exist between Dst and Src.
1615bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
1616 const SCEV *SrcConst, const SCEV *DstConst,
1617 const Loop *CurLoop, unsigned Level,
1618 FullDependence &Result,
1619 Constraint &NewConstraint) const {
1620 LLVM_DEBUG(dbgs() << "\tExact SIV test\n");
1621 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
1622 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
1623 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1624 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1625 ++ExactSIVapplications;
1626 assert(0 < Level && Level <= CommonLevels && "Level out of range");
1627 Level--;
1628 Result.Consistent = false;
1629 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1630 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1631 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), Delta,
1632 CurLoop);
1633 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
1634 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
1635 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
1636 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
1637 return false;
1638
1639 // find gcd
1640 APInt G, X, Y;
1641 APInt AM = ConstSrcCoeff->getAPInt();
1642 APInt BM = ConstDstCoeff->getAPInt();
1643 APInt CM = ConstDelta->getAPInt();
1644 unsigned Bits = AM.getBitWidth();
1645 if (findGCD(Bits, AM, BM, CM, G, X, Y)) {
1646 // gcd doesn't divide Delta, no dependence
1647 ++ExactSIVindependence;
1648 ++ExactSIVsuccesses;
1649 return true;
1650 }
1651
1652 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
1653
1654 // since SCEV construction normalizes, LM = 0
1655 std::optional<APInt> UM;
1656 // UM is perhaps unavailable, let's check
1657 if (const SCEVConstant *CUB =
1658 collectConstantUpperBound(CurLoop, Delta->getType())) {
1659 UM = CUB->getAPInt();
1660 LLVM_DEBUG(dbgs() << "\t UM = " << *UM << "\n");
1661 }
1662
1663 APInt TU(APInt::getSignedMaxValue(Bits));
1664 APInt TL(APInt::getSignedMinValue(Bits));
1665 APInt TC = CM.sdiv(G);
1666 APInt TX = X * TC;
1667 APInt TY = Y * TC;
1668 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n");
1669 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n");
1670 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n");
1671
1672 APInt TB = BM.sdiv(G);
1673 APInt TA = AM.sdiv(G);
1674
1675 // At this point, we have the following equations:
1676 //
1677 // TA*i0 - TB*i1 = TC
1678 //
1679 // Also, we know that the all pairs of (i0, i1) can be expressed as:
1680 //
1681 // (TX + k*TB, TY + k*TA)
1682 //
1683 // where k is an arbitrary integer.
1684 auto [TL0, TU0] = inferDomainOfAffine(TB, TX, UM);
1685 auto [TL1, TU1] = inferDomainOfAffine(TA, TY, UM);
1686
1687 auto CreateVec = [](const std::optional<APInt> &V0,
1688 const std::optional<APInt> &V1) {
1690 if (V0)
1691 Vec.push_back(*V0);
1692 if (V1)
1693 Vec.push_back(*V1);
1694 return Vec;
1695 };
1696
1697 SmallVector<APInt, 2> TLVec = CreateVec(TL0, TL1);
1698 SmallVector<APInt, 2> TUVec = CreateVec(TU0, TU1);
1699
1700 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");
1701 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");
1702
1703 if (TLVec.empty() || TUVec.empty())
1704 return false;
1705 TL = APIntOps::smax(TLVec.front(), TLVec.back());
1706 TU = APIntOps::smin(TUVec.front(), TUVec.back());
1707 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
1708 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
1709
1710 if (TL.sgt(TU)) {
1711 ++ExactSIVindependence;
1712 ++ExactSIVsuccesses;
1713 return true;
1714 }
1715
1716 // explore directions
1717 unsigned NewDirection = Dependence::DVEntry::NONE;
1718 APInt LowerDistance, UpperDistance;
1719 if (TA.sgt(TB)) {
1720 LowerDistance = (TY - TX) + (TA - TB) * TL;
1721 UpperDistance = (TY - TX) + (TA - TB) * TU;
1722 } else {
1723 LowerDistance = (TY - TX) + (TA - TB) * TU;
1724 UpperDistance = (TY - TX) + (TA - TB) * TL;
1725 }
1726
1727 LLVM_DEBUG(dbgs() << "\t LowerDistance = " << LowerDistance << "\n");
1728 LLVM_DEBUG(dbgs() << "\t UpperDistance = " << UpperDistance << "\n");
1729
1730 APInt Zero(Bits, 0, true);
1731 if (LowerDistance.sle(Zero) && UpperDistance.sge(Zero)) {
1732 NewDirection |= Dependence::DVEntry::EQ;
1733 ++ExactSIVsuccesses;
1734 }
1735 if (LowerDistance.slt(0)) {
1736 NewDirection |= Dependence::DVEntry::GT;
1737 ++ExactSIVsuccesses;
1738 }
1739 if (UpperDistance.sgt(0)) {
1740 NewDirection |= Dependence::DVEntry::LT;
1741 ++ExactSIVsuccesses;
1742 }
1743
1744 // finished
1745 Result.DV[Level].Direction &= NewDirection;
1746 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)
1747 ++ExactSIVindependence;
1748 LLVM_DEBUG(dbgs() << "\t Result = ");
1749 LLVM_DEBUG(Result.dump(dbgs()));
1750 return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
1751}
1752
1753// Return true if the divisor evenly divides the dividend.
1754static bool isRemainderZero(const SCEVConstant *Dividend,
1755 const SCEVConstant *Divisor) {
1756 const APInt &ConstDividend = Dividend->getAPInt();
1757 const APInt &ConstDivisor = Divisor->getAPInt();
1758 return ConstDividend.srem(ConstDivisor) == 0;
1759}
1760
1761// weakZeroSrcSIVtest -
1762// From the paper, Practical Dependence Testing, Section 4.2.2
1763//
1764// When we have a pair of subscripts of the form [c1] and [c2 + a*i],
1765// where i is an induction variable, c1 and c2 are loop invariant,
1766// and a is a constant, we can solve it exactly using the
1767// Weak-Zero SIV test.
1768//
1769// Given
1770//
1771// c1 = c2 + a*i
1772//
1773// we get
1774//
1775// (c1 - c2)/a = i
1776//
1777// If i is not an integer, there's no dependence.
1778// If i < 0 or > UB, there's no dependence.
1779// If i = 0, the direction is >= and peeling the
1780// 1st iteration will break the dependence.
1781// If i = UB, the direction is <= and peeling the
1782// last iteration will break the dependence.
1783// Otherwise, the direction is *.
1784//
1785// Can prove independence. Failing that, we can sometimes refine
1786// the directions. Can sometimes show that first or last
1787// iteration carries all the dependences (so worth peeling).
1788//
1789// (see also weakZeroDstSIVtest)
1790//
1791// Return true if dependence disproved.
1792bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
1793 const SCEV *SrcConst,
1794 const SCEV *DstConst,
1795 const Loop *CurLoop, unsigned Level,
1796 FullDependence &Result,
1797 Constraint &NewConstraint) const {
1798 // For the WeakSIV test, it's possible the loop isn't common to
1799 // the Src and Dst loops. If it isn't, then there's no need to
1800 // record a direction.
1801 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
1802 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n");
1803 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1804 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1805 ++WeakZeroSIVapplications;
1806 assert(0 < Level && Level <= MaxLevels && "Level out of range");
1807 Level--;
1808 Result.Consistent = false;
1809 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
1810 NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta,
1811 CurLoop);
1812 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1813 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
1814 if (Level < CommonLevels) {
1815 Result.DV[Level].Direction &= Dependence::DVEntry::GE;
1816 Result.DV[Level].PeelFirst = true;
1817 ++WeakZeroSIVsuccesses;
1818 }
1819 return false; // dependences caused by first iteration
1820 }
1821 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
1822 if (!ConstCoeff)
1823 return false;
1824 const SCEV *AbsCoeff = SE->isKnownNegative(ConstCoeff)
1825 ? SE->getNegativeSCEV(ConstCoeff)
1826 : ConstCoeff;
1827 const SCEV *NewDelta =
1828 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
1829
1830 // check that Delta/SrcCoeff < iteration count
1831 // really check NewDelta < count*AbsCoeff
1832 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1833 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
1834 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
1835 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
1836 ++WeakZeroSIVindependence;
1837 ++WeakZeroSIVsuccesses;
1838 return true;
1839 }
1840 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
1841 // dependences caused by last iteration
1842 if (Level < CommonLevels) {
1843 Result.DV[Level].Direction &= Dependence::DVEntry::LE;
1844 Result.DV[Level].PeelLast = true;
1845 ++WeakZeroSIVsuccesses;
1846 }
1847 return false;
1848 }
1849 }
1850
1851 // check that Delta/SrcCoeff >= 0
1852 // really check that NewDelta >= 0
1853 if (SE->isKnownNegative(NewDelta)) {
1854 // No dependence, newDelta < 0
1855 ++WeakZeroSIVindependence;
1856 ++WeakZeroSIVsuccesses;
1857 return true;
1858 }
1859
1860 // if SrcCoeff doesn't divide Delta, then no dependence
1861 if (isa<SCEVConstant>(Delta) &&
1862 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
1863 ++WeakZeroSIVindependence;
1864 ++WeakZeroSIVsuccesses;
1865 return true;
1866 }
1867 return false;
1868}
1869
1870// weakZeroDstSIVtest -
1871// From the paper, Practical Dependence Testing, Section 4.2.2
1872//
1873// When we have a pair of subscripts of the form [c1 + a*i] and [c2],
1874// where i is an induction variable, c1 and c2 are loop invariant,
1875// and a is a constant, we can solve it exactly using the
1876// Weak-Zero SIV test.
1877//
1878// Given
1879//
1880// c1 + a*i = c2
1881//
1882// we get
1883//
1884// i = (c2 - c1)/a
1885//
1886// If i is not an integer, there's no dependence.
1887// If i < 0 or > UB, there's no dependence.
1888// If i = 0, the direction is <= and peeling the
1889// 1st iteration will break the dependence.
1890// If i = UB, the direction is >= and peeling the
1891// last iteration will break the dependence.
1892// Otherwise, the direction is *.
1893//
1894// Can prove independence. Failing that, we can sometimes refine
1895// the directions. Can sometimes show that first or last
1896// iteration carries all the dependences (so worth peeling).
1897//
1898// (see also weakZeroSrcSIVtest)
1899//
1900// Return true if dependence disproved.
1901bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
1902 const SCEV *SrcConst,
1903 const SCEV *DstConst,
1904 const Loop *CurLoop, unsigned Level,
1905 FullDependence &Result,
1906 Constraint &NewConstraint) const {
1907 // For the WeakSIV test, it's possible the loop isn't common to the
1908 // Src and Dst loops. If it isn't, then there's no need to record a direction.
1909 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
1910 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n");
1911 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1912 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1913 ++WeakZeroSIVapplications;
1914 assert(0 < Level && Level <= SrcLevels && "Level out of range");
1915 Level--;
1916 Result.Consistent = false;
1917 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1918 NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta,
1919 CurLoop);
1920 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
1921 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {
1922 if (Level < CommonLevels) {
1923 Result.DV[Level].Direction &= Dependence::DVEntry::LE;
1924 Result.DV[Level].PeelFirst = true;
1925 ++WeakZeroSIVsuccesses;
1926 }
1927 return false; // dependences caused by first iteration
1928 }
1929 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
1930 if (!ConstCoeff)
1931 return false;
1932 const SCEV *AbsCoeff = SE->isKnownNegative(ConstCoeff)
1933 ? SE->getNegativeSCEV(ConstCoeff)
1934 : ConstCoeff;
1935 const SCEV *NewDelta =
1936 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
1937
1938 // check that Delta/SrcCoeff < iteration count
1939 // really check NewDelta < count*AbsCoeff
1940 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1941 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");
1942 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
1943 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
1944 ++WeakZeroSIVindependence;
1945 ++WeakZeroSIVsuccesses;
1946 return true;
1947 }
1948 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
1949 // dependences caused by last iteration
1950 if (Level < CommonLevels) {
1951 Result.DV[Level].Direction &= Dependence::DVEntry::GE;
1952 Result.DV[Level].PeelLast = true;
1953 ++WeakZeroSIVsuccesses;
1954 }
1955 return false;
1956 }
1957 }
1958
1959 // check that Delta/SrcCoeff >= 0
1960 // really check that NewDelta >= 0
1961 if (SE->isKnownNegative(NewDelta)) {
1962 // No dependence, newDelta < 0
1963 ++WeakZeroSIVindependence;
1964 ++WeakZeroSIVsuccesses;
1965 return true;
1966 }
1967
1968 // if SrcCoeff doesn't divide Delta, then no dependence
1969 if (isa<SCEVConstant>(Delta) &&
1970 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
1971 ++WeakZeroSIVindependence;
1972 ++WeakZeroSIVsuccesses;
1973 return true;
1974 }
1975 return false;
1976}
1977
1978// exactRDIVtest - Tests the RDIV subscript pair for dependence.
1979// Things of the form [c1 + a*i] and [c2 + b*j],
1980// where i and j are induction variable, c1 and c2 are loop invariant,
1981// and a and b are constants.
1982// Returns true if any possible dependence is disproved.
1983// Marks the result as inconsistent.
1984// Works in some cases that symbolicRDIVtest doesn't, and vice versa.
1985bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
1986 const SCEV *SrcConst, const SCEV *DstConst,
1987 const Loop *SrcLoop, const Loop *DstLoop,
1988 FullDependence &Result) const {
1990 return false;
1991 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n");
1992 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");
1993 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");
1994 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");
1995 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");
1996 ++ExactRDIVapplications;
1997 Result.Consistent = false;
1998 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1999 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
2000 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
2001 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
2002 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
2003 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
2004 return false;
2005
2006 // find gcd
2007 APInt G, X, Y;
2008 APInt AM = ConstSrcCoeff->getAPInt();
2009 APInt BM = ConstDstCoeff->getAPInt();
2010 APInt CM = ConstDelta->getAPInt();
2011 unsigned Bits = AM.getBitWidth();
2012 if (findGCD(Bits, AM, BM, CM, G, X, Y)) {
2013 // gcd doesn't divide Delta, no dependence
2014 ++ExactRDIVindependence;
2015 return true;
2016 }
2017
2018 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");
2019
2020 // since SCEV construction seems to normalize, LM = 0
2021 std::optional<APInt> SrcUM;
2022 // SrcUM is perhaps unavailable, let's check
2023 if (const SCEVConstant *UpperBound =
2024 collectConstantUpperBound(SrcLoop, Delta->getType())) {
2025 SrcUM = UpperBound->getAPInt();
2026 LLVM_DEBUG(dbgs() << "\t SrcUM = " << *SrcUM << "\n");
2027 }
2028
2029 std::optional<APInt> DstUM;
2030 // UM is perhaps unavailable, let's check
2031 if (const SCEVConstant *UpperBound =
2032 collectConstantUpperBound(DstLoop, Delta->getType())) {
2033 DstUM = UpperBound->getAPInt();
2034 LLVM_DEBUG(dbgs() << "\t DstUM = " << *DstUM << "\n");
2035 }
2036
2037 APInt TU(APInt::getSignedMaxValue(Bits));
2038 APInt TL(APInt::getSignedMinValue(Bits));
2039 APInt TC = CM.sdiv(G);
2040 APInt TX = X * TC;
2041 APInt TY = Y * TC;
2042 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n");
2043 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n");
2044 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n");
2045
2046 APInt TB = BM.sdiv(G);
2047 APInt TA = AM.sdiv(G);
2048
2049 // At this point, we have the following equations:
2050 //
2051 // TA*i - TB*j = TC
2052 //
2053 // Also, we know that the all pairs of (i, j) can be expressed as:
2054 //
2055 // (TX + k*TB, TY + k*TA)
2056 //
2057 // where k is an arbitrary integer.
2058 auto [TL0, TU0] = inferDomainOfAffine(TB, TX, SrcUM);
2059 auto [TL1, TU1] = inferDomainOfAffine(TA, TY, DstUM);
2060
2061 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");
2062 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");
2063
2064 auto CreateVec = [](const std::optional<APInt> &V0,
2065 const std::optional<APInt> &V1) {
2067 if (V0)
2068 Vec.push_back(*V0);
2069 if (V1)
2070 Vec.push_back(*V1);
2071 return Vec;
2072 };
2073
2074 SmallVector<APInt, 2> TLVec = CreateVec(TL0, TL1);
2075 SmallVector<APInt, 2> TUVec = CreateVec(TU0, TU1);
2076 if (TLVec.empty() || TUVec.empty())
2077 return false;
2078
2079 TL = APIntOps::smax(TLVec.front(), TLVec.back());
2080 TU = APIntOps::smin(TUVec.front(), TUVec.back());
2081 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");
2082 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");
2083
2084 if (TL.sgt(TU))
2085 ++ExactRDIVindependence;
2086 return TL.sgt(TU);
2087}
2088
2089// symbolicRDIVtest -
2090// In Section 4.5 of the Practical Dependence Testing paper,the authors
2091// introduce a special case of Banerjee's Inequalities (also called the
2092// Extreme-Value Test) that can handle some of the SIV and RDIV cases,
2093// particularly cases with symbolics. Since it's only able to disprove
2094// dependence (not compute distances or directions), we'll use it as a
2095// fall back for the other tests.
2096//
2097// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
2098// where i and j are induction variables and c1 and c2 are loop invariants,
2099// we can use the symbolic tests to disprove some dependences, serving as a
2100// backup for the RDIV test. Note that i and j can be the same variable,
2101// letting this test serve as a backup for the various SIV tests.
2102//
2103// For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some
2104// 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)
2105// loop bounds for the i and j loops, respectively. So, ...
2106//
2107// c1 + a1*i = c2 + a2*j
2108// a1*i - a2*j = c2 - c1
2109//
2110// To test for a dependence, we compute c2 - c1 and make sure it's in the
2111// range of the maximum and minimum possible values of a1*i - a2*j.
2112// Considering the signs of a1 and a2, we have 4 possible cases:
2113//
2114// 1) If a1 >= 0 and a2 >= 0, then
2115// a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0
2116// -a2*N2 <= c2 - c1 <= a1*N1
2117//
2118// 2) If a1 >= 0 and a2 <= 0, then
2119// a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2
2120// 0 <= c2 - c1 <= a1*N1 - a2*N2
2121//
2122// 3) If a1 <= 0 and a2 >= 0, then
2123// a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0
2124// a1*N1 - a2*N2 <= c2 - c1 <= 0
2125//
2126// 4) If a1 <= 0 and a2 <= 0, then
2127// a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2
2128// a1*N1 <= c2 - c1 <= -a2*N2
2129//
2130// return true if dependence disproved
2131bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2,
2132 const SCEV *C1, const SCEV *C2,
2133 const Loop *Loop1,
2134 const Loop *Loop2) const {
2136 return false;
2137 ++SymbolicRDIVapplications;
2138 LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
2139 LLVM_DEBUG(dbgs() << "\t A1 = " << *A1);
2140 LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
2141 LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n");
2142 LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n");
2143 LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n");
2144 const SCEV *N1 = collectUpperBound(Loop1, A1->getType());
2145 const SCEV *N2 = collectUpperBound(Loop2, A1->getType());
2146 LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n");
2147 LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n");
2148 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1);
2149 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2);
2150 LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n");
2151 LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n");
2152 if (SE->isKnownNonNegative(A1)) {
2153 if (SE->isKnownNonNegative(A2)) {
2154 // A1 >= 0 && A2 >= 0
2155 if (N1) {
2156 // make sure that c2 - c1 <= a1*N1
2157 const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2158 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
2159 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) {
2160 ++SymbolicRDIVindependence;
2161 return true;
2162 }
2163 }
2164 if (N2) {
2165 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2
2166 const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2167 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
2168 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) {
2169 ++SymbolicRDIVindependence;
2170 return true;
2171 }
2172 }
2173 } else if (SE->isKnownNonPositive(A2)) {
2174 // a1 >= 0 && a2 <= 0
2175 if (N1 && N2) {
2176 // make sure that c2 - c1 <= a1*N1 - a2*N2
2177 const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2178 const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2179 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
2180 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
2181 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) {
2182 ++SymbolicRDIVindependence;
2183 return true;
2184 }
2185 }
2186 // make sure that 0 <= c2 - c1
2187 if (SE->isKnownNegative(C2_C1)) {
2188 ++SymbolicRDIVindependence;
2189 return true;
2190 }
2191 }
2192 } else if (SE->isKnownNonPositive(A1)) {
2193 if (SE->isKnownNonNegative(A2)) {
2194 // a1 <= 0 && a2 >= 0
2195 if (N1 && N2) {
2196 // make sure that a1*N1 - a2*N2 <= c2 - c1
2197 const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2198 const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2199 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
2200 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
2201 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) {
2202 ++SymbolicRDIVindependence;
2203 return true;
2204 }
2205 }
2206 // make sure that c2 - c1 <= 0
2207 if (SE->isKnownPositive(C2_C1)) {
2208 ++SymbolicRDIVindependence;
2209 return true;
2210 }
2211 } else if (SE->isKnownNonPositive(A2)) {
2212 // a1 <= 0 && a2 <= 0
2213 if (N1) {
2214 // make sure that a1*N1 <= c2 - c1
2215 const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2216 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");
2217 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) {
2218 ++SymbolicRDIVindependence;
2219 return true;
2220 }
2221 }
2222 if (N2) {
2223 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2
2224 const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2225 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");
2226 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) {
2227 ++SymbolicRDIVindependence;
2228 return true;
2229 }
2230 }
2231 }
2232 }
2233 return false;
2234}
2235
2236// testSIV -
2237// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]
2238// where i is an induction variable, c1 and c2 are loop invariant, and a1 and
2239// a2 are constant, we attack it with an SIV test. While they can all be
2240// solved with the Exact SIV test, it's worthwhile to use simpler tests when
2241// they apply; they're cheaper and sometimes more precise.
2242//
2243// Return true if dependence disproved.
2244bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,
2245 FullDependence &Result, Constraint &NewConstraint,
2246 const SCEV *&SplitIter) const {
2247 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2248 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2249 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
2250 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
2251 if (SrcAddRec && DstAddRec) {
2252 const SCEV *SrcConst = SrcAddRec->getStart();
2253 const SCEV *DstConst = DstAddRec->getStart();
2254 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
2255 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
2256 const Loop *CurLoop = SrcAddRec->getLoop();
2257 assert(CurLoop == DstAddRec->getLoop() &&
2258 "both loops in SIV should be same");
2259 Level = mapSrcLoop(CurLoop);
2260 bool disproven;
2261 if (SrcCoeff == DstCoeff)
2262 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, Level,
2263 Result, NewConstraint);
2264 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff))
2265 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
2266 Level, Result, NewConstraint, SplitIter);
2267 else
2268 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
2269 Level, Result, NewConstraint);
2270 return disproven || gcdMIVtest(Src, Dst, Result) ||
2271 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
2272 CurLoop);
2273 }
2274 if (SrcAddRec) {
2275 const SCEV *SrcConst = SrcAddRec->getStart();
2276 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
2277 const SCEV *DstConst = Dst;
2278 const Loop *CurLoop = SrcAddRec->getLoop();
2279 Level = mapSrcLoop(CurLoop);
2280 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, Level,
2281 Result, NewConstraint) ||
2282 gcdMIVtest(Src, Dst, Result);
2283 }
2284 if (DstAddRec) {
2285 const SCEV *DstConst = DstAddRec->getStart();
2286 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
2287 const SCEV *SrcConst = Src;
2288 const Loop *CurLoop = DstAddRec->getLoop();
2289 Level = mapDstLoop(CurLoop);
2290 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, CurLoop, Level,
2291 Result, NewConstraint) ||
2292 gcdMIVtest(Src, Dst, Result);
2293 }
2294 llvm_unreachable("SIV test expected at least one AddRec");
2295 return false;
2296}
2297
2298// testRDIV -
2299// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
2300// where i and j are induction variables, c1 and c2 are loop invariant,
2301// and a1 and a2 are constant, we can solve it exactly with an easy adaptation
2302// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
2303// It doesn't make sense to talk about distance or direction in this case,
2304// so there's no point in making special versions of the Strong SIV test or
2305// the Weak-crossing SIV test.
2306//
2307// With minor algebra, this test can also be used for things like
2308// [c1 + a1*i + a2*j][c2].
2309//
2310// Return true if dependence disproved.
2311bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst,
2312 FullDependence &Result) const {
2313 // we have 3 possible situations here:
2314 // 1) [a*i + b] and [c*j + d]
2315 // 2) [a*i + c*j + b] and [d]
2316 // 3) [b] and [a*i + c*j + d]
2317 // We need to find what we've got and get organized
2318
2319 const SCEV *SrcConst, *DstConst;
2320 const SCEV *SrcCoeff, *DstCoeff;
2321 const Loop *SrcLoop, *DstLoop;
2322
2323 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2324 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2325 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
2326 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
2327 if (SrcAddRec && DstAddRec) {
2328 SrcConst = SrcAddRec->getStart();
2329 SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
2330 SrcLoop = SrcAddRec->getLoop();
2331 DstConst = DstAddRec->getStart();
2332 DstCoeff = DstAddRec->getStepRecurrence(*SE);
2333 DstLoop = DstAddRec->getLoop();
2334 } else if (SrcAddRec) {
2335 if (const SCEVAddRecExpr *tmpAddRec =
2336 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) {
2337 SrcConst = tmpAddRec->getStart();
2338 SrcCoeff = tmpAddRec->getStepRecurrence(*SE);
2339 SrcLoop = tmpAddRec->getLoop();
2340 DstConst = Dst;
2341 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE));
2342 DstLoop = SrcAddRec->getLoop();
2343 } else
2344 llvm_unreachable("RDIV reached by surprising SCEVs");
2345 } else if (DstAddRec) {
2346 if (const SCEVAddRecExpr *tmpAddRec =
2347 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) {
2348 DstConst = tmpAddRec->getStart();
2349 DstCoeff = tmpAddRec->getStepRecurrence(*SE);
2350 DstLoop = tmpAddRec->getLoop();
2351 SrcConst = Src;
2352 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE));
2353 SrcLoop = DstAddRec->getLoop();
2354 } else
2355 llvm_unreachable("RDIV reached by surprising SCEVs");
2356 } else
2357 llvm_unreachable("RDIV expected at least one AddRec");
2358 return exactRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, SrcLoop, DstLoop,
2359 Result) ||
2360 gcdMIVtest(Src, Dst, Result) ||
2361 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, SrcLoop,
2362 DstLoop);
2363}
2364
2365// Tests the single-subscript MIV pair (Src and Dst) for dependence.
2366// Return true if dependence disproved.
2367// Can sometimes refine direction vectors.
2368bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst,
2369 const SmallBitVector &Loops,
2370 FullDependence &Result) const {
2371 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");
2372 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");
2373 Result.Consistent = false;
2374 return gcdMIVtest(Src, Dst, Result) ||
2375 banerjeeMIVtest(Src, Dst, Loops, Result);
2376}
2377
2378// Given a product, e.g., 10*X*Y, returns the first constant operand,
2379// in this case 10. If there is no constant part, returns std::nullopt.
2380static std::optional<APInt> getConstantPart(const SCEV *Expr) {
2381 if (const auto *Constant = dyn_cast<SCEVConstant>(Expr))
2382 return Constant->getAPInt();
2383 if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr))
2384 if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0)))
2385 return Constant->getAPInt();
2386 return std::nullopt;
2387}
2388
2389bool DependenceInfo::accumulateCoefficientsGCD(const SCEV *Expr,
2390 const Loop *CurLoop,
2391 const SCEV *&CurLoopCoeff,
2392 APInt &RunningGCD) const {
2393 // If RunningGCD is already 1, exit early.
2394 // TODO: It might be better to continue the recursion to find CurLoopCoeff.
2395 if (RunningGCD == 1)
2396 return true;
2397
2398 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
2399 if (!AddRec) {
2400 assert(isLoopInvariant(Expr, CurLoop) &&
2401 "Expected loop invariant expression");
2402 return true;
2403 }
2404
2405 assert(AddRec->isAffine() && "Unexpected Expr");
2406 const SCEV *Start = AddRec->getStart();
2407 const SCEV *Step = AddRec->getStepRecurrence(*SE);
2408 if (AddRec->getLoop() == CurLoop) {
2409 CurLoopCoeff = Step;
2410 } else {
2411 std::optional<APInt> ConstCoeff = getConstantPart(Step);
2412
2413 // If the coefficient is the product of a constant and other stuff, we can
2414 // use the constant in the GCD computation.
2415 if (!ConstCoeff)
2416 return false;
2417
2418 // TODO: What happens if ConstCoeff is the "most negative" signed number
2419 // (e.g. -128 for 8 bit wide APInt)?
2420 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff->abs());
2421 }
2422
2423 return accumulateCoefficientsGCD(Start, CurLoop, CurLoopCoeff, RunningGCD);
2424}
2425
2426//===----------------------------------------------------------------------===//
2427// gcdMIVtest -
2428// Tests an MIV subscript pair for dependence.
2429// Returns true if any possible dependence is disproved.
2430// Marks the result as inconsistent.
2431// Can sometimes disprove the equal direction for 1 or more loops,
2432// as discussed in Michael Wolfe's book,
2433// High Performance Compilers for Parallel Computing, page 235.
2434//
2435// We spend some effort (code!) to handle cases like
2436// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
2437// but M and N are just loop-invariant variables.
2438// This should help us handle linearized subscripts;
2439// also makes this test a useful backup to the various SIV tests.
2440//
2441// It occurs to me that the presence of loop-invariant variables
2442// changes the nature of the test from "greatest common divisor"
2443// to "a common divisor".
2444bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
2445 FullDependence &Result) const {
2447 return false;
2448 LLVM_DEBUG(dbgs() << "starting gcd\n");
2449 ++GCDapplications;
2450 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType());
2451 APInt RunningGCD = APInt::getZero(BitWidth);
2452
2453 // Examine Src coefficients.
2454 // Compute running GCD and record source constant.
2455 // Because we're looking for the constant at the end of the chain,
2456 // we can't quit the loop just because the GCD == 1.
2457 const SCEV *Coefficients = Src;
2458 while (const SCEVAddRecExpr *AddRec =
2459 dyn_cast<SCEVAddRecExpr>(Coefficients)) {
2460 const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2461 // If the coefficient is the product of a constant and other stuff,
2462 // we can use the constant in the GCD computation.
2463 std::optional<APInt> ConstCoeff = getConstantPart(Coeff);
2464 if (!ConstCoeff)
2465 return false;
2466 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff->abs());
2467 Coefficients = AddRec->getStart();
2468 }
2469 const SCEV *SrcConst = Coefficients;
2470
2471 // Examine Dst coefficients.
2472 // Compute running GCD and record destination constant.
2473 // Because we're looking for the constant at the end of the chain,
2474 // we can't quit the loop just because the GCD == 1.
2475 Coefficients = Dst;
2476 while (const SCEVAddRecExpr *AddRec =
2477 dyn_cast<SCEVAddRecExpr>(Coefficients)) {
2478 const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2479 // If the coefficient is the product of a constant and other stuff,
2480 // we can use the constant in the GCD computation.
2481 std::optional<APInt> ConstCoeff = getConstantPart(Coeff);
2482 if (!ConstCoeff)
2483 return false;
2484 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff->abs());
2485 Coefficients = AddRec->getStart();
2486 }
2487 const SCEV *DstConst = Coefficients;
2488
2489 APInt ExtraGCD = APInt::getZero(BitWidth);
2490 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
2491 LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n");
2492 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta);
2493 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) {
2494 // If Delta is a sum of products, we may be able to make further progress.
2495 for (const SCEV *Operand : Sum->operands()) {
2496 if (isa<SCEVConstant>(Operand)) {
2497 assert(!Constant && "Surprised to find multiple constants");
2498 Constant = cast<SCEVConstant>(Operand);
2499 } else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) {
2500 // Search for constant operand to participate in GCD;
2501 // If none found; return false.
2502 std::optional<APInt> ConstOp = getConstantPart(Product);
2503 if (!ConstOp)
2504 return false;
2505 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, ConstOp->abs());
2506 } else
2507 return false;
2508 }
2509 }
2510 if (!Constant)
2511 return false;
2512 APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt();
2513 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n");
2514 if (ConstDelta == 0)
2515 return false;
2516 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD);
2517 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n");
2518 APInt Remainder = ConstDelta.srem(RunningGCD);
2519 if (Remainder != 0) {
2520 ++GCDindependence;
2521 return true;
2522 }
2523
2524 // Try to disprove equal directions.
2525 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
2526 // the code above can't disprove the dependence because the GCD = 1.
2527 // So we consider what happen if i = i' and what happens if j = j'.
2528 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
2529 // which is infeasible, so we can disallow the = direction for the i level.
2530 // Setting j = j' doesn't help matters, so we end up with a direction vector
2531 // of [<>, *]
2532 //
2533 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],
2534 // we need to remember that the constant part is 5 and the RunningGCD should
2535 // be initialized to ExtraGCD = 30.
2536 LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n');
2537
2538 bool Improved = false;
2539 Coefficients = Src;
2540 while (const SCEVAddRecExpr *AddRec =
2541 dyn_cast<SCEVAddRecExpr>(Coefficients)) {
2542 Coefficients = AddRec->getStart();
2543 const Loop *CurLoop = AddRec->getLoop();
2544 RunningGCD = ExtraGCD;
2545 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE);
2546 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff);
2547
2548 if (!accumulateCoefficientsGCD(Src, CurLoop, SrcCoeff, RunningGCD) ||
2549 !accumulateCoefficientsGCD(Dst, CurLoop, DstCoeff, RunningGCD))
2550 return false;
2551
2552 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff);
2553 // If the coefficient is the product of a constant and other stuff,
2554 // we can use the constant in the GCD computation.
2555 std::optional<APInt> ConstCoeff = getConstantPart(Delta);
2556 if (!ConstCoeff)
2557 // The difference of the two coefficients might not be a product
2558 // or constant, in which case we give up on this direction.
2559 continue;
2560 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff->abs());
2561 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
2562 if (RunningGCD != 0) {
2563 Remainder = ConstDelta.srem(RunningGCD);
2564 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
2565 if (Remainder != 0) {
2566 unsigned Level = mapSrcLoop(CurLoop);
2567 Result.DV[Level - 1].Direction &= ~Dependence::DVEntry::EQ;
2568 Improved = true;
2569 }
2570 }
2571 }
2572 if (Improved)
2573 ++GCDsuccesses;
2574 LLVM_DEBUG(dbgs() << "all done\n");
2575 return false;
2576}
2577
2578//===----------------------------------------------------------------------===//
2579// banerjeeMIVtest -
2580// Use Banerjee's Inequalities to test an MIV subscript pair.
2581// (Wolfe, in the race-car book, calls this the Extreme Value Test.)
2582// Generally follows the discussion in Section 2.5.2 of
2583//
2584// Optimizing Supercompilers for Supercomputers
2585// Michael Wolfe
2586//
2587// The inequalities given on page 25 are simplified in that loops are
2588// normalized so that the lower bound is always 0 and the stride is always 1.
2589// For example, Wolfe gives
2590//
2591// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2592//
2593// where A_k is the coefficient of the kth index in the source subscript,
2594// B_k is the coefficient of the kth index in the destination subscript,
2595// U_k is the upper bound of the kth index, L_k is the lower bound of the Kth
2596// index, and N_k is the stride of the kth index. Since all loops are normalized
2597// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
2598// equation to
2599//
2600// LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
2601// = (A^-_k - B_k)^- (U_k - 1) - B_k
2602//
2603// Similar simplifications are possible for the other equations.
2604//
2605// When we can't determine the number of iterations for a loop,
2606// we use NULL as an indicator for the worst case, infinity.
2607// When computing the upper bound, NULL denotes +inf;
2608// for the lower bound, NULL denotes -inf.
2609//
2610// Return true if dependence disproved.
2611bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst,
2612 const SmallBitVector &Loops,
2613 FullDependence &Result) const {
2615 return false;
2616 LLVM_DEBUG(dbgs() << "starting Banerjee\n");
2617 ++BanerjeeApplications;
2618 LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n');
2619 const SCEV *A0;
2620 CoefficientInfo *A = collectCoeffInfo(Src, true, A0);
2621 LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n');
2622 const SCEV *B0;
2623 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0);
2624 BoundInfo *Bound = new BoundInfo[MaxLevels + 1];
2625 const SCEV *Delta = SE->getMinusSCEV(B0, A0);
2626 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
2627
2628 // Compute bounds for all the * directions.
2629 LLVM_DEBUG(dbgs() << "\tBounds[*]\n");
2630 for (unsigned K = 1; K <= MaxLevels; ++K) {
2631 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;
2632 Bound[K].Direction = Dependence::DVEntry::ALL;
2633 Bound[K].DirSet = Dependence::DVEntry::NONE;
2634 findBoundsALL(A, B, Bound, K);
2635#ifndef NDEBUG
2636 LLVM_DEBUG(dbgs() << "\t " << K << '\t');
2637 if (Bound[K].Lower[Dependence::DVEntry::ALL])
2638 LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
2639 else
2640 LLVM_DEBUG(dbgs() << "-inf\t");
2641 if (Bound[K].Upper[Dependence::DVEntry::ALL])
2642 LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
2643 else
2644 LLVM_DEBUG(dbgs() << "+inf\n");
2645#endif
2646 }
2647
2648 // Test the *, *, *, ... case.
2649 bool Disproved = false;
2650 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) {
2651 // Explore the direction vector hierarchy.
2652 unsigned DepthExpanded = 0;
2653 unsigned NewDeps =
2654 exploreDirections(1, A, B, Bound, Loops, DepthExpanded, Delta);
2655 if (NewDeps > 0) {
2656 bool Improved = false;
2657 for (unsigned K = 1; K <= CommonLevels; ++K) {
2658 if (Loops[K]) {
2659 unsigned Old = Result.DV[K - 1].Direction;
2660 Result.DV[K - 1].Direction = Old & Bound[K].DirSet;
2661 Improved |= Old != Result.DV[K - 1].Direction;
2662 if (!Result.DV[K - 1].Direction) {
2663 Improved = false;
2664 Disproved = true;
2665 break;
2666 }
2667 }
2668 }
2669 if (Improved)
2670 ++BanerjeeSuccesses;
2671 } else {
2672 ++BanerjeeIndependence;
2673 Disproved = true;
2674 }
2675 } else {
2676 ++BanerjeeIndependence;
2677 Disproved = true;
2678 }
2679 delete[] Bound;
2680 delete[] A;
2681 delete[] B;
2682 return Disproved;
2683}
2684
2685// Hierarchically expands the direction vector
2686// search space, combining the directions of discovered dependences
2687// in the DirSet field of Bound. Returns the number of distinct
2688// dependences discovered. If the dependence is disproved,
2689// it will return 0.
2690unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A,
2691 CoefficientInfo *B, BoundInfo *Bound,
2692 const SmallBitVector &Loops,
2693 unsigned &DepthExpanded,
2694 const SCEV *Delta) const {
2695 // This algorithm has worst case complexity of O(3^n), where 'n' is the number
2696 // of common loop levels. To avoid excessive compile-time, pessimize all the
2697 // results and immediately return when the number of common levels is beyond
2698 // the given threshold.
2699 if (CommonLevels > MIVMaxLevelThreshold) {
2700 LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV "
2701 "direction exploration is terminated.\n");
2702 for (unsigned K = 1; K <= CommonLevels; ++K)
2703 if (Loops[K])
2704 Bound[K].DirSet = Dependence::DVEntry::ALL;
2705 return 1;
2706 }
2707
2708 if (Level > CommonLevels) {
2709 // record result
2710 LLVM_DEBUG(dbgs() << "\t[");
2711 for (unsigned K = 1; K <= CommonLevels; ++K) {
2712 if (Loops[K]) {
2713 Bound[K].DirSet |= Bound[K].Direction;
2714#ifndef NDEBUG
2715 switch (Bound[K].Direction) {
2717 LLVM_DEBUG(dbgs() << " <");
2718 break;
2720 LLVM_DEBUG(dbgs() << " =");
2721 break;
2723 LLVM_DEBUG(dbgs() << " >");
2724 break;
2726 LLVM_DEBUG(dbgs() << " *");
2727 break;
2728 default:
2729 llvm_unreachable("unexpected Bound[K].Direction");
2730 }
2731#endif
2732 }
2733 }
2734 LLVM_DEBUG(dbgs() << " ]\n");
2735 return 1;
2736 }
2737 if (Loops[Level]) {
2738 if (Level > DepthExpanded) {
2739 DepthExpanded = Level;
2740 // compute bounds for <, =, > at current level
2741 findBoundsLT(A, B, Bound, Level);
2742 findBoundsGT(A, B, Bound, Level);
2743 findBoundsEQ(A, B, Bound, Level);
2744#ifndef NDEBUG
2745 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
2746 LLVM_DEBUG(dbgs() << "\t <\t");
2747 if (Bound[Level].Lower[Dependence::DVEntry::LT])
2748 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT]
2749 << '\t');
2750 else
2751 LLVM_DEBUG(dbgs() << "-inf\t");
2752 if (Bound[Level].Upper[Dependence::DVEntry::LT])
2753 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT]
2754 << '\n');
2755 else
2756 LLVM_DEBUG(dbgs() << "+inf\n");
2757 LLVM_DEBUG(dbgs() << "\t =\t");
2758 if (Bound[Level].Lower[Dependence::DVEntry::EQ])
2759 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ]
2760 << '\t');
2761 else
2762 LLVM_DEBUG(dbgs() << "-inf\t");
2763 if (Bound[Level].Upper[Dependence::DVEntry::EQ])
2764 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ]
2765 << '\n');
2766 else
2767 LLVM_DEBUG(dbgs() << "+inf\n");
2768 LLVM_DEBUG(dbgs() << "\t >\t");
2769 if (Bound[Level].Lower[Dependence::DVEntry::GT])
2770 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT]
2771 << '\t');
2772 else
2773 LLVM_DEBUG(dbgs() << "-inf\t");
2774 if (Bound[Level].Upper[Dependence::DVEntry::GT])
2775 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT]
2776 << '\n');
2777 else
2778 LLVM_DEBUG(dbgs() << "+inf\n");
2779#endif
2780 }
2781
2782 unsigned NewDeps = 0;
2783
2784 // test bounds for <, *, *, ...
2785 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta))
2786 NewDeps += exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded,
2787 Delta);
2788
2789 // Test bounds for =, *, *, ...
2790 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta))
2791 NewDeps += exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded,
2792 Delta);
2793
2794 // test bounds for >, *, *, ...
2795 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta))
2796 NewDeps += exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded,
2797 Delta);
2798
2799 Bound[Level].Direction = Dependence::DVEntry::ALL;
2800 return NewDeps;
2801 } else
2802 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded,
2803 Delta);
2804}
2805
2806// Returns true iff the current bounds are plausible.
2807bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level,
2808 BoundInfo *Bound, const SCEV *Delta) const {
2809 Bound[Level].Direction = DirKind;
2810 if (const SCEV *LowerBound = getLowerBound(Bound))
2811 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta))
2812 return false;
2813 if (const SCEV *UpperBound = getUpperBound(Bound))
2814 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound))
2815 return false;
2816 return true;
2817}
2818
2819// Computes the upper and lower bounds for level K
2820// using the * direction. Records them in Bound.
2821// Wolfe gives the equations
2822//
2823// LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
2824// UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
2825//
2826// Since we normalize loops, we can simplify these equations to
2827//
2828// LB^*_k = (A^-_k - B^+_k)U_k
2829// UB^*_k = (A^+_k - B^-_k)U_k
2830//
2831// We must be careful to handle the case where the upper bound is unknown.
2832// Note that the lower bound is always <= 0
2833// and the upper bound is always >= 0.
2834void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B,
2835 BoundInfo *Bound, unsigned K) const {
2836 Bound[K].Lower[Dependence::DVEntry::ALL] =
2837 nullptr; // Default value = -infinity.
2838 Bound[K].Upper[Dependence::DVEntry::ALL] =
2839 nullptr; // Default value = +infinity.
2840 if (Bound[K].Iterations) {
2841 Bound[K].Lower[Dependence::DVEntry::ALL] = SE->getMulExpr(
2842 SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), Bound[K].Iterations);
2843 Bound[K].Upper[Dependence::DVEntry::ALL] = SE->getMulExpr(
2844 SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), Bound[K].Iterations);
2845 } else {
2846 // If the difference is 0, we won't need to know the number of iterations.
2847 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart))
2848 Bound[K].Lower[Dependence::DVEntry::ALL] =
2849 SE->getZero(A[K].Coeff->getType());
2850 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart))
2851 Bound[K].Upper[Dependence::DVEntry::ALL] =
2852 SE->getZero(A[K].Coeff->getType());
2853 }
2854}
2855
2856// Computes the upper and lower bounds for level K
2857// using the = direction. Records them in Bound.
2858// Wolfe gives the equations
2859//
2860// LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
2861// UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
2862//
2863// Since we normalize loops, we can simplify these equations to
2864//
2865// LB^=_k = (A_k - B_k)^- U_k
2866// UB^=_k = (A_k - B_k)^+ U_k
2867//
2868// We must be careful to handle the case where the upper bound is unknown.
2869// Note that the lower bound is always <= 0
2870// and the upper bound is always >= 0.
2871void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B,
2872 BoundInfo *Bound, unsigned K) const {
2873 Bound[K].Lower[Dependence::DVEntry::EQ] =
2874 nullptr; // Default value = -infinity.
2875 Bound[K].Upper[Dependence::DVEntry::EQ] =
2876 nullptr; // Default value = +infinity.
2877 if (Bound[K].Iterations) {
2878 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
2879 const SCEV *NegativePart = getNegativePart(Delta);
2880 Bound[K].Lower[Dependence::DVEntry::EQ] =
2881 SE->getMulExpr(NegativePart, Bound[K].Iterations);
2882 const SCEV *PositivePart = getPositivePart(Delta);
2883 Bound[K].Upper[Dependence::DVEntry::EQ] =
2884 SE->getMulExpr(PositivePart, Bound[K].Iterations);
2885 } else {
2886 // If the positive/negative part of the difference is 0,
2887 // we won't need to know the number of iterations.
2888 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
2889 const SCEV *NegativePart = getNegativePart(Delta);
2890 if (NegativePart->isZero())
2891 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
2892 const SCEV *PositivePart = getPositivePart(Delta);
2893 if (PositivePart->isZero())
2894 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
2895 }
2896}
2897
2898// Computes the upper and lower bounds for level K
2899// using the < direction. Records them in Bound.
2900// Wolfe gives the equations
2901//
2902// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2903// UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2904//
2905// Since we normalize loops, we can simplify these equations to
2906//
2907// LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
2908// UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
2909//
2910// We must be careful to handle the case where the upper bound is unknown.
2911void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B,
2912 BoundInfo *Bound, unsigned K) const {
2913 Bound[K].Lower[Dependence::DVEntry::LT] =
2914 nullptr; // Default value = -infinity.
2915 Bound[K].Upper[Dependence::DVEntry::LT] =
2916 nullptr; // Default value = +infinity.
2917 if (Bound[K].Iterations) {
2918 const SCEV *Iter_1 = SE->getMinusSCEV(
2919 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
2920 const SCEV *NegPart =
2921 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
2922 Bound[K].Lower[Dependence::DVEntry::LT] =
2923 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff);
2924 const SCEV *PosPart =
2925 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
2926 Bound[K].Upper[Dependence::DVEntry::LT] =
2927 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff);
2928 } else {
2929 // If the positive/negative part of the difference is 0,
2930 // we won't need to know the number of iterations.
2931 const SCEV *NegPart =
2932 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
2933 if (NegPart->isZero())
2934 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
2935 const SCEV *PosPart =
2936 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
2937 if (PosPart->isZero())
2938 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
2939 }
2940}
2941
2942// Computes the upper and lower bounds for level K
2943// using the > direction. Records them in Bound.
2944// Wolfe gives the equations
2945//
2946// LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2947// UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2948//
2949// Since we normalize loops, we can simplify these equations to
2950//
2951// LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
2952// UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
2953//
2954// We must be careful to handle the case where the upper bound is unknown.
2955void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B,
2956 BoundInfo *Bound, unsigned K) const {
2957 Bound[K].Lower[Dependence::DVEntry::GT] =
2958 nullptr; // Default value = -infinity.
2959 Bound[K].Upper[Dependence::DVEntry::GT] =
2960 nullptr; // Default value = +infinity.
2961 if (Bound[K].Iterations) {
2962 const SCEV *Iter_1 = SE->getMinusSCEV(
2963 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));
2964 const SCEV *NegPart =
2965 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
2966 Bound[K].Lower[Dependence::DVEntry::GT] =
2967 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff);
2968 const SCEV *PosPart =
2969 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
2970 Bound[K].Upper[Dependence::DVEntry::GT] =
2971 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff);
2972 } else {
2973 // If the positive/negative part of the difference is 0,
2974 // we won't need to know the number of iterations.
2975 const SCEV *NegPart =
2976 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
2977 if (NegPart->isZero())
2978 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
2979 const SCEV *PosPart =
2980 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
2981 if (PosPart->isZero())
2982 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
2983 }
2984}
2985
2986// X^+ = max(X, 0)
2987const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const {
2988 return SE->getSMaxExpr(X, SE->getZero(X->getType()));
2989}
2990
2991// X^- = min(X, 0)
2992const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const {
2993 return SE->getSMinExpr(X, SE->getZero(X->getType()));
2994}
2995
2996// Walks through the subscript,
2997// collecting each coefficient, the associated loop bounds,
2998// and recording its positive and negative parts for later use.
2999DependenceInfo::CoefficientInfo *
3000DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag,
3001 const SCEV *&Constant) const {
3002 const SCEV *Zero = SE->getZero(Subscript->getType());
3003 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
3004 for (unsigned K = 1; K <= MaxLevels; ++K) {
3005 CI[K].Coeff = Zero;
3006 CI[K].PosPart = Zero;
3007 CI[K].NegPart = Zero;
3008 CI[K].Iterations = nullptr;
3009 }
3010 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) {
3011 const Loop *L = AddRec->getLoop();
3012 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L);
3013 CI[K].Coeff = AddRec->getStepRecurrence(*SE);
3014 CI[K].PosPart = getPositivePart(CI[K].Coeff);
3015 CI[K].NegPart = getNegativePart(CI[K].Coeff);
3016 CI[K].Iterations = collectUpperBound(L, Subscript->getType());
3017 Subscript = AddRec->getStart();
3018 }
3019 Constant = Subscript;
3020#ifndef NDEBUG
3021 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n");
3022 for (unsigned K = 1; K <= MaxLevels; ++K) {
3023 LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff);
3024 LLVM_DEBUG(dbgs() << "\tPos Part = ");
3025 LLVM_DEBUG(dbgs() << *CI[K].PosPart);
3026 LLVM_DEBUG(dbgs() << "\tNeg Part = ");
3027 LLVM_DEBUG(dbgs() << *CI[K].NegPart);
3028 LLVM_DEBUG(dbgs() << "\tUpper Bound = ");
3029 if (CI[K].Iterations)
3030 LLVM_DEBUG(dbgs() << *CI[K].Iterations);
3031 else
3032 LLVM_DEBUG(dbgs() << "+inf");
3033 LLVM_DEBUG(dbgs() << '\n');
3034 }
3035 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n');
3036#endif
3037 return CI;
3038}
3039
3040// Looks through all the bounds info and
3041// computes the lower bound given the current direction settings
3042// at each level. If the lower bound for any level is -inf,
3043// the result is -inf.
3044const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const {
3045 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];
3046 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
3047 if (Bound[K].Lower[Bound[K].Direction])
3048 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]);
3049 else
3050 Sum = nullptr;
3051 }
3052 return Sum;
3053}
3054
3055// Looks through all the bounds info and
3056// computes the upper bound given the current direction settings
3057// at each level. If the upper bound at any level is +inf,
3058// the result is +inf.
3059const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const {
3060 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];
3061 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
3062 if (Bound[K].Upper[Bound[K].Direction])
3063 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]);
3064 else
3065 Sum = nullptr;
3066 }
3067 return Sum;
3068}
3069
3070//===----------------------------------------------------------------------===//
3071// Constraint manipulation for Delta test.
3072
3073// Given a linear SCEV,
3074// return the coefficient (the step)
3075// corresponding to the specified loop.
3076// If there isn't one, return 0.
3077// For example, given a*i + b*j + c*k, finding the coefficient
3078// corresponding to the j loop would yield b.
3079const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr,
3080 const Loop *TargetLoop) const {
3081 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
3082 if (!AddRec)
3083 return SE->getZero(Expr->getType());
3084 if (AddRec->getLoop() == TargetLoop)
3085 return AddRec->getStepRecurrence(*SE);
3086 return findCoefficient(AddRec->getStart(), TargetLoop);
3087}
3088
3089// Given a linear SCEV,
3090// return the SCEV given by zeroing out the coefficient
3091// corresponding to the specified loop.
3092// For example, given a*i + b*j + c*k, zeroing the coefficient
3093// corresponding to the j loop would yield a*i + c*k.
3094const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr,
3095 const Loop *TargetLoop) const {
3096 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
3097 if (!AddRec)
3098 return Expr; // ignore
3099 if (AddRec->getLoop() == TargetLoop)
3100 return AddRec->getStart();
3101 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop),
3102 AddRec->getStepRecurrence(*SE), AddRec->getLoop(),
3103 AddRec->getNoWrapFlags());
3104}
3105
3106// Given a linear SCEV Expr,
3107// return the SCEV given by adding some Value to the
3108// coefficient corresponding to the specified TargetLoop.
3109// For example, given a*i + b*j + c*k, adding 1 to the coefficient
3110// corresponding to the j loop would yield a*i + (b+1)*j + c*k.
3111const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr,
3112 const Loop *TargetLoop,
3113 const SCEV *Value) const {
3114 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
3115 if (!AddRec) // create a new addRec
3116 return SE->getAddRecExpr(Expr, Value, TargetLoop,
3117 SCEV::FlagAnyWrap); // Worst case, with no info.
3118 if (AddRec->getLoop() == TargetLoop) {
3119 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value);
3120 if (Sum->isZero())
3121 return AddRec->getStart();
3122 return SE->getAddRecExpr(AddRec->getStart(), Sum, AddRec->getLoop(),
3123 AddRec->getNoWrapFlags());
3124 }
3125 if (SE->isLoopInvariant(AddRec, TargetLoop))
3126 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap);
3127 return SE->getAddRecExpr(
3128 addToCoefficient(AddRec->getStart(), TargetLoop, Value),
3129 AddRec->getStepRecurrence(*SE), AddRec->getLoop(),
3130 AddRec->getNoWrapFlags());
3131}
3132
3133// Review the constraints, looking for opportunities
3134// to simplify a subscript pair (Src and Dst).
3135// Return true if some simplification occurs.
3136// If the simplification isn't exact (that is, if it is conservative
3137// in terms of dependence), set consistent to false.
3138// Corresponds to Figure 5 from the paper
3139//
3140// Practical Dependence Testing
3141// Goff, Kennedy, Tseng
3142// PLDI 1991
3143bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst,
3144 SmallBitVector &Loops,
3145 SmallVectorImpl<Constraint> &Constraints,
3146 bool &Consistent) {
3147 bool Result = false;
3148 for (unsigned LI : Loops.set_bits()) {
3149 LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is");
3150 LLVM_DEBUG(Constraints[LI].dump(dbgs()));
3151 if (Constraints[LI].isDistance())
3152 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent);
3153 else if (Constraints[LI].isLine())
3154 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent);
3155 else if (Constraints[LI].isPoint())
3156 Result |= propagatePoint(Src, Dst, Constraints[LI]);
3157 }
3158 return Result;
3159}
3160
3161// Attempt to propagate a distance
3162// constraint into a subscript pair (Src and Dst).
3163// Return true if some simplification occurs.
3164// If the simplification isn't exact (that is, if it is conservative
3165// in terms of dependence), set consistent to false.
3166bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst,
3167 Constraint &CurConstraint,
3168 bool &Consistent) {
3169 const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3170 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
3171 const SCEV *A_K = findCoefficient(Src, CurLoop);
3172 if (A_K->isZero())
3173 return false;
3174 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD());
3175 Src = SE->getMinusSCEV(Src, DA_K);
3176 Src = zeroCoefficient(Src, CurLoop);
3177 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
3178 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
3179 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K));
3180 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
3181 if (!findCoefficient(Dst, CurLoop)->isZero())
3182 Consistent = false;
3183 return true;
3184}
3185
3186// Attempt to propagate a line
3187// constraint into a subscript pair (Src and Dst).
3188// Return true if some simplification occurs.
3189// If the simplification isn't exact (that is, if it is conservative
3190// in terms of dependence), set consistent to false.
3191bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
3192 Constraint &CurConstraint,
3193 bool &Consistent) {
3194 const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3195 const SCEV *A = CurConstraint.getA();
3196 const SCEV *B = CurConstraint.getB();
3197 const SCEV *C = CurConstraint.getC();
3198 LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C
3199 << "\n");
3200 LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n");
3201 LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n");
3202 if (A->isZero()) {
3203 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B);
3204 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
3205 if (!Bconst || !Cconst)
3206 return false;
3207 APInt Beta = Bconst->getAPInt();
3208 APInt Charlie = Cconst->getAPInt();
3209 APInt CdivB = Charlie.sdiv(Beta);
3210 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");
3211 const SCEV *AP_K = findCoefficient(Dst, CurLoop);
3212 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
3213 Dst = zeroCoefficient(Dst, CurLoop);
3214 if (!findCoefficient(Src, CurLoop)->isZero())
3215 Consistent = false;
3216 } else if (B->isZero()) {
3217 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
3218 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
3219 if (!Aconst || !Cconst)
3220 return false;
3221 APInt Alpha = Aconst->getAPInt();
3222 APInt Charlie = Cconst->getAPInt();
3223 APInt CdivA = Charlie.sdiv(Alpha);
3224 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
3225 const SCEV *A_K = findCoefficient(Src, CurLoop);
3226 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
3227 Src = zeroCoefficient(Src, CurLoop);
3228 if (!findCoefficient(Dst, CurLoop)->isZero())
3229 Consistent = false;
3230 } else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) {
3231 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
3232 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
3233 if (!Aconst || !Cconst)
3234 return false;
3235 APInt Alpha = Aconst->getAPInt();
3236 APInt Charlie = Cconst->getAPInt();
3237 APInt CdivA = Charlie.sdiv(Alpha);
3238 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
3239 const SCEV *A_K = findCoefficient(Src, CurLoop);
3240 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
3241 Src = zeroCoefficient(Src, CurLoop);
3242 Dst = addToCoefficient(Dst, CurLoop, A_K);
3243 if (!findCoefficient(Dst, CurLoop)->isZero())
3244 Consistent = false;
3245 } else {
3246 // paper is incorrect here, or perhaps just misleading
3247 const SCEV *A_K = findCoefficient(Src, CurLoop);
3248 Src = SE->getMulExpr(Src, A);
3249 Dst = SE->getMulExpr(Dst, A);
3250 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C));
3251 Src = zeroCoefficient(Src, CurLoop);
3252 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B));
3253 if (!findCoefficient(Dst, CurLoop)->isZero())
3254 Consistent = false;
3255 }
3256 LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");
3257 LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n");
3258 return true;
3259}
3260
3261// Attempt to propagate a point
3262// constraint into a subscript pair (Src and Dst).
3263// Return true if some simplification occurs.
3264bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst,
3265 Constraint &CurConstraint) {
3266 const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3267 const SCEV *A_K = findCoefficient(Src, CurLoop);
3268 const SCEV *AP_K = findCoefficient(Dst, CurLoop);
3269 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX());
3270 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY());
3271 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
3272 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K));
3273 Src = zeroCoefficient(Src, CurLoop);
3274 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
3275 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
3276 Dst = zeroCoefficient(Dst, CurLoop);
3277 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
3278 return true;
3279}
3280
3281// Update direction vector entry based on the current constraint.
3282void DependenceInfo::updateDirection(Dependence::DVEntry &Level,
3283 const Constraint &CurConstraint) const {
3284 LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint =");
3285 LLVM_DEBUG(CurConstraint.dump(dbgs()));
3286 if (CurConstraint.isAny())
3287 ; // use defaults
3288 else if (CurConstraint.isDistance()) {
3289 // this one is consistent, the others aren't
3290 Level.Scalar = false;
3291 Level.Distance = CurConstraint.getD();
3292 unsigned NewDirection = Dependence::DVEntry::NONE;
3293 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero
3294 NewDirection = Dependence::DVEntry::EQ;
3295 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive
3296 NewDirection |= Dependence::DVEntry::LT;
3297 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative
3298 NewDirection |= Dependence::DVEntry::GT;
3299 Level.Direction &= NewDirection;
3300 } else if (CurConstraint.isLine()) {
3301 Level.Scalar = false;
3302 Level.Distance = nullptr;
3303 // direction should be accurate
3304 } else if (CurConstraint.isPoint()) {
3305 Level.Scalar = false;
3306 Level.Distance = nullptr;
3307 unsigned NewDirection = Dependence::DVEntry::NONE;
3308 if (!isKnownPredicate(CmpInst::ICMP_NE, CurConstraint.getY(),
3309 CurConstraint.getX()))
3310 // if X may be = Y
3311 NewDirection |= Dependence::DVEntry::EQ;
3312 if (!isKnownPredicate(CmpInst::ICMP_SLE, CurConstraint.getY(),
3313 CurConstraint.getX()))
3314 // if Y may be > X
3315 NewDirection |= Dependence::DVEntry::LT;
3316 if (!isKnownPredicate(CmpInst::ICMP_SGE, CurConstraint.getY(),
3317 CurConstraint.getX()))
3318 // if Y may be < X
3319 NewDirection |= Dependence::DVEntry::GT;
3320 Level.Direction &= NewDirection;
3321 } else
3322 llvm_unreachable("constraint has unexpected kind");
3323}
3324
3325/// Check if we can delinearize the subscripts. If the SCEVs representing the
3326/// source and destination array references are recurrences on a nested loop,
3327/// this function flattens the nested recurrences into separate recurrences
3328/// for each loop level.
3329bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
3330 SmallVectorImpl<Subscript> &Pair) {
3331 assert(isLoadOrStore(Src) && "instruction is not load or store");
3332 assert(isLoadOrStore(Dst) && "instruction is not load or store");
3333 Value *SrcPtr = getLoadStorePointerOperand(Src);
3334 Value *DstPtr = getLoadStorePointerOperand(Dst);
3335 Loop *SrcLoop = LI->getLoopFor(Src->getParent());
3336 Loop *DstLoop = LI->getLoopFor(Dst->getParent());
3337 const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop);
3338 const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop);
3339 const SCEVUnknown *SrcBase =
3340 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
3341 const SCEVUnknown *DstBase =
3342 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
3343
3344 if (!SrcBase || !DstBase || SrcBase != DstBase)
3345 return false;
3346
3347 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
3348
3349 if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
3350 SrcSubscripts, DstSubscripts) &&
3351 !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
3352 SrcSubscripts, DstSubscripts))
3353 return false;
3354
3355 assert(isLoopInvariant(SrcBase, SrcLoop) &&
3356 isLoopInvariant(DstBase, DstLoop) &&
3357 "Expected SrcBase and DstBase to be loop invariant");
3358
3359 int Size = SrcSubscripts.size();
3360 LLVM_DEBUG({
3361 dbgs() << "\nSrcSubscripts: ";
3362 for (int I = 0; I < Size; I++)
3363 dbgs() << *SrcSubscripts[I];
3364 dbgs() << "\nDstSubscripts: ";
3365 for (int I = 0; I < Size; I++)
3366 dbgs() << *DstSubscripts[I];
3367 });
3368
3369 // The delinearization transforms a single-subscript MIV dependence test into
3370 // a multi-subscript SIV dependence test that is easier to compute. So we
3371 // resize Pair to contain as many pairs of subscripts as the delinearization
3372 // has found, and then initialize the pairs following the delinearization.
3373 Pair.resize(Size);
3374 for (int I = 0; I < Size; ++I) {
3375 Pair[I].Src = SrcSubscripts[I];
3376 Pair[I].Dst = DstSubscripts[I];
3377 unifySubscriptType(&Pair[I]);
3378 }
3379
3380 return true;
3381}
3382
3383/// Try to delinearize \p SrcAccessFn and \p DstAccessFn if the underlying
3384/// arrays accessed are fixed-size arrays. Return true if delinearization was
3385/// successful.
3386bool DependenceInfo::tryDelinearizeFixedSize(
3387 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
3388 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
3389 SmallVectorImpl<const SCEV *> &DstSubscripts) {
3390 LLVM_DEBUG({
3391 const SCEVUnknown *SrcBase =
3392 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
3393 const SCEVUnknown *DstBase =
3394 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
3395 assert(SrcBase && DstBase && SrcBase == DstBase &&
3396 "expected src and dst scev unknowns to be equal");
3397 });
3398
3399 SmallVector<int, 4> SrcSizes;
3400 SmallVector<int, 4> DstSizes;
3401 if (!tryDelinearizeFixedSizeImpl(SE, Src, SrcAccessFn, SrcSubscripts,
3402 SrcSizes) ||
3403 !tryDelinearizeFixedSizeImpl(SE, Dst, DstAccessFn, DstSubscripts,
3404 DstSizes))
3405 return false;
3406
3407 // Check that the two size arrays are non-empty and equal in length and
3408 // value.
3409 if (SrcSizes.size() != DstSizes.size() ||
3410 !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) {
3411 SrcSubscripts.clear();
3412 DstSubscripts.clear();
3413 return false;
3414 }
3415
3416 assert(SrcSubscripts.size() == DstSubscripts.size() &&
3417 "Expected equal number of entries in the list of SrcSubscripts and "
3418 "DstSubscripts.");
3419
3420 Value *SrcPtr = getLoadStorePointerOperand(Src);
3421 Value *DstPtr = getLoadStorePointerOperand(Dst);
3422
3423 // In general we cannot safely assume that the subscripts recovered from GEPs
3424 // are in the range of values defined for their corresponding array
3425 // dimensions. For example some C language usage/interpretation make it
3426 // impossible to verify this at compile-time. As such we can only delinearize
3427 // iff the subscripts are positive and are less than the range of the
3428 // dimension.
3430 auto AllIndicesInRange = [&](SmallVector<int, 4> &DimensionSizes,
3431 SmallVectorImpl<const SCEV *> &Subscripts,
3432 Value *Ptr) {
3433 size_t SSize = Subscripts.size();
3434 for (size_t I = 1; I < SSize; ++I) {
3435 const SCEV *S = Subscripts[I];
3436 if (!isKnownNonNegative(S, Ptr)) {
3437 LLVM_DEBUG({
3438 dbgs() << "Check failed: !isKnownNonNegative(S, Ptr)\n";
3439 dbgs() << " S: " << *S << "\n" << " Ptr: " << *Ptr << "\n";
3440 });
3441 return false;
3442 }
3443 if (auto *SType = dyn_cast<IntegerType>(S->getType())) {
3444 const SCEV *Range = SE->getConstant(
3445 ConstantInt::get(SType, DimensionSizes[I - 1], false));
3446 if (!isKnownLessThan(S, Range)) {
3447 LLVM_DEBUG({
3448 dbgs() << "Check failed: !isKnownLessThan(S, Range)\n";
3449 dbgs() << " S: " << *S << "\n"
3450 << " Range: " << *Range << "\n";
3451 });
3452 return false;
3453 }
3454 }
3455 }
3456 return true;
3457 };
3458
3459 if (!AllIndicesInRange(SrcSizes, SrcSubscripts, SrcPtr) ||
3460 !AllIndicesInRange(DstSizes, DstSubscripts, DstPtr)) {
3461 LLVM_DEBUG(dbgs() << "Check failed: AllIndicesInRange.\n");
3462 SrcSubscripts.clear();
3463 DstSubscripts.clear();
3464 return false;
3465 }
3466 }
3467 LLVM_DEBUG({
3468 dbgs() << "Delinearized subscripts of fixed-size array\n"
3469 << "SrcGEP:" << *SrcPtr << "\n"
3470 << "DstGEP:" << *DstPtr << "\n";
3471 });
3472 return true;
3473}
3474
3475bool DependenceInfo::tryDelinearizeParametricSize(
3476 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
3477 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
3478 SmallVectorImpl<const SCEV *> &DstSubscripts) {
3479
3480 Value *SrcPtr = getLoadStorePointerOperand(Src);
3481 Value *DstPtr = getLoadStorePointerOperand(Dst);
3482 const SCEVUnknown *SrcBase =
3483 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
3484 const SCEVUnknown *DstBase =
3485 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
3486 assert(SrcBase && DstBase && SrcBase == DstBase &&
3487 "expected src and dst scev unknowns to be equal");
3488
3489 const SCEV *ElementSize = SE->getElementSize(Src);
3490 if (ElementSize != SE->getElementSize(Dst))
3491 return false;
3492
3493 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase);
3494 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase);
3495
3496 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);
3497 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV);
3498 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
3499 return false;
3500
3501 // First step: collect parametric terms in both array references.
3503 collectParametricTerms(*SE, SrcAR, Terms);
3504 collectParametricTerms(*SE, DstAR, Terms);
3505
3506 // Second step: find subscript sizes.
3508 findArrayDimensions(*SE, Terms, Sizes, ElementSize);
3509
3510 // Third step: compute the access functions for each subscript.
3511 computeAccessFunctions(*SE, SrcAR, SrcSubscripts, Sizes);
3512 computeAccessFunctions(*SE, DstAR, DstSubscripts, Sizes);
3513
3514 // Fail when there is only a subscript: that's a linearized access function.
3515 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||
3516 SrcSubscripts.size() != DstSubscripts.size())
3517 return false;
3518
3519 size_t Size = SrcSubscripts.size();
3520
3521 // Statically check that the array bounds are in-range. The first subscript we
3522 // don't have a size for and it cannot overflow into another subscript, so is
3523 // always safe. The others need to be 0 <= subscript[i] < bound, for both src
3524 // and dst.
3525 // FIXME: It may be better to record these sizes and add them as constraints
3526 // to the dependency checks.
3528 for (size_t I = 1; I < Size; ++I) {
3529 bool SNN = isKnownNonNegative(SrcSubscripts[I], SrcPtr);
3530 bool DNN = isKnownNonNegative(DstSubscripts[I], DstPtr);
3531 bool SLT = isKnownLessThan(SrcSubscripts[I], Sizes[I - 1]);
3532 bool DLT = isKnownLessThan(DstSubscripts[I], Sizes[I - 1]);
3533 if (SNN && DNN && SLT && DLT)
3534 continue;
3535
3536 LLVM_DEBUG({
3537 dbgs() << "Delinearization checks failed: can't prove the following\n";
3538 if (!SNN)
3539 dbgs() << " isKnownNonNegative(" << *SrcSubscripts[I] << ")\n";
3540 if (!DNN)
3541 dbgs() << " isKnownNonNegative(" << *DstSubscripts[I] << ")\n";
3542 if (!SLT)
3543 dbgs() << " isKnownLessThan(" << *SrcSubscripts[I] << ", "
3544 << *Sizes[I - 1] << ")\n";
3545 if (!DLT)
3546 dbgs() << " isKnownLessThan(" << *DstSubscripts[I] << ", "
3547 << *Sizes[I - 1] << ")\n";
3548 });
3549 return false;
3550 }
3551
3552 return true;
3553}
3554
3555//===----------------------------------------------------------------------===//
3556
3557#ifndef NDEBUG
3558// For debugging purposes, dump a small bit vector to dbgs().
3560 dbgs() << "{";
3561 for (unsigned VI : BV.set_bits()) {
3562 dbgs() << VI;
3563 if (BV.find_next(VI) >= 0)
3564 dbgs() << ' ';
3565 }
3566 dbgs() << "}\n";
3567}
3568#endif
3569
3571 FunctionAnalysisManager::Invalidator &Inv) {
3572 // Check if the analysis itself has been invalidated.
3573 auto PAC = PA.getChecker<DependenceAnalysis>();
3574 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
3575 return true;
3576
3577 // Check transitive dependencies.
3578 return Inv.invalidate<AAManager>(F, PA) ||
3579 Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
3580 Inv.invalidate<LoopAnalysis>(F, PA);
3581}
3582
3586
3587// depends -
3588// Returns NULL if there is no dependence.
3589// Otherwise, return a Dependence with as many details as possible.
3590// Corresponds to Section 3.1 in the paper
3591//
3592// Practical Dependence Testing
3593// Goff, Kennedy, Tseng
3594// PLDI 1991
3595//
3596// Care is required to keep the routine below, getSplitIteration(),
3597// up to date with respect to this routine.
3598std::unique_ptr<Dependence>
3600 bool UnderRuntimeAssumptions) {
3602 bool PossiblyLoopIndependent = true;
3603 if (Src == Dst)
3604 PossiblyLoopIndependent = false;
3605
3606 if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory()))
3607 // if both instructions don't reference memory, there's no dependence
3608 return nullptr;
3609
3610 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
3611 // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
3612 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
3613 return std::make_unique<Dependence>(Src, Dst,
3614 SCEVUnionPredicate(Assume, *SE));
3615 }
3616
3617 const MemoryLocation &DstLoc = MemoryLocation::get(Dst);
3618 const MemoryLocation &SrcLoc = MemoryLocation::get(Src);
3619
3620 switch (underlyingObjectsAlias(AA, F->getDataLayout(), DstLoc, SrcLoc)) {
3623 // cannot analyse objects if we don't understand their aliasing.
3624 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
3625 return std::make_unique<Dependence>(Src, Dst,
3626 SCEVUnionPredicate(Assume, *SE));
3628 // If the objects noalias, they are distinct, accesses are independent.
3629 LLVM_DEBUG(dbgs() << "no alias\n");
3630 return nullptr;
3632 break; // The underlying objects alias; test accesses for dependence.
3633 }
3634
3635 if (DstLoc.Size != SrcLoc.Size || !DstLoc.Size.isPrecise() ||
3636 !SrcLoc.Size.isPrecise()) {
3637 // The dependence test gets confused if the size of the memory accesses
3638 // differ.
3639 LLVM_DEBUG(dbgs() << "can't analyze must alias with different sizes\n");
3640 return std::make_unique<Dependence>(Src, Dst,
3641 SCEVUnionPredicate(Assume, *SE));
3642 }
3643
3644 Value *SrcPtr = getLoadStorePointerOperand(Src);
3645 Value *DstPtr = getLoadStorePointerOperand(Dst);
3646 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
3647 const SCEV *DstSCEV = SE->getSCEV(DstPtr);
3648 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");
3649 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n");
3650 const SCEV *SrcBase = SE->getPointerBase(SrcSCEV);
3651 const SCEV *DstBase = SE->getPointerBase(DstSCEV);
3652 if (SrcBase != DstBase) {
3653 // If two pointers have different bases, trying to analyze indexes won't
3654 // work; we can't compare them to each other. This can happen, for example,
3655 // if one is produced by an LCSSA PHI node.
3656 //
3657 // We check this upfront so we don't crash in cases where getMinusSCEV()
3658 // returns a SCEVCouldNotCompute.
3659 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n");
3660 return std::make_unique<Dependence>(Src, Dst,
3661 SCEVUnionPredicate(Assume, *SE));
3662 }
3663
3664 // Even if the base pointers are the same, they may not be loop-invariant. It
3665 // could lead to incorrect results, as we're analyzing loop-carried
3666 // dependencies. Src and Dst can be in different loops, so we need to check
3667 // the base pointer is invariant in both loops.
3668 Loop *SrcLoop = LI->getLoopFor(Src->getParent());
3669 Loop *DstLoop = LI->getLoopFor(Dst->getParent());
3670 if (!isLoopInvariant(SrcBase, SrcLoop) ||
3671 !isLoopInvariant(DstBase, DstLoop)) {
3672 LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n");
3673 return std::make_unique<Dependence>(Src, Dst,
3674 SCEVUnionPredicate(Assume, *SE));
3675 }
3676
3677 uint64_t EltSize = SrcLoc.Size.toRaw();
3678 const SCEV *SrcEv = SE->getMinusSCEV(SrcSCEV, SrcBase);
3679 const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase);
3680
3681 // Check that memory access offsets are multiples of element sizes.
3682 if (!SE->isKnownMultipleOf(SrcEv, EltSize, Assume) ||
3683 !SE->isKnownMultipleOf(DstEv, EltSize, Assume)) {
3684 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different offsets\n");
3685 return std::make_unique<Dependence>(Src, Dst,
3686 SCEVUnionPredicate(Assume, *SE));
3687 }
3688
3689 if (!Assume.empty()) {
3690 if (!UnderRuntimeAssumptions)
3691 return std::make_unique<Dependence>(Src, Dst,
3692 SCEVUnionPredicate(Assume, *SE));
3693 // Add non-redundant assumptions.
3694 unsigned N = Assumptions.size();
3695 for (const SCEVPredicate *P : Assume) {
3696 bool Implied = false;
3697 for (unsigned I = 0; I != N && !Implied; I++)
3698 if (Assumptions[I]->implies(P, *SE))
3699 Implied = true;
3700 if (!Implied)
3701 Assumptions.push_back(P);
3702 }
3703 }
3704
3705 establishNestingLevels(Src, Dst);
3706 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
3707 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
3708
3709 FullDependence Result(Src, Dst, SCEVUnionPredicate(Assume, *SE),
3710 PossiblyLoopIndependent, CommonLevels);
3711 ++TotalArrayPairs;
3712
3713 unsigned Pairs = 1;
3714 SmallVector<Subscript, 2> Pair(Pairs);
3715 Pair[0].Src = SrcEv;
3716 Pair[0].Dst = DstEv;
3717
3718 if (Delinearize) {
3719 if (tryDelinearize(Src, Dst, Pair)) {
3720 LLVM_DEBUG(dbgs() << " delinearized\n");
3721 Pairs = Pair.size();
3722 }
3723 }
3724
3725 for (unsigned P = 0; P < Pairs; ++P) {
3726 assert(Pair[P].Src->getType()->isIntegerTy() && "Src must be an integer");
3727 assert(Pair[P].Dst->getType()->isIntegerTy() && "Dst must be an integer");
3728 Pair[P].Loops.resize(MaxLevels + 1);
3729 Pair[P].GroupLoops.resize(MaxLevels + 1);
3730 Pair[P].Group.resize(Pairs);
3731 removeMatchingExtensions(&Pair[P]);
3732 Pair[P].Classification =
3733 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), Pair[P].Dst,
3734 LI->getLoopFor(Dst->getParent()), Pair[P].Loops);
3735 Pair[P].GroupLoops = Pair[P].Loops;
3736 Pair[P].Group.set(P);
3737 LLVM_DEBUG(dbgs() << " subscript " << P << "\n");
3738 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
3739 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
3740 LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
3741 LLVM_DEBUG(dbgs() << "\tloops = ");
3743 }
3744
3745 SmallBitVector Separable(Pairs);
3746 SmallBitVector Coupled(Pairs);
3747
3748 // Partition subscripts into separable and minimally-coupled groups
3749 // Algorithm in paper is algorithmically better;
3750 // this may be faster in practice. Check someday.
3751 //
3752 // Here's an example of how it works. Consider this code:
3753 //
3754 // for (i = ...) {
3755 // for (j = ...) {
3756 // for (k = ...) {
3757 // for (l = ...) {
3758 // for (m = ...) {
3759 // A[i][j][k][m] = ...;
3760 // ... = A[0][j][l][i + j];
3761 // }
3762 // }
3763 // }
3764 // }
3765 // }
3766 //
3767 // There are 4 subscripts here:
3768 // 0 [i] and [0]
3769 // 1 [j] and [j]
3770 // 2 [k] and [l]
3771 // 3 [m] and [i + j]
3772 //
3773 // We've already classified each subscript pair as ZIV, SIV, etc.,
3774 // and collected all the loops mentioned by pair P in Pair[P].Loops.
3775 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops
3776 // and set Pair[P].Group = {P}.
3777 //
3778 // Src Dst Classification Loops GroupLoops Group
3779 // 0 [i] [0] SIV {1} {1} {0}
3780 // 1 [j] [j] SIV {2} {2} {1}
3781 // 2 [k] [l] RDIV {3,4} {3,4} {2}
3782 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3}
3783 //
3784 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ.
3785 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc.
3786 //
3787 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty.
3788 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty.
3789 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty,
3790 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added
3791 // to either Separable or Coupled).
3792 //
3793 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty.
3794 // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty,
3795 // so Pair[3].Group = {0, 1, 3} and Done = false.
3796 //
3797 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty.
3798 // Since Done remains true, we add 2 to the set of Separable pairs.
3799 //
3800 // Finally, we consider 3. There's nothing to compare it with,
3801 // so Done remains true and we add it to the Coupled set.
3802 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}.
3803 //
3804 // In the end, we've got 1 separable subscript and 1 coupled group.
3805 for (unsigned SI = 0; SI < Pairs; ++SI) {
3806 if (Pair[SI].Classification == Subscript::NonLinear) {
3807 // ignore these, but collect loops for later
3808 ++NonlinearSubscriptPairs;
3809 collectCommonLoops(Pair[SI].Src, LI->getLoopFor(Src->getParent()),
3810 Pair[SI].Loops);
3811 collectCommonLoops(Pair[SI].Dst, LI->getLoopFor(Dst->getParent()),
3812 Pair[SI].Loops);
3813 Result.Consistent = false;
3814 } else if (Pair[SI].Classification == Subscript::ZIV) {
3815 // always separable
3816 Separable.set(SI);
3817 } else {
3818 // SIV, RDIV, or MIV, so check for coupled group
3819 bool Done = true;
3820 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
3821 SmallBitVector Intersection = Pair[SI].GroupLoops;
3822 Intersection &= Pair[SJ].GroupLoops;
3823 if (Intersection.any()) {
3824 // accumulate set of all the loops in group
3825 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
3826 // accumulate set of all subscripts in group
3827 Pair[SJ].Group |= Pair[SI].Group;
3828 Done = false;
3829 }
3830 }
3831 if (Done) {
3832 if (Pair[SI].Group.count() == 1) {
3833 Separable.set(SI);
3834 ++SeparableSubscriptPairs;
3835 } else {
3836 Coupled.set(SI);
3837 ++CoupledSubscriptPairs;
3838 }
3839 }
3840 }
3841 }
3842
3843 LLVM_DEBUG(dbgs() << " Separable = ");
3844 LLVM_DEBUG(dumpSmallBitVector(Separable));
3845 LLVM_DEBUG(dbgs() << " Coupled = ");
3847
3848 Constraint NewConstraint;
3849 NewConstraint.setAny(SE);
3850
3851 // test separable subscripts
3852 for (unsigned SI : Separable.set_bits()) {
3853 LLVM_DEBUG(dbgs() << "testing subscript " << SI);
3854 switch (Pair[SI].Classification) {
3855 case Subscript::ZIV:
3856 LLVM_DEBUG(dbgs() << ", ZIV\n");
3857 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result))
3858 return nullptr;
3859 break;
3860 case Subscript::SIV: {
3861 LLVM_DEBUG(dbgs() << ", SIV\n");
3862 unsigned Level;
3863 const SCEV *SplitIter = nullptr;
3864 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint,
3865 SplitIter))
3866 return nullptr;
3867 break;
3868 }
3869 case Subscript::RDIV:
3870 LLVM_DEBUG(dbgs() << ", RDIV\n");
3871 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result))
3872 return nullptr;
3873 break;
3874 case Subscript::MIV:
3875 LLVM_DEBUG(dbgs() << ", MIV\n");
3876 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result))
3877 return nullptr;
3878 break;
3879 default:
3880 llvm_unreachable("subscript has unexpected classification");
3881 }
3882 }
3883
3884 if (Coupled.count()) {
3885 // test coupled subscript groups
3886 LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n");
3887 LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n");
3888 SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
3889 for (unsigned II = 0; II <= MaxLevels; ++II)
3890 Constraints[II].setAny(SE);
3891 for (unsigned SI : Coupled.set_bits()) {
3892 LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { ");
3893 SmallBitVector Group(Pair[SI].Group);
3894 SmallBitVector Sivs(Pairs);
3895 SmallBitVector Mivs(Pairs);
3896 SmallBitVector ConstrainedLevels(MaxLevels + 1);
3897 SmallVector<Subscript *, 4> PairsInGroup;
3898 for (unsigned SJ : Group.set_bits()) {
3899 LLVM_DEBUG(dbgs() << SJ << " ");
3900 if (Pair[SJ].Classification == Subscript::SIV)
3901 Sivs.set(SJ);
3902 else
3903 Mivs.set(SJ);
3904 PairsInGroup.push_back(&Pair[SJ]);
3905 }
3906 unifySubscriptType(PairsInGroup);
3907 LLVM_DEBUG(dbgs() << "}\n");
3908 while (Sivs.any()) {
3909 bool Changed = false;
3910 for (unsigned SJ : Sivs.set_bits()) {
3911 LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
3912 // SJ is an SIV subscript that's part of the current coupled group
3913 unsigned Level;
3914 const SCEV *SplitIter = nullptr;
3915 LLVM_DEBUG(dbgs() << "SIV\n");
3916 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint,
3917 SplitIter))
3918 return nullptr;
3919 ConstrainedLevels.set(Level);
3920 if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
3921 if (Constraints[Level].isEmpty()) {
3922 ++DeltaIndependence;
3923 return nullptr;
3924 }
3925 Changed = true;
3926 }
3927 Sivs.reset(SJ);
3928 }
3929 if (Changed) {
3930 // propagate, possibly creating new SIVs and ZIVs
3931 LLVM_DEBUG(dbgs() << " propagating\n");
3932 LLVM_DEBUG(dbgs() << "\tMivs = ");
3934 for (unsigned SJ : Mivs.set_bits()) {
3935 // SJ is an MIV subscript that's part of the current coupled group
3936 LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
3937 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops,
3938 Constraints, Result.Consistent)) {
3939 LLVM_DEBUG(dbgs() << "\t Changed\n");
3940 ++DeltaPropagations;
3941 Pair[SJ].Classification = classifyPair(
3942 Pair[SJ].Src, LI->getLoopFor(Src->getParent()), Pair[SJ].Dst,
3943 LI->getLoopFor(Dst->getParent()), Pair[SJ].Loops);
3944 switch (Pair[SJ].Classification) {
3945 case Subscript::ZIV:
3946 LLVM_DEBUG(dbgs() << "ZIV\n");
3947 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
3948 return nullptr;
3949 Mivs.reset(SJ);
3950 break;
3951 case Subscript::SIV:
3952 Sivs.set(SJ);
3953 Mivs.reset(SJ);
3954 break;
3955 case Subscript::RDIV:
3956 case Subscript::MIV:
3957 break;
3958 default:
3959 llvm_unreachable("bad subscript classification");
3960 }
3961 }
3962 }
3963 }
3964 }
3965
3966 // test & propagate remaining RDIVs
3967 for (unsigned SJ : Mivs.set_bits()) {
3968 if (Pair[SJ].Classification == Subscript::RDIV) {
3969 LLVM_DEBUG(dbgs() << "RDIV test\n");
3970 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
3971 return nullptr;
3972 // I don't yet understand how to propagate RDIV results
3973 Mivs.reset(SJ);
3974 }
3975 }
3976
3977 // test remaining MIVs
3978 // This code is temporary.
3979 // Better to somehow test all remaining subscripts simultaneously.
3980 for (unsigned SJ : Mivs.set_bits()) {
3981 if (Pair[SJ].Classification == Subscript::MIV) {
3982 LLVM_DEBUG(dbgs() << "MIV test\n");
3983 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
3984 return nullptr;
3985 } else
3986 llvm_unreachable("expected only MIV subscripts at this point");
3987 }
3988
3989 // update Result.DV from constraint vector
3990 LLVM_DEBUG(dbgs() << " updating\n");
3991 for (unsigned SJ : ConstrainedLevels.set_bits()) {
3992 if (SJ > CommonLevels)
3993 break;
3994 updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
3995 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
3996 return nullptr;
3997 }
3998 }
3999 }
4000
4001 // Make sure the Scalar flags are set correctly.
4002 SmallBitVector CompleteLoops(MaxLevels + 1);
4003 for (unsigned SI = 0; SI < Pairs; ++SI)
4004 CompleteLoops |= Pair[SI].Loops;
4005 for (unsigned II = 1; II <= CommonLevels; ++II)
4006 if (CompleteLoops[II])
4007 Result.DV[II - 1].Scalar = false;
4008
4009 // Set the distance to zero if the direction is EQ.
4010 // TODO: Ideally, the distance should be set to 0 immediately simultaneously
4011 // with the corresponding direction being set to EQ.
4012 for (unsigned II = 1; II <= Result.getLevels(); ++II) {
4013 if (Result.getDirection(II) == Dependence::DVEntry::EQ) {
4014 if (Result.DV[II - 1].Distance == nullptr)
4015 Result.DV[II - 1].Distance = SE->getZero(SrcSCEV->getType());
4016 else
4017 assert(Result.DV[II - 1].Distance->isZero() &&
4018 "Inconsistency between distance and direction");
4019 }
4020
4021#ifndef NDEBUG
4022 // Check that the converse (i.e., if the distance is zero, then the
4023 // direction is EQ) holds.
4024 const SCEV *Distance = Result.getDistance(II);
4025 if (Distance && Distance->isZero())
4026 assert(Result.getDirection(II) == Dependence::DVEntry::EQ &&
4027 "Distance is zero, but direction is not EQ");
4028#endif
4029 }
4030
4031 if (PossiblyLoopIndependent) {
4032 // Make sure the LoopIndependent flag is set correctly.
4033 // All directions must include equal, otherwise no
4034 // loop-independent dependence is possible.
4035 for (unsigned II = 1; II <= CommonLevels; ++II) {
4036 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) {
4037 Result.LoopIndependent = false;
4038 break;
4039 }
4040 }
4041 } else {
4042 // On the other hand, if all directions are equal and there's no
4043 // loop-independent dependence possible, then no dependence exists.
4044 bool AllEqual = true;
4045 for (unsigned II = 1; II <= CommonLevels; ++II) {
4046 if (Result.getDirection(II) != Dependence::DVEntry::EQ) {
4047 AllEqual = false;
4048 break;
4049 }
4050 }
4051 if (AllEqual)
4052 return nullptr;
4053 }
4054
4055 return std::make_unique<FullDependence>(std::move(Result));
4056}
4057
4058//===----------------------------------------------------------------------===//
4059// getSplitIteration -
4060// Rather than spend rarely-used space recording the splitting iteration
4061// during the Weak-Crossing SIV test, we re-compute it on demand.
4062// The re-computation is basically a repeat of the entire dependence test,
4063// though simplified since we know that the dependence exists.
4064// It's tedious, since we must go through all propagations, etc.
4065//
4066// Care is required to keep this code up to date with respect to the routine
4067// above, depends().
4068//
4069// Generally, the dependence analyzer will be used to build
4070// a dependence graph for a function (basically a map from instructions
4071// to dependences). Looking for cycles in the graph shows us loops
4072// that cannot be trivially vectorized/parallelized.
4073//
4074// We can try to improve the situation by examining all the dependences
4075// that make up the cycle, looking for ones we can break.
4076// Sometimes, peeling the first or last iteration of a loop will break
4077// dependences, and we've got flags for those possibilities.
4078// Sometimes, splitting a loop at some other iteration will do the trick,
4079// and we've got a flag for that case. Rather than waste the space to
4080// record the exact iteration (since we rarely know), we provide
4081// a method that calculates the iteration. It's a drag that it must work
4082// from scratch, but wonderful in that it's possible.
4083//
4084// Here's an example:
4085//
4086// for (i = 0; i < 10; i++)
4087// A[i] = ...
4088// ... = A[11 - i]
4089//
4090// There's a loop-carried flow dependence from the store to the load,
4091// found by the weak-crossing SIV test. The dependence will have a flag,
4092// indicating that the dependence can be broken by splitting the loop.
4093// Calling getSplitIteration will return 5.
4094// Splitting the loop breaks the dependence, like so:
4095//
4096// for (i = 0; i <= 5; i++)
4097// A[i] = ...
4098// ... = A[11 - i]
4099// for (i = 6; i < 10; i++)
4100// A[i] = ...
4101// ... = A[11 - i]
4102//
4103// breaks the dependence and allows us to vectorize/parallelize
4104// both loops.
4106 unsigned SplitLevel) {
4107 assert(Dep.isSplitable(SplitLevel) &&
4108 "Dep should be splitable at SplitLevel");
4109 Instruction *Src = Dep.getSrc();
4110 Instruction *Dst = Dep.getDst();
4111 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());
4112 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
4113 assert(isLoadOrStore(Src));
4114 assert(isLoadOrStore(Dst));
4115 Value *SrcPtr = getLoadStorePointerOperand(Src);
4116 Value *DstPtr = getLoadStorePointerOperand(Dst);
4118 AA, F->getDataLayout(), MemoryLocation::get(Dst),
4120
4121 // establish loop nesting levels
4122 establishNestingLevels(Src, Dst);
4123
4124 FullDependence Result(Src, Dst, Dep.Assumptions, false, CommonLevels);
4125
4126 unsigned Pairs = 1;
4127 SmallVector<Subscript, 2> Pair(Pairs);
4128 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
4129 const SCEV *DstSCEV = SE->getSCEV(DstPtr);
4130 Pair[0].Src = SE->removePointerBase(SrcSCEV);
4131 Pair[0].Dst = SE->removePointerBase(DstSCEV);
4132
4133 if (Delinearize) {
4134 if (tryDelinearize(Src, Dst, Pair)) {
4135 LLVM_DEBUG(dbgs() << " delinearized\n");
4136 Pairs = Pair.size();
4137 }
4138 }
4139
4140 for (unsigned P = 0; P < Pairs; ++P) {
4141 assert(Pair[P].Src->getType()->isIntegerTy() && "Src must be an integer");
4142 assert(Pair[P].Dst->getType()->isIntegerTy() && "Dst must be an integer");
4143 Pair[P].Loops.resize(MaxLevels + 1);
4144 Pair[P].GroupLoops.resize(MaxLevels + 1);
4145 Pair[P].Group.resize(Pairs);
4146 removeMatchingExtensions(&Pair[P]);
4147 Pair[P].Classification =
4148 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), Pair[P].Dst,
4149 LI->getLoopFor(Dst->getParent()), Pair[P].Loops);
4150 Pair[P].GroupLoops = Pair[P].Loops;
4151 Pair[P].Group.set(P);
4152 }
4153
4154 SmallBitVector Separable(Pairs);
4155 SmallBitVector Coupled(Pairs);
4156
4157 // partition subscripts into separable and minimally-coupled groups
4158 for (unsigned SI = 0; SI < Pairs; ++SI) {
4159 if (Pair[SI].Classification == Subscript::NonLinear) {
4160 // ignore these, but collect loops for later
4161 collectCommonLoops(Pair[SI].Src, LI->getLoopFor(Src->getParent()),
4162 Pair[SI].Loops);
4163 collectCommonLoops(Pair[SI].Dst, LI->getLoopFor(Dst->getParent()),
4164 Pair[SI].Loops);
4165 Result.Consistent = false;
4166 } else if (Pair[SI].Classification == Subscript::ZIV)
4167 Separable.set(SI);
4168 else {
4169 // SIV, RDIV, or MIV, so check for coupled group
4170 bool Done = true;
4171 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
4172 SmallBitVector Intersection = Pair[SI].GroupLoops;
4173 Intersection &= Pair[SJ].GroupLoops;
4174 if (Intersection.any()) {
4175 // accumulate set of all the loops in group
4176 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
4177 // accumulate set of all subscripts in group
4178 Pair[SJ].Group |= Pair[SI].Group;
4179 Done = false;
4180 }
4181 }
4182 if (Done) {
4183 if (Pair[SI].Group.count() == 1)
4184 Separable.set(SI);
4185 else
4186 Coupled.set(SI);
4187 }
4188 }
4189 }
4190
4191 Constraint NewConstraint;
4192 NewConstraint.setAny(SE);
4193
4194 // test separable subscripts
4195 for (unsigned SI : Separable.set_bits()) {
4196 switch (Pair[SI].Classification) {
4197 case Subscript::SIV: {
4198 unsigned Level;
4199 const SCEV *SplitIter = nullptr;
4200 (void)testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint,
4201 SplitIter);
4202 if (Level == SplitLevel) {
4203 assert(SplitIter != nullptr);
4204 return SplitIter;
4205 }
4206 break;
4207 }
4208 case Subscript::ZIV:
4209 case Subscript::RDIV:
4210 case Subscript::MIV:
4211 break;
4212 default:
4213 llvm_unreachable("subscript has unexpected classification");
4214 }
4215 }
4216
4217 assert(!Coupled.empty() && "coupled expected non-empty");
4218
4219 // test coupled subscript groups
4220 SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
4221 for (unsigned II = 0; II <= MaxLevels; ++II)
4222 Constraints[II].setAny(SE);
4223 for (unsigned SI : Coupled.set_bits()) {
4224 SmallBitVector Group(Pair[SI].Group);
4225 SmallBitVector Sivs(Pairs);
4226 SmallBitVector Mivs(Pairs);
4227 SmallBitVector ConstrainedLevels(MaxLevels + 1);
4228 for (unsigned SJ : Group.set_bits()) {
4229 if (Pair[SJ].Classification == Subscript::SIV)
4230 Sivs.set(SJ);
4231 else
4232 Mivs.set(SJ);
4233 }
4234 while (Sivs.any()) {
4235 bool Changed = false;
4236 for (unsigned SJ : Sivs.set_bits()) {
4237 // SJ is an SIV subscript that's part of the current coupled group
4238 unsigned Level;
4239 const SCEV *SplitIter = nullptr;
4240 (void)testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint,
4241 SplitIter);
4242 if (Level == SplitLevel && SplitIter)
4243 return SplitIter;
4244 ConstrainedLevels.set(Level);
4245 if (intersectConstraints(&Constraints[Level], &NewConstraint))
4246 Changed = true;
4247 Sivs.reset(SJ);
4248 }
4249 if (!Changed)
4250 continue;
4251 // propagate, possibly creating new SIVs and ZIVs
4252 for (unsigned SJ : Mivs.set_bits()) {
4253 // SJ is an MIV subscript that's part of the current coupled group
4254 if (!propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Constraints,
4255 Result.Consistent))
4256 continue;
4257 Pair[SJ].Classification = classifyPair(
4258 Pair[SJ].Src, LI->getLoopFor(Src->getParent()), Pair[SJ].Dst,
4259 LI->getLoopFor(Dst->getParent()), Pair[SJ].Loops);
4260 switch (Pair[SJ].Classification) {
4261 case Subscript::ZIV:
4262 Mivs.reset(SJ);
4263 break;
4264 case Subscript::SIV:
4265 Sivs.set(SJ);
4266 Mivs.reset(SJ);
4267 break;
4268 case Subscript::RDIV:
4269 case Subscript::MIV:
4270 break;
4271 default:
4272 llvm_unreachable("bad subscript classification");
4273 }
4274 }
4275 }
4276 }
4277 llvm_unreachable("somehow reached end of routine");
4278}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
static bool isLoadOrStore(const Instruction *I)
static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM, const APInt &Delta, APInt &G, APInt &X, APInt &Y)
static void dumpSmallBitVector(SmallBitVector &BV)
static APInt ceilingOfQuotient(const APInt &A, const APInt &B)
static APInt floorOfQuotient(const APInt &A, const APInt &B)
static AliasResult underlyingObjectsAlias(AAResults *AA, const DataLayout &DL, const MemoryLocation &LocA, const MemoryLocation &LocB)
static cl::opt< bool > RunSIVRoutinesOnly("da-run-siv-routines-only", cl::init(false), cl::ReallyHidden, cl::desc("Run only SIV routines and disable others (ZIV, RDIV, and MIV). " "The purpose is mainly to exclude the influence of those routines " "in regression tests for SIV routines."))
static std::pair< std::optional< APInt >, std::optional< APInt > > inferDomainOfAffine(const APInt &A, const APInt &B, const std::optional< APInt > &UB)
Given an affine expression of the form A*k + B, where k is an arbitrary integer, infer the possible r...
static std::optional< APInt > getConstantPart(const SCEV *Expr)
static bool isRemainderZero(const SCEVConstant *Dividend, const SCEVConstant *Divisor)
static cl::opt< bool > Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::desc("Try to delinearize array references."))
static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA, ScalarEvolution &SE, bool NormalizeResults)
static cl::opt< unsigned > MIVMaxLevelThreshold("da-miv-max-level-threshold", cl::init(7), cl::Hidden, cl::desc("Maximum depth allowed for the recursive algorithm used to " "explore MIV direction vectors."))
static cl::opt< bool > DisableDelinearizationChecks("da-disable-delinearization-checks", cl::Hidden, cl::desc("Disable checks that try to statically verify validity of " "delinearized subscripts. Enabling this option may result in incorrect " "dependence vectors for languages that allow the subscript of one " "dimension to underflow or overflow into another dimension."))
Hexagon Hardware Loops
Module.h This file contains the declarations for the Module class.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:231
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
static bool isInput(const ArrayRef< StringRef > &Prefixes, StringRef Arg)
Definition OptTable.cpp:146
#define P(N)
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition APInt.h:78
static LLVM_ABI void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Definition APInt.cpp:1890
APInt abs() const
Get the absolute value.
Definition APInt.h:1795
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1201
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition APInt.cpp:1644
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition APInt.h:1166
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1736
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
Definition APInt.h:1237
The possible results of an alias query.
@ MayAlias
The two locations may or may not alias.
@ NoAlias
The two locations do not alias at all.
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
This templated class represents "all analyses that operate over <aparticular IR unit>" (e....
Definition Analysis.h:50
Represent the analysis usage information of a pass.
void setPreservesAll()
Set by analyses that do not transform their input at all.
AnalysisUsage & addRequiredTransitive()
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
void enableCrossIterationMode()
Assume that values may come from different cycle iterations.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ ICMP_SLT
signed less than
Definition InstrTypes.h:707
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:708
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:705
@ ICMP_NE
not equal
Definition InstrTypes.h:700
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:706
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
Legacy pass manager pass to access dependence information.
void getAnalysisUsage(AnalysisUsage &) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
void print(raw_ostream &, const Module *=nullptr) const override
print - Print out the internal state of the pass.
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
AnalysisPass to compute dependence information in a function.
LLVM_ABI Result run(Function &F, FunctionAnalysisManager &FAM)
DependenceInfo - This class is the main dependence-analysis driver.
LLVM_ABI bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv)
Handle transitive invalidation when the cached analysis results go away.
LLVM_ABI const SCEV * getSplitIteration(const Dependence &Dep, unsigned Level)
getSplitIteration - Give a dependence that's splittable at some particular level, return the iteratio...
LLVM_ABI SCEVUnionPredicate getRuntimeAssumptions() const
getRuntimeAssumptions - Returns all the runtime assumptions under which the dependence test is valid.
LLVM_ABI std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool UnderRuntimeAssumptions=false)
depends - Tests for a dependence between the Src and Dst instructions.
Dependence - This class represents a dependence between two memory memory references in a function.
virtual unsigned getDirection(unsigned Level) const
getDirection - Returns the direction associated with a particular level.
virtual bool isPeelFirst(unsigned Level) const
isPeelFirst - Returns true if peeling the first iteration from this loop will break this dependence.
Instruction * getDst() const
getDst - Returns the destination instruction for this dependence.
Dependence(Dependence &&)=default
virtual bool isConfused() const
isConfused - Returns true if this dependence is confused (the compiler understands nothing and makes ...
virtual bool isSplitable(unsigned Level) const
isSplitable - Returns true if splitting this loop will break the dependence.
virtual const SCEV * getDistance(unsigned Level) const
getDistance - Returns the distance (or NULL) associated with a particular level.
virtual bool isScalar(unsigned Level) const
isScalar - Returns true if a particular level is scalar; that is, if no subscript in the source or de...
virtual bool isConsistent() const
isConsistent - Returns true if this dependence is consistent (occurs every time the source and destin...
virtual bool isPeelLast(unsigned Level) const
isPeelLast - Returns true if peeling the last iteration from this loop will break this dependence.
virtual unsigned getLevels() const
getLevels - Returns the number of common loops surrounding the source and destination of the dependen...
bool isFlow() const
isFlow - Returns true if this is a flow (aka true) dependence.
bool isInput() const
isInput - Returns true if this is an input dependence.
bool isAnti() const
isAnti - Returns true if this is an anti dependence.
Instruction * getSrc() const
getSrc - Returns the source instruction for this dependence.
virtual bool isLoopIndependent() const
isLoopIndependent - Returns true if this is a loop-independent dependence.
bool isOutput() const
isOutput - Returns true if this is an output dependence.
void dump(raw_ostream &OS) const
dump - For debugging purposes, dumps a dependence to OS.
FullDependence - This class represents a dependence between two memory references in a function.
unsigned getDirection(unsigned Level) const override
getDirection - Returns the direction associated with a particular level.
const SCEV * getDistance(unsigned Level) const override
getDistance - Returns the distance (or NULL) associated with a particular level.
FullDependence(Instruction *Source, Instruction *Destination, const SCEVUnionPredicate &Assumes, bool PossiblyLoopIndependent, unsigned Levels)
bool isDirectionNegative() const override
Check if the direction vector is negative.
bool isSplitable(unsigned Level) const override
isSplitable - Returns true if splitting the loop will break the dependence.
bool isScalar(unsigned Level) const override
isScalar - Returns true if a particular level is scalar; that is, if no subscript in the source or de...
bool isPeelLast(unsigned Level) const override
isPeelLast - Returns true if peeling the last iteration from this loop will break this dependence.
bool isPeelFirst(unsigned Level) const override
isPeelFirst - Returns true if peeling the first iteration from this loop will break this dependence.
bool normalize(ScalarEvolution *SE) override
If the direction vector is negative, normalize the direction vector to make it non-negative.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
FunctionPass(char &pid)
Definition Pass.h:316
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
An instruction for reading from memory.
bool isPrecise() const
uint64_t toRaw() const
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
const LoopT * getOutermostLoop() const
Get the outermost loop in which this loop is contained.
unsigned getLoopDepth() const
Return the nesting level of this loop.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
const Value * Ptr
The address of the start of the location.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition Analysis.h:275
LLVM_ABI const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const SCEV * getOperand() const
This class represents a constant integer value.
const APInt & getAPInt() const
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
const SCEV * getOperand(unsigned i) const
This class represents an assumption made using SCEV expressions which can be checked at run-time.
This class represents a composition of other SCEV predicates, and is the class that most clients will...
void print(raw_ostream &OS, unsigned Depth) const override
Prints a textual representation of this predicate with an indentation of Depth.
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
This class represents an analyzed expression in the program.
LLVM_ABI ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
LLVM_ABI bool isOne() const
Return true if the expression is a constant one.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
iterator_range< const_set_bits_iterator > set_bits() const
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
bool any() const
Returns true if any bit is set.
bool empty() const
Tests whether there are no bits in this bitvector.
size_type count() const
Returns the number of bits which are set.
SmallBitVector & reset()
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
LLVM Value Representation.
Definition Value.h:75
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition APInt.h:2248
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition APInt.h:2253
LLVM_ABI APInt GreatestCommonDivisor(APInt A, APInt B)
Compute GCD of two unsigned APInt values.
Definition APInt.cpp:798
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
@ TB
TB - TwoByte - Set if this instruction has a two byte opcode, which starts with a 0x0F byte before th...
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
InstIterator< SymbolTableList< BasicBlock >, Function::iterator, BasicBlock::iterator, Instruction > inst_iterator
void collectParametricTerms(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl< const SCEV * > &Terms)
Collect parametric terms occurring in step expressions (first step of delinearization).
void findArrayDimensions(ScalarEvolution &SE, SmallVectorImpl< const SCEV * > &Terms, SmallVectorImpl< const SCEV * > &Sizes, const SCEV *ElementSize)
Compute the array dimensions Sizes from the set of Terms extracted from the memory access function of...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
@ Done
Definition Threading.h:60
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
inst_iterator inst_begin(Function *F)
void computeAccessFunctions(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< const SCEV * > &Sizes)
Return in Subscripts the access functions for each dimension in Sizes (third step of delinearization)...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
inst_iterator inst_end(Function *F)
bool tryDelinearizeFixedSizeImpl(ScalarEvolution *SE, Instruction *Inst, const SCEV *AccessFn, SmallVectorImpl< const SCEV * > &Subscripts, SmallVectorImpl< int > &Sizes)
Implementation of fixed size array delinearization.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
LLVM_ABI FunctionPass * createDependenceAnalysisWrapperPass()
createDependenceAnalysisPass - This creates an instance of the DependenceAnalysis wrapper pass.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Dependence::DVEntry - Each level in the distance/direction vector has a direction (or perhaps a union...