Coverage Report

Created: 2024-11-21 17:23

/home/runner/work/DirectXShaderCompiler/DirectXShaderCompiler/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
Per-function execution counts are noted inline; a count of 0 marks code that was never executed.
//===----------------------- AlignmentFromAssumptions.cpp -----------------===//
//                  Set Load/Store Alignments From Assumptions
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a ScalarEvolution-based transformation to set
// the alignments of loads, stores, and memory intrinsics based on the truth
// expressions of assume intrinsics. The primary motivation is to handle
// complex alignment assumptions that apply to vector loads and stores that
// appear after vectorization and unrolling.
//
//===----------------------------------------------------------------------===//
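
// Illustration (not in the original source): the IR pattern this pass
// consumes is the alignment assumption that clang emits for
// __builtin_assume_aligned(a, 32); the value names here are hypothetical:
//
//   %ptrint    = ptrtoint float* %a to i64
//   %maskedptr = and i64 %ptrint, 31
//   %maskcond  = icmp eq i64 %maskedptr, 0
//   call void @llvm.assume(i1 %maskcond)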

#define AA_NAME "alignment-from-assumptions"
#define DEBUG_TYPE AA_NAME
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

STATISTIC(NumLoadAlignChanged,
  "Number of loads changed by alignment assumptions");
STATISTIC(NumStoreAlignChanged,
  "Number of stores changed by alignment assumptions");
STATISTIC(NumMemIntAlignChanged,
  "Number of memory intrinsics changed by alignment assumptions");
namespace {
struct AlignmentFromAssumptions : public FunctionPass {
  static char ID; // Pass identification, replacement for typeid
  AlignmentFromAssumptions() : FunctionPass(ID) { // executed 17.6k times
    initializeAlignmentFromAssumptionsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override { // executed 17.6k times
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<ScalarEvolution>();
    AU.addRequired<DominatorTreeWrapperPass>();

    AU.setPreservesCFG();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<ScalarEvolution>();
  }

  // For memory transfers, we need a common alignment for both the source and
  // destination. If we have a new alignment for only one operand of a transfer
  // instruction, save it in these maps. If we reach the other operand through
  // another assumption later, then we may change the alignment at that point.
  DenseMap<MemTransferInst *, unsigned> NewDestAlignments, NewSrcAlignments;

  ScalarEvolution *SE;
  DominatorTree *DT;

  bool extractAlignmentInfo(CallInst *I, Value *&AAPtr, const SCEV *&AlignSCEV,
                            const SCEV *&OffSCEV);
  bool processAssumption(CallInst *I);
};
}

char AlignmentFromAssumptions::ID = 0;
static const char aip_name[] = "Alignment from assumptions";
INITIALIZE_PASS_BEGIN(AlignmentFromAssumptions, AA_NAME, // executed 5.62k times
                      aip_name, false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(AlignmentFromAssumptions, AA_NAME,
                    aip_name, false, false)

FunctionPass *llvm::createAlignmentFromAssumptionsPass() { // executed 16.8k times
  return new AlignmentFromAssumptions();
}

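// Usage sketch (assumed, not part of this file): scheduling the pass with the
// legacy pass manager, where M is a hypothetical Module:
//
//   legacy::PassManager PM;
//   PM.add(createAlignmentFromAssumptionsPass());
//   PM.run(M);
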
// Given an expression for the (constant) alignment, AlignSCEV, and an
// expression for the displacement between a pointer and the aligned address,
// DiffSCEV, compute the alignment of the displaced pointer if it can be
// reduced to a constant. Using SCEV to compute alignment handles the case
// where DiffSCEV is a recurrence with constant start such that the aligned
// offset is constant, e.g. {16,+,32} % 32 -> 16.
static unsigned getNewAlignmentDiff(const SCEV *DiffSCEV,
                                    const SCEV *AlignSCEV,
                                    ScalarEvolution *SE) { // never executed (count 0)
  // DiffUnits = Diff % int64_t(Alignment), computed up to sign; only its
  // absolute value is used below.
  const SCEV *DiffAlignDiv = SE->getUDivExpr(DiffSCEV, AlignSCEV);
  const SCEV *DiffAlign = SE->getMulExpr(DiffAlignDiv, AlignSCEV);
  const SCEV *DiffUnitsSCEV = SE->getMinusSCEV(DiffAlign, DiffSCEV);

  DEBUG(dbgs() << "\talignment relative to " << *AlignSCEV << " is " <<
                  *DiffUnitsSCEV << " (diff: " << *DiffSCEV << ")\n");

  if (const SCEVConstant *ConstDUSCEV =
      dyn_cast<SCEVConstant>(DiffUnitsSCEV)) {
    int64_t DiffUnits = ConstDUSCEV->getValue()->getSExtValue();

    // If the displacement is an exact multiple of the alignment, then the
    // displaced pointer has the same alignment as the aligned pointer, so
    // return the alignment value.
    if (!DiffUnits)
      return (unsigned)
        cast<SCEVConstant>(AlignSCEV)->getValue()->getSExtValue();

    // If the displacement is not an exact multiple, but the remainder is a
    // constant, then return this remainder (but only if it is a power of 2).
    uint64_t DiffUnitsAbs = std::abs(DiffUnits);
    if (isPowerOf2_64(DiffUnitsAbs))
      return (unsigned) DiffUnitsAbs;
  }

  return 0;
}
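
// Worked example (illustrative, not from the original source): with
// DiffSCEV = 48 and AlignSCEV = 32, DiffAlignDiv = 1, DiffAlign = 32, and
// DiffUnitsSCEV = -16; the absolute value 16 is a power of two, so the
// function returns 16. Per the file header, a recurrence such as {16,+,32}
// reduces the same way against an alignment of 32, also yielding 16.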

// There is an address given by an offset OffSCEV from AASCEV which has an
// alignment AlignSCEV. Use that information, if possible, to compute a new
// alignment for Ptr.
static unsigned getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
                                const SCEV *OffSCEV, Value *Ptr,
                                ScalarEvolution *SE) { // never executed (count 0)
  const SCEV *PtrSCEV = SE->getSCEV(Ptr);
  const SCEV *DiffSCEV = SE->getMinusSCEV(PtrSCEV, AASCEV);

  // On 32-bit platforms, DiffSCEV might now have type i32 -- we've always
  // sign-extended OffSCEV to i64, so make sure they agree again.
  DiffSCEV = SE->getNoopOrSignExtend(DiffSCEV, OffSCEV->getType());

  // What we really want to know is the overall offset to the aligned
  // address. This address is displaced by the provided offset.
  DiffSCEV = SE->getMinusSCEV(DiffSCEV, OffSCEV);

  DEBUG(dbgs() << "AFI: alignment of " << *Ptr << " relative to " <<
                  *AlignSCEV << " and offset " << *OffSCEV <<
                  " using diff " << *DiffSCEV << "\n");

  unsigned NewAlignment = getNewAlignmentDiff(DiffSCEV, AlignSCEV, SE);
  DEBUG(dbgs() << "\tnew alignment: " << NewAlignment << "\n");

  if (NewAlignment) {
    return NewAlignment;
  } else if (const SCEVAddRecExpr *DiffARSCEV =
             dyn_cast<SCEVAddRecExpr>(DiffSCEV)) {
    // The relative offset to the alignment assumption did not yield a
    // constant, but we should try harder: if we assume that a is 32-byte
    // aligned, then in the loop
    //   for (i = 0; i < 1024; i += 4) r += a[i];
    // not all of the loads from a are 32-byte aligned; they instead alternate
    // between 32-byte and 16-byte alignment. As a result, the new alignment
    // will not be a constant, but it can still be improved over the default
    // (of 4) to 16.

    const SCEV *DiffStartSCEV = DiffARSCEV->getStart();
    const SCEV *DiffIncSCEV = DiffARSCEV->getStepRecurrence(*SE);

    DEBUG(dbgs() << "\ttrying start/inc alignment using start " <<
                    *DiffStartSCEV << " and inc " << *DiffIncSCEV << "\n");

    // Now compute the new alignment using the displacement to the value in the
    // first iteration, and also the alignment using the per-iteration delta.
    // If these are the same, then use that answer. Otherwise, use the smaller
    // one, but only if it divides the larger one.
    NewAlignment = getNewAlignmentDiff(DiffStartSCEV, AlignSCEV, SE);
    unsigned NewIncAlignment = getNewAlignmentDiff(DiffIncSCEV, AlignSCEV, SE);

    DEBUG(dbgs() << "\tnew start alignment: " << NewAlignment << "\n");
    DEBUG(dbgs() << "\tnew inc alignment: " << NewIncAlignment << "\n");

    if (!NewAlignment || !NewIncAlignment) {
      return 0;
    } else if (NewAlignment > NewIncAlignment) {
      if (NewAlignment % NewIncAlignment == 0) {
        DEBUG(dbgs() << "\tnew start/inc alignment: " <<
                        NewIncAlignment << "\n");
        return NewIncAlignment;
      }
    } else if (NewIncAlignment > NewAlignment) {
      if (NewIncAlignment % NewAlignment == 0) {
        DEBUG(dbgs() << "\tnew start/inc alignment: " <<
                        NewAlignment << "\n");
        return NewAlignment;
      }
    } else if (NewIncAlignment == NewAlignment) {
      DEBUG(dbgs() << "\tnew start/inc alignment: " <<
                      NewAlignment << "\n");
      return NewAlignment;
    }
  }

  return 0;
}
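
// Worked example (illustrative): for the loop in the comment above, DiffSCEV
// takes the shape {0,+,16} against AlignSCEV = 32. The start yields alignment
// 32, the increment yields 16, and since 32 % 16 == 0 the common answer is
// 16 -- an improvement over the default of 4.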

bool AlignmentFromAssumptions::extractAlignmentInfo(CallInst *I,
                                 Value *&AAPtr, const SCEV *&AlignSCEV,
                                 const SCEV *&OffSCEV) { // never executed (count 0)
  // An alignment assume must be a statement about the least-significant
  // bits of the pointer being zero, possibly with some offset.
  ICmpInst *ICI = dyn_cast<ICmpInst>(I->getArgOperand(0));
  if (!ICI)
    return false;

  // This must be an expression of the form: x & m == 0.
  if (ICI->getPredicate() != ICmpInst::ICMP_EQ)
    return false;

  // Swap things around so that the RHS is 0.
  Value *CmpLHS = ICI->getOperand(0);
  Value *CmpRHS = ICI->getOperand(1);
  const SCEV *CmpLHSSCEV = SE->getSCEV(CmpLHS);
  const SCEV *CmpRHSSCEV = SE->getSCEV(CmpRHS);
  if (CmpLHSSCEV->isZero())
    std::swap(CmpLHS, CmpRHS);
  else if (!CmpRHSSCEV->isZero())
    return false;

  BinaryOperator *CmpBO = dyn_cast<BinaryOperator>(CmpLHS);
  if (!CmpBO || CmpBO->getOpcode() != Instruction::And)
    return false;

  // Swap things around so that the right operand of the and is a constant
  // (the mask); we cannot deal with variable masks.
  Value *AndLHS = CmpBO->getOperand(0);
  Value *AndRHS = CmpBO->getOperand(1);
  const SCEV *AndLHSSCEV = SE->getSCEV(AndLHS);
  const SCEV *AndRHSSCEV = SE->getSCEV(AndRHS);
  if (isa<SCEVConstant>(AndLHSSCEV)) {
    std::swap(AndLHS, AndRHS);
    std::swap(AndLHSSCEV, AndRHSSCEV);
  }

  const SCEVConstant *MaskSCEV = dyn_cast<SCEVConstant>(AndRHSSCEV);
  if (!MaskSCEV)
    return false;

  // The mask must have some trailing ones (otherwise the condition is
  // trivial and tells us nothing about the alignment of the left operand).
  unsigned TrailingOnes =
    MaskSCEV->getValue()->getValue().countTrailingOnes();
  if (!TrailingOnes)
    return false;

  // Cap the alignment at the maximum with which LLVM can deal (and make sure
  // we don't overflow the shift).
  uint64_t Alignment;
  TrailingOnes = std::min(TrailingOnes,
    unsigned(sizeof(unsigned) * CHAR_BIT - 1));
  Alignment = std::min(1u << TrailingOnes, +Value::MaximumAlignment);

  Type *Int64Ty = Type::getInt64Ty(I->getParent()->getParent()->getContext());
  AlignSCEV = SE->getConstant(Int64Ty, Alignment);

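  // Illustration (added): a mask of 31 (binary 11111) has five trailing ones,
  // so the implied alignment is 1u << 5 = 32; a mask of 15 implies 16.
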
  // The LHS might be a ptrtoint instruction, or it might be the pointer
  // with an offset.
  AAPtr = nullptr;
  OffSCEV = nullptr;
  if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(AndLHS)) {
    AAPtr = PToI->getPointerOperand();
    OffSCEV = SE->getConstant(Int64Ty, 0);
  } else if (const SCEVAddExpr* AndLHSAddSCEV =
             dyn_cast<SCEVAddExpr>(AndLHSSCEV)) {
    // Try to find the ptrtoint; subtract it and the rest is the offset.
    for (SCEVAddExpr::op_iterator J = AndLHSAddSCEV->op_begin(),
         JE = AndLHSAddSCEV->op_end(); J != JE; ++J)
      if (const SCEVUnknown *OpUnk = dyn_cast<SCEVUnknown>(*J))
        if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(OpUnk->getValue())) {
          AAPtr = PToI->getPointerOperand();
          OffSCEV = SE->getMinusSCEV(AndLHSAddSCEV, *J);
          break;
        }
  }

  if (!AAPtr)
    return false;

  // Sign extend the offset to 64 bits (so that it is like all of the other
  // expressions).
  unsigned OffSCEVBits = OffSCEV->getType()->getPrimitiveSizeInBits();
  if (OffSCEVBits < 64)
    OffSCEV = SE->getSignExtendExpr(OffSCEV, Int64Ty);
  else if (OffSCEVBits > 64)
    return false;

  AAPtr = AAPtr->stripPointerCasts();
  return true;
}
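
// Worked example (illustrative): for an assumption whose condition is
// ((ptrtoint p) + 4) & 15 == 0, this routine extracts AAPtr = p,
// AlignSCEV = 16 (four trailing ones in the mask), and OffSCEV = 4.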

bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) { // never executed (count 0)
  Value *AAPtr;
  const SCEV *AlignSCEV, *OffSCEV;
  if (!extractAlignmentInfo(ACall, AAPtr, AlignSCEV, OffSCEV))
    return false;

  const SCEV *AASCEV = SE->getSCEV(AAPtr);

  // Apply the assumption to all other users of the specified pointer.
  SmallPtrSet<Instruction *, 32> Visited;
  SmallVector<Instruction*, 16> WorkList;
  for (User *J : AAPtr->users()) {
    if (J == ACall)
      continue;

    if (Instruction *K = dyn_cast<Instruction>(J))
      if (isValidAssumeForContext(ACall, K, DT))
        WorkList.push_back(K);
  }

  while (!WorkList.empty()) {
    Instruction *J = WorkList.pop_back_val();

    if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
      unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
        LI->getPointerOperand(), SE);

      if (NewAlignment > LI->getAlignment()) {
        LI->setAlignment(NewAlignment);
        ++NumLoadAlignChanged;
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
      unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
        SI->getPointerOperand(), SE);

      if (NewAlignment > SI->getAlignment()) {
        SI->setAlignment(NewAlignment);
        ++NumStoreAlignChanged;
      }
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(J)) {
      unsigned NewDestAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
        MI->getDest(), SE);

      // For memory transfers, we need a common alignment for both the
      // source and destination. If we have a new alignment for this
      // instruction, but only for one operand, save it. If we reach the
      // other operand through another assumption later, then we may
      // change the alignment at that point.
      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
        unsigned NewSrcAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
          MTI->getSource(), SE);

        DenseMap<MemTransferInst *, unsigned>::iterator DI =
          NewDestAlignments.find(MTI);
        unsigned AltDestAlignment = (DI == NewDestAlignments.end()) ?
                                    0 : DI->second;

        DenseMap<MemTransferInst *, unsigned>::iterator SI =
          NewSrcAlignments.find(MTI);
        unsigned AltSrcAlignment = (SI == NewSrcAlignments.end()) ?
                                   0 : SI->second;

        DEBUG(dbgs() << "\tmem trans: " << NewDestAlignment << " " <<
                        AltDestAlignment << " " << NewSrcAlignment <<
                        " " << AltSrcAlignment << "\n");

        // Of these four alignments, pick the largest possible...
        unsigned NewAlignment = 0;
        if (NewDestAlignment <= std::max(NewSrcAlignment, AltSrcAlignment))
          NewAlignment = std::max(NewAlignment, NewDestAlignment);
        if (AltDestAlignment <= std::max(NewSrcAlignment, AltSrcAlignment))
          NewAlignment = std::max(NewAlignment, AltDestAlignment);
        if (NewSrcAlignment <= std::max(NewDestAlignment, AltDestAlignment))
          NewAlignment = std::max(NewAlignment, NewSrcAlignment);
        if (AltSrcAlignment <= std::max(NewDestAlignment, AltDestAlignment))
          NewAlignment = std::max(NewAlignment, AltSrcAlignment);

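        // Illustration (added): with NewDestAlignment = 32 and
        // NewSrcAlignment = 16 (both Alt values 0), the first test fails
        // (32 > 16) but the third passes (16 <= 32), so NewAlignment becomes
        // 16 -- the largest alignment provable for both operands at once.
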
        if (NewAlignment > MI->getAlignment()) {
          MI->setAlignment(ConstantInt::get(Type::getInt32Ty(
            MI->getParent()->getContext()), NewAlignment));
          ++NumMemIntAlignChanged;
        }

        NewDestAlignments.insert(std::make_pair(MTI, NewDestAlignment));
        NewSrcAlignments.insert(std::make_pair(MTI, NewSrcAlignment));
      } else if (NewDestAlignment > MI->getAlignment()) {
        assert((!isa<MemIntrinsic>(MI) || isa<MemSetInst>(MI)) &&
               "Unknown memory intrinsic");

        MI->setAlignment(ConstantInt::get(Type::getInt32Ty(
          MI->getParent()->getContext()), NewDestAlignment));
        ++NumMemIntAlignChanged;
      }
    }

    // Now that we've updated that use of the pointer, look for other uses of
    // the pointer to update.
    Visited.insert(J);
    for (User *UJ : J->users()) {
      Instruction *K = cast<Instruction>(UJ);
      if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DT))
        WorkList.push_back(K);
    }
  }

  return true;
}

bool AlignmentFromAssumptions::runOnFunction(Function &F) { // executed 20.9k times
  bool Changed = false;
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  SE = &getAnalysis<ScalarEvolution>();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();

  NewDestAlignments.clear();
  NewSrcAlignments.clear();

  for (auto &AssumeVH : AC.assumptions())
    if (AssumeVH) // loop body never executed: no assumptions were encountered
      Changed |= processAssumption(cast<CallInst>(AssumeVH));

  return Changed;
}
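
// End-to-end sketch (assumed, not part of this file): C source that produces
// the assume pattern this pass consumes, via clang's builtin:
//
//   float sum(float *a) {
//     float *ap = (float *) __builtin_assume_aligned(a, 32);
//     float r = 0;
//     for (int i = 0; i < 1024; ++i) r += ap[i];
//     return r;
//   }
//
// After vectorization, the wide loads of ap[i] are the loads whose alignment
// this pass can raise using the recorded assumption.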