Coverage Report

Created: 2025-08-28 20:31

/home/runner/work/DirectXShaderCompiler/DirectXShaderCompiler/lib/HLSL/HLOperationLower.cpp
Line
Count
Source (jump to first uncovered line)
1
///////////////////////////////////////////////////////////////////////////////
2
//                                                                           //
3
// HLOperationLower.cpp                                                      //
4
// Copyright (C) Microsoft Corporation. All rights reserved.                 //
5
// This file is distributed under the University of Illinois Open Source     //
6
// License. See LICENSE.TXT for details.                                     //
7
//                                                                           //
8
// Lower functions to lower HL operations to DXIL operations.                //
9
//                                                                           //
10
///////////////////////////////////////////////////////////////////////////////
11
12
#include "dxc/DXIL/DxilConstants.h"
13
#define _USE_MATH_DEFINES
14
#include <array>
15
#include <cmath>
16
#include <functional>
17
#include <unordered_set>
18
19
#include "dxc/DXIL/DxilConstants.h"
20
#include "dxc/DXIL/DxilInstructions.h"
21
#include "dxc/DXIL/DxilModule.h"
22
#include "dxc/DXIL/DxilOperations.h"
23
#include "dxc/DXIL/DxilResourceProperties.h"
24
#include "dxc/DXIL/DxilUtil.h"
25
#include "dxc/HLSL/DxilPoisonValues.h"
26
#include "dxc/HLSL/HLLowerUDT.h"
27
#include "dxc/HLSL/HLMatrixLowerHelper.h"
28
#include "dxc/HLSL/HLMatrixType.h"
29
#include "dxc/HLSL/HLModule.h"
30
#include "dxc/HLSL/HLOperationLower.h"
31
#include "dxc/HLSL/HLOperationLowerExtension.h"
32
#include "dxc/HLSL/HLOperations.h"
33
#include "dxc/HlslIntrinsicOp.h"
34
35
#include "llvm/ADT/APSInt.h"
36
#include "llvm/IR/GetElementPtrTypeIterator.h"
37
#include "llvm/IR/IRBuilder.h"
38
#include "llvm/IR/Instructions.h"
39
#include "llvm/IR/IntrinsicInst.h"
40
#include "llvm/IR/Module.h"
41
42
using namespace llvm;
43
using namespace hlsl;
44
45
struct HLOperationLowerHelper {
46
  HLModule &M;
47
  OP &hlslOP;
48
  Type *voidTy;
49
  Type *f32Ty;
50
  Type *i32Ty;
51
  Type *i16Ty;
52
  llvm::Type *i1Ty;
53
  Type *i8Ty;
54
  DxilTypeSystem &dxilTypeSys;
55
  DxilFunctionProps *functionProps;
56
  DataLayout dataLayout;
57
  SmallDenseMap<Type *, Type *, 4> loweredTypes;
58
  HLOperationLowerHelper(HLModule &HLM);
59
};
60
61
// Binds the helper to `HLM`. The data layout string is chosen from the
// module's `bUseMinPrecision` option, the commonly used LLVM types are cached,
// and `functionProps` is pointed at the entry function's properties when the
// module has them (otherwise it stays nullptr).
HLOperationLowerHelper::HLOperationLowerHelper(HLModule &HLM)
    : M(HLM), hlslOP(*HLM.GetOP()), dxilTypeSys(HLM.GetTypeSystem()),
      dataLayout(DataLayout(HLM.GetHLOptions().bUseMinPrecision
                                ? hlsl::DXIL::kLegacyLayoutString
                                : hlsl::DXIL::kNewLayoutString)) {
  llvm::LLVMContext &Context = HLM.GetCtx();
  voidTy = Type::getVoidTy(Context);
  f32Ty = Type::getFloatTy(Context);
  i32Ty = Type::getInt32Ty(Context);
  i16Ty = Type::getInt16Ty(Context);
  i1Ty = Type::getInt1Ty(Context);
  i8Ty = Type::getInt8Ty(Context);

  Function *Entry = HLM.GetEntryFunction();
  functionProps = HLM.HasDxilFunctionProps(Entry)
                      ? &HLM.GetDxilFunctionProps(Entry)
                      : nullptr;
}
78
79
struct HLObjectOperationLowerHelper {
80
private:
81
  // For object intrinsics.
82
  HLModule &HLM;
83
  struct ResAttribute {
84
    DXIL::ResourceClass RC;
85
    DXIL::ResourceKind RK;
86
    Type *ResourceType;
87
  };
88
  std::unordered_map<Value *, ResAttribute> HandleMetaMap;
89
  std::unordered_set<Instruction *> &UpdateCounterSet;
90
  // Map from pointer of cbuffer to pointer of resource.
91
  // For cbuffer like this:
92
  //   cbuffer A {
93
  //     Texture2D T;
94
  //   };
95
  // A global resource Texture2D T2 will be created for Texture2D T.
96
  // CBPtrToResourceMap[T] will return T2.
97
  std::unordered_map<Value *, Value *> CBPtrToResourceMap;
98
99
public:
100
  HLObjectOperationLowerHelper(HLModule &HLM,
101
                               std::unordered_set<Instruction *> &UpdateCounter)
102
20.3k
      : HLM(HLM), UpdateCounterSet(UpdateCounter) {}
103
18.4k
  DXIL::ResourceClass GetRC(Value *Handle) {
104
18.4k
    ResAttribute &Res = FindCreateHandleResourceBase(Handle);
105
18.4k
    return Res.RC;
106
18.4k
  }
107
45.6k
  DXIL::ResourceKind GetRK(Value *Handle) {
108
45.6k
    ResAttribute &Res = FindCreateHandleResourceBase(Handle);
109
45.6k
    return Res.RK;
110
45.6k
  }
111
20.2k
  Type *GetResourceType(Value *Handle) {
112
20.2k
    ResAttribute &Res = FindCreateHandleResourceBase(Handle);
113
20.2k
    return Res.ResourceType;
114
20.2k
  }
115
116
2.94k
  void MarkHasCounter(Value *handle, Type *i8Ty) {
117
2.94k
    CallInst *CIHandle = cast<CallInst>(handle);
118
2.94k
    DXASSERT(hlsl::GetHLOpcodeGroup(CIHandle->getCalledFunction()) ==
119
2.94k
                 HLOpcodeGroup::HLAnnotateHandle,
120
2.94k
             "else invalid handle");
121
    // Mark has counter for the input handle.
122
2.94k
    Value *counterHandle =
123
2.94k
        CIHandle->getArgOperand(HLOperandIndex::kHandleOpIdx);
124
    // Change kind into StructurBufferWithCounter.
125
2.94k
    Constant *Props = cast<Constant>(CIHandle->getArgOperand(
126
2.94k
        HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx));
127
2.94k
    DxilResourceProperties RP = resource_helper::loadPropsFromConstant(*Props);
128
2.94k
    RP.Basic.SamplerCmpOrHasCounter = true;
129
130
2.94k
    CIHandle->setArgOperand(
131
2.94k
        HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx,
132
2.94k
        resource_helper::getAsConstant(RP,
133
2.94k
                                       HLM.GetOP()->GetResourcePropertiesType(),
134
2.94k
                                       *HLM.GetShaderModel()));
135
136
2.94k
    DXIL::ResourceClass RC = GetRC(handle);
137
2.94k
    DXASSERT_LOCALVAR(RC, RC == DXIL::ResourceClass::UAV,
138
2.94k
                      "must UAV for counter");
139
2.94k
    std::unordered_set<Value *> resSet;
140
2.94k
    MarkHasCounterOnCreateHandle(counterHandle, resSet);
141
2.94k
  }
142
143
28
  DxilResourceBase *FindCBufferResourceFromHandle(Value *handle) {
144
28
    if (CallInst *CI = dyn_cast<CallInst>(handle)) {
145
28
      hlsl::HLOpcodeGroup group =
146
28
          hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
147
28
      if (group == HLOpcodeGroup::HLAnnotateHandle) {
148
28
        handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
149
28
      }
150
28
    }
151
152
28
    Constant *symbol = nullptr;
153
28
    if (CallInst *CI = dyn_cast<CallInst>(handle)) {
154
28
      hlsl::HLOpcodeGroup group =
155
28
          hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
156
28
      if (group == HLOpcodeGroup::HLCreateHandle) {
157
28
        symbol = dyn_cast<Constant>(
158
28
            CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx));
159
28
      }
160
28
    }
161
162
28
    if (!symbol)
163
0
      return nullptr;
164
165
28
    for (const std::unique_ptr<DxilCBuffer> &res : HLM.GetCBuffers()) {
166
28
      if (res->GetGlobalSymbol() == symbol)
167
28
        return res.get();
168
28
    }
169
0
    return nullptr;
170
28
  }
171
172
  Value *GetOrCreateResourceForCbPtr(GetElementPtrInst *CbPtr,
173
                                     GlobalVariable *CbGV,
174
314
                                     DxilResourceProperties &RP) {
175
    // Change array idx to 0 to make sure all array ptr share same key.
176
314
    Value *Key = UniformCbPtr(CbPtr, CbGV);
177
314
    if (CBPtrToResourceMap.count(Key))
178
24
      return CBPtrToResourceMap[Key];
179
290
    Value *Resource = CreateResourceForCbPtr(CbPtr, CbGV, RP);
180
290
    CBPtrToResourceMap[Key] = Resource;
181
290
    return Resource;
182
314
  }
183
184
314
  Value *LowerCbResourcePtr(GetElementPtrInst *CbPtr, Value *ResPtr) {
185
    // Simple case.
186
314
    if (ResPtr->getType() == CbPtr->getType())
187
314
      return ResPtr;
188
189
    // Array case.
190
0
    DXASSERT_NOMSG(ResPtr->getType()->getPointerElementType()->isArrayTy());
191
192
0
    IRBuilder<> Builder(CbPtr);
193
0
    gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
194
195
0
    Value *arrayIdx = GEPIt.getOperand();
196
197
    // Only calc array idx and size.
198
    // Ignore struct type part.
199
0
    for (; GEPIt != E; ++GEPIt) {
200
0
      if (GEPIt->isArrayTy()) {
201
0
        arrayIdx = Builder.CreateMul(
202
0
            arrayIdx, Builder.getInt32(GEPIt->getArrayNumElements()));
203
0
        arrayIdx = Builder.CreateAdd(arrayIdx, GEPIt.getOperand());
204
0
      }
205
0
    }
206
207
0
    return Builder.CreateGEP(ResPtr, {Builder.getInt32(0), arrayIdx});
208
314
  }
209
210
314
  DxilResourceProperties GetResPropsFromAnnotateHandle(CallInst *Anno) {
211
314
    Constant *Props = cast<Constant>(Anno->getArgOperand(
212
314
        HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx));
213
314
    DxilResourceProperties RP = resource_helper::loadPropsFromConstant(*Props);
214
314
    return RP;
215
314
  }
216
217
private:
218
84.4k
  ResAttribute &FindCreateHandleResourceBase(Value *Handle) {
219
84.4k
    if (HandleMetaMap.count(Handle))
220
47.7k
      return HandleMetaMap[Handle];
221
222
    // Add invalid first to avoid dead loop.
223
36.6k
    HandleMetaMap[Handle] = {
224
36.6k
        DXIL::ResourceClass::Invalid, DXIL::ResourceKind::Invalid,
225
36.6k
        StructType::get(Type::getVoidTy(HLM.GetCtx()), nullptr)};
226
36.6k
    if (CallInst *CI = dyn_cast<CallInst>(Handle)) {
227
36.6k
      hlsl::HLOpcodeGroup group =
228
36.6k
          hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
229
36.6k
      if (group == HLOpcodeGroup::HLAnnotateHandle) {
230
36.6k
        Constant *Props = cast<Constant>(CI->getArgOperand(
231
36.6k
            HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx));
232
36.6k
        DxilResourceProperties RP =
233
36.6k
            resource_helper::loadPropsFromConstant(*Props);
234
36.6k
        Type *ResTy =
235
36.6k
            CI->getArgOperand(HLOperandIndex::kAnnotateHandleResourceTypeOpIdx)
236
36.6k
                ->getType();
237
238
36.6k
        ResAttribute Attrib = {RP.getResourceClass(), RP.getResourceKind(),
239
36.6k
                               ResTy};
240
241
36.6k
        HandleMetaMap[Handle] = Attrib;
242
36.6k
        return HandleMetaMap[Handle];
243
36.6k
      }
244
36.6k
    }
245
6
    dxilutil::EmitErrorOnContext(Handle->getContext(),
246
6
                                 "cannot map resource to handle.");
247
248
6
    return HandleMetaMap[Handle];
249
36.6k
  }
250
  CallInst *FindCreateHandle(Value *handle,
251
0
                             std::unordered_set<Value *> &resSet) {
252
0
    // Already checked.
253
0
    if (resSet.count(handle))
254
0
      return nullptr;
255
0
    resSet.insert(handle);
256
0
257
0
    if (CallInst *CI = dyn_cast<CallInst>(handle))
258
0
      return CI;
259
0
    if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) {
260
0
      if (CallInst *CI = FindCreateHandle(Sel->getTrueValue(), resSet))
261
0
        return CI;
262
0
      if (CallInst *CI = FindCreateHandle(Sel->getFalseValue(), resSet))
263
0
        return CI;
264
0
      return nullptr;
265
0
    }
266
0
    if (PHINode *Phi = dyn_cast<PHINode>(handle)) {
267
0
      for (unsigned i = 0; i < Phi->getNumOperands(); i++) {
268
0
        if (CallInst *CI = FindCreateHandle(Phi->getOperand(i), resSet))
269
0
          return CI;
270
0
      }
271
0
      return nullptr;
272
0
    }
273
0
274
0
    return nullptr;
275
0
  }
276
  void MarkHasCounterOnCreateHandle(Value *handle,
277
2.94k
                                    std::unordered_set<Value *> &resSet) {
278
    // Already checked.
279
2.94k
    if (resSet.count(handle))
280
0
      return;
281
2.94k
    resSet.insert(handle);
282
283
2.94k
    if (CallInst *CI = dyn_cast<CallInst>(handle)) {
284
2.94k
      Value *Res =
285
2.94k
          CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx);
286
2.94k
      LoadInst *LdRes = dyn_cast<LoadInst>(Res);
287
2.94k
      if (LdRes) {
288
2.93k
        UpdateCounterSet.insert(LdRes);
289
2.93k
        return;
290
2.93k
      }
291
8
      if (CallInst *CallRes = dyn_cast<CallInst>(Res)) {
292
8
        hlsl::HLOpcodeGroup group =
293
8
            hlsl::GetHLOpcodeGroup(CallRes->getCalledFunction());
294
8
        if (group == HLOpcodeGroup::HLCast) {
295
8
          HLCastOpcode opcode =
296
8
              static_cast<HLCastOpcode>(hlsl::GetHLOpcode(CallRes));
297
8
          if (opcode == HLCastOpcode::HandleToResCast) {
298
8
            if (Instruction *Hdl = dyn_cast<Instruction>(
299
8
                    CallRes->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx)))
300
8
              UpdateCounterSet.insert(Hdl);
301
8
            return;
302
8
          }
303
8
        }
304
8
      }
305
0
      dxilutil::EmitErrorOnInstruction(CI, "cannot map resource to handle.");
306
0
      return;
307
8
    }
308
0
    if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) {
309
0
      MarkHasCounterOnCreateHandle(Sel->getTrueValue(), resSet);
310
0
      MarkHasCounterOnCreateHandle(Sel->getFalseValue(), resSet);
311
0
    }
312
0
    if (PHINode *Phi = dyn_cast<PHINode>(handle)) {
313
0
      for (unsigned i = 0; i < Phi->getNumOperands(); i++) {
314
0
        MarkHasCounterOnCreateHandle(Phi->getOperand(i), resSet);
315
0
      }
316
0
    }
317
0
  }
318
319
314
  Value *UniformCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV) {
320
314
    gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
321
314
    std::vector<Value *> idxList(CbPtr->idx_begin(), CbPtr->idx_end());
322
314
    unsigned i = 0;
323
314
    IRBuilder<> Builder(HLM.GetCtx());
324
314
    Value *zero = Builder.getInt32(0);
325
1.29k
    for (; GEPIt != E; 
++GEPIt, ++i982
) {
326
982
      ConstantInt *ImmIdx = dyn_cast<ConstantInt>(GEPIt.getOperand());
327
982
      if (!ImmIdx) {
328
        // Remove dynamic indexing to avoid crash.
329
8
        idxList[i] = zero;
330
8
      }
331
982
    }
332
333
314
    Value *Key = Builder.CreateInBoundsGEP(CbGV, idxList);
334
314
    return Key;
335
314
  }
336
337
  Value *CreateResourceForCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV,
338
290
                                DxilResourceProperties &RP) {
339
290
    Type *CbTy = CbPtr->getPointerOperandType();
340
290
    DXASSERT_LOCALVAR(CbTy, CbTy == CbGV->getType(),
341
290
                      "else arg not point to var");
342
343
290
    gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
344
290
    unsigned i = 0;
345
290
    IRBuilder<> Builder(HLM.GetCtx());
346
290
    unsigned arraySize = 1;
347
290
    DxilTypeSystem &typeSys = HLM.GetTypeSystem();
348
349
290
    std::string Name;
350
1.19k
    for (; GEPIt != E; 
++GEPIt, ++i902
) {
351
902
      if (GEPIt->isArrayTy()) {
352
72
        arraySize *= GEPIt->getArrayNumElements();
353
72
        if (!Name.empty())
354
72
          Name += ".";
355
72
        if (ConstantInt *ImmIdx = dyn_cast<ConstantInt>(GEPIt.getOperand())) {
356
64
          unsigned idx = ImmIdx->getLimitedValue();
357
64
          Name += std::to_string(idx);
358
64
        }
359
830
      } else if (GEPIt->isStructTy()) {
360
540
        DxilStructAnnotation *typeAnnot =
361
540
            typeSys.GetStructAnnotation(cast<StructType>(*GEPIt));
362
540
        DXASSERT_NOMSG(typeAnnot);
363
540
        unsigned idx = cast<ConstantInt>(GEPIt.getOperand())->getLimitedValue();
364
540
        DXASSERT_NOMSG(typeAnnot->GetNumFields() > idx);
365
540
        DxilFieldAnnotation &fieldAnnot = typeAnnot->GetFieldAnnotation(idx);
366
540
        if (!Name.empty())
367
250
          Name += ".";
368
540
        Name += fieldAnnot.GetFieldName();
369
540
      }
370
902
    }
371
372
290
    Type *Ty = CbPtr->getResultElementType();
373
    // Not support resource array in cbuffer.
374
290
    unsigned ResBinding =
375
290
        HLM.GetBindingForResourceInCB(CbPtr, CbGV, RP.getResourceClass());
376
290
    return CreateResourceGV(Ty, Name, RP, ResBinding);
377
290
  }
378
379
  Value *CreateResourceGV(Type *Ty, StringRef Name, DxilResourceProperties &RP,
380
290
                          unsigned ResBinding) {
381
290
    Module &M = *HLM.GetModule();
382
290
    Constant *GV = M.getOrInsertGlobal(Name, Ty);
383
    // Create resource and set GV as globalSym.
384
290
    DxilResourceBase *Res = HLM.AddResourceWithGlobalVariableAndProps(GV, RP);
385
290
    DXASSERT(Res, "fail to create resource for global variable in cbuffer");
386
290
    Res->SetLowerBound(ResBinding);
387
290
    return GV;
388
290
  }
389
};
390
391
// Helper for lowering resource extension methods.
392
struct HLObjectExtensionLowerHelper : public hlsl::HLResourceLookup {
393
  explicit HLObjectExtensionLowerHelper(HLObjectOperationLowerHelper &ObjHelper)
394
68
      : m_ObjHelper(ObjHelper) {}
395
396
6
  virtual bool GetResourceKindName(Value *HLHandle, const char **ppName) {
397
6
    DXIL::ResourceKind K = m_ObjHelper.GetRK(HLHandle);
398
6
    bool Success = K != DXIL::ResourceKind::Invalid;
399
6
    if (Success) {
400
6
      *ppName = hlsl::GetResourceKindName(K);
401
6
    }
402
6
    return Success;
403
6
  }
404
405
private:
406
  HLObjectOperationLowerHelper &m_ObjHelper;
407
};
408
409
// Signature shared by all intrinsic lowering functions: they receive the HL
// call, its intrinsic opcode, a DXIL opcode, the lowering helpers and a
// `Translated` in/out flag, and return a Value pointer (several lowering
// functions return nullptr).
using IntrinsicLowerFuncTy = Value *(CallInst *CI, IntrinsicOp IOP,
                                     DXIL::OpCode opcode,
                                     HLOperationLowerHelper &helper,
                                     HLObjectOperationLowerHelper *pObjHelper,
                                     bool &Translated);
414
415
struct IntrinsicLower {
416
  // Intrinsic opcode.
417
  IntrinsicOp IntriOpcode;
418
  // Lower function.
419
  IntrinsicLowerFuncTy &LowerFunc;
420
  // DXIL opcode if can direct map.
421
  DXIL::OpCode DxilOpcode;
422
};
423
424
// IOP intrinsics.
425
namespace {
426
427
// Creates the necessary scalar calls to for a "trivial" operation where only
428
// call instructions to a single function type are needed.
429
// The overload type `Ty` determines what scalarization might be required.
430
// Elements of any vectors in `refArgs` are extracted  into scalars for each
431
// call generated while the same scalar values are used unaltered in each call.
432
// Utility objects `HlslOp` and `Builder` are used to generate calls to the
433
// given `DxilFunc` for each set of scalar arguments.
434
// The results are reconstructed into the given `RetTy` as needed.
435
Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode,
436
                            ArrayRef<Value *> refArgs, Type *Ty, Type *RetTy,
437
32.2k
                            OP *hlslOP, IRBuilder<> &Builder) {
438
32.2k
  unsigned argNum = refArgs.size();
439
32.2k
  std::vector<Value *> args = refArgs;
440
441
32.2k
  if (Ty->isVectorTy()) {
442
8.37k
    Value *retVal = llvm::UndefValue::get(RetTy);
443
8.37k
    unsigned vecSize = Ty->getVectorNumElements();
444
35.2k
    for (unsigned i = 0; i < vecSize; 
i++26.8k
) {
445
      // Update vector args, skip known opcode arg.
446
68.4k
      for (unsigned argIdx = HLOperandIndex::kUnaryOpSrc0Idx; argIdx < argNum;
447
41.5k
           argIdx++) {
448
41.5k
        if (refArgs[argIdx]->getType()->isVectorTy()) {
449
37.1k
          Value *arg = refArgs[argIdx];
450
37.1k
          args[argIdx] = Builder.CreateExtractElement(arg, i);
451
37.1k
        }
452
41.5k
      }
453
26.8k
      Value *EltOP =
454
26.8k
          Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
455
26.8k
      retVal = Builder.CreateInsertElement(retVal, EltOP, i);
456
26.8k
    }
457
8.37k
    return retVal;
458
8.37k
  }
459
460
  // Cannot add name to void.
461
23.8k
  if (RetTy->isVoidTy())
462
254
    return Builder.CreateCall(dxilFunc, args);
463
464
23.5k
  return Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
465
23.8k
}
466
467
// Creates a native vector call to for a "trivial" operation where only a single
468
// call instruction is needed. The overload and return types are the same vector
469
// type `Ty`.
470
// Utility objects `HlslOp` and `Builder` are used to create a call to the given
471
// `DxilFunc` with `RefArgs` arguments.
472
Value *TrivialDxilVectorOperation(Function *Func, OP::OpCode Opcode,
473
                                  ArrayRef<Value *> Args, Type *Ty, OP *OP,
474
968
                                  IRBuilder<> &Builder) {
475
968
  if (!Ty->isVoidTy())
476
968
    return Builder.CreateCall(Func, Args, OP->GetOpCodeName(Opcode));
477
0
  return Builder.CreateCall(Func, Args); // Cannot add name to void.
478
968
}
479
480
// Generates a DXIL operation with the overloaded type based on `Ty` and return
481
// type `RetTy`. When Ty is a vector, it will either generate per-element calls
482
// for each vector element and reconstruct the vector type from those results or
483
// operate on and return native vectors depending on vector size and the
484
// legality of the vector overload.
485
Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs,
486
                            Type *Ty, Type *RetTy, OP *hlslOP,
487
32.4k
                            IRBuilder<> &Builder) {
488
489
  // If supported and the overload type is a vector with more than 1 element,
490
  // create a native vector operation.
491
32.4k
  if (Ty->isVectorTy() && 
Ty->getVectorNumElements() > 19.25k
&&
492
32.4k
      
hlslOP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus()8.43k
&&
493
32.4k
      
OP::IsOverloadLegal(opcode, Ty)1.05k
) {
494
968
    Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty);
495
968
    return TrivialDxilVectorOperation(dxilFunc, opcode, refArgs, Ty, hlslOP,
496
968
                                      Builder);
497
968
  }
498
499
  // Set overload type to the scalar type of `Ty` and generate call(s).
500
31.5k
  Type *EltTy = Ty->getScalarType();
501
31.5k
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, EltTy);
502
503
31.5k
  return TrivialDxilOperation(dxilFunc, opcode, refArgs, Ty, RetTy, hlslOP,
504
31.5k
                              Builder);
505
32.4k
}
506
507
// Convenience overload: the caller passes `refArgs` with a null first slot,
// which is filled here with the opcode constant before the operation is
// emitted in front of `Inst`, using `Inst`'s type as the return type.
Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs,
                            Type *Ty, Instruction *Inst, OP *hlslOP) {
  DXASSERT(refArgs.size() > 0, "else opcode isn't in signature");
  DXASSERT(refArgs[0] == nullptr,
           "else caller has already filled the value in");

  // The ArrayRef only views the caller's storage (actually stack memory from
  // caller), so the opcode slot is written through a const_cast.
  llvm::Value **opcodeSlot = const_cast<llvm::Value **>(refArgs.data());
  opcodeSlot[0] = hlslOP->GetU32Const((unsigned)opcode);

  IRBuilder<> B(Inst);
  return TrivialDxilOperation(opcode, refArgs, Ty, Inst->getType(), hlslOP, B);
}
518
519
// Translate call that converts to a dxil unary operation with a different
520
// return type from the overload by passing the argument, explicit return type,
521
// and helper objects to the scalarizing unary dxil operation creation.
522
Value *TrivialUnaryOperationRet(CallInst *CI, IntrinsicOp IOP,
523
                                OP::OpCode OpCode,
524
                                HLOperationLowerHelper &Helper,
525
                                HLObjectOperationLowerHelper *,
526
98
                                bool &Translated) {
527
98
  Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
528
98
  Type *Ty = Src->getType();
529
530
98
  IRBuilder<> Builder(CI);
531
98
  hlsl::OP *OP = &Helper.hlslOP;
532
98
  Type *RetTy = CI->getType();
533
98
  Constant *OpArg = OP->GetU32Const((unsigned)OpCode);
534
98
  Value *Args[] = {OpArg, Src};
535
536
98
  return TrivialDxilOperation(OpCode, Args, Ty, RetTy, OP, Builder);
537
98
}
538
539
// Emits the unary dxil operation `OpCode` on `Src`; both the overload type
// and the return type are `Src`'s type.
Value *TrivialDxilUnaryOperation(OP::OpCode OpCode, Value *Src, hlsl::OP *Op,
                                 IRBuilder<> &Builder) {
  Type *SrcTy = Src->getType();
  Value *CallArgs[] = {Op->GetU32Const((unsigned)OpCode), Src};
  return TrivialDxilOperation(OpCode, CallArgs, SrcTy, SrcTy, Op, Builder);
}
548
549
// Emits the binary dxil operation `opcode` on `src0` and `src1`; the overload
// type and the return type are both `src0`'s type.
Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1,
                                  hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  Type *operandTy = src0->getType();
  Value *callArgs[] = {hlslOP->GetU32Const((unsigned)opcode), src0, src1};
  return TrivialDxilOperation(opcode, callArgs, operandTy, operandTy, hlslOP,
                              Builder);
}
558
559
// Emits the three-operand dxil operation `opcode` on `src0`, `src1` and
// `src2`; the overload type and the return type are both `src0`'s type.
Value *TrivialDxilTrinaryOperation(OP::OpCode opcode, Value *src0, Value *src1,
                                   Value *src2, hlsl::OP *hlslOP,
                                   IRBuilder<> &Builder) {
  Type *operandTy = src0->getType();
  Value *callArgs[] = {hlslOP->GetU32Const((unsigned)opcode), src0, src1,
                       src2};
  return TrivialDxilOperation(opcode, callArgs, operandTy, operandTy, hlslOP,
                              Builder);
}
569
570
// Translate call that trivially converts to a dxil unary operation by passing
571
// argument, return type, and helper objects to either scalarizing or native
572
// vector dxil operation creation depending on version and vector size.
573
Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
574
                             HLOperationLowerHelper &helper,
575
                             HLObjectOperationLowerHelper *pObjHelper,
576
4.30k
                             bool &Translated) {
577
4.30k
  Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
578
4.30k
  IRBuilder<> Builder(CI);
579
4.30k
  hlsl::OP *hlslOP = &helper.hlslOP;
580
581
4.30k
  return TrivialDxilUnaryOperation(opcode, src0, hlslOP, Builder);
582
4.30k
}
583
584
// Translate call that trivially converts to a dxil binary operation by passing
585
// arguments, return type, and helper objects to either scalarizing or native
586
// vector dxil operation creation depending on version and vector size.
587
Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
588
                              HLOperationLowerHelper &helper,
589
                              HLObjectOperationLowerHelper *pObjHelper,
590
2.49k
                              bool &Translated) {
591
2.49k
  hlsl::OP *hlslOP = &helper.hlslOP;
592
2.49k
  Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
593
2.49k
  Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
594
2.49k
  IRBuilder<> Builder(CI);
595
596
2.49k
  Value *binOp =
597
2.49k
      TrivialDxilBinaryOperation(opcode, src0, src1, hlslOP, Builder);
598
2.49k
  return binOp;
599
2.49k
}
600
601
// Translate call that trivially converts to a dxil trinary (aka tertiary)
602
// operation by passing arguments, return type, and helper objects to either
603
// scalarizing or native vector dxil operation creation depending on version
604
// and vector size.
605
Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
606
                               HLOperationLowerHelper &helper,
607
                               HLObjectOperationLowerHelper *pObjHelper,
608
12.0k
                               bool &Translated) {
609
12.0k
  hlsl::OP *hlslOP = &helper.hlslOP;
610
12.0k
  Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
611
12.0k
  Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
612
12.0k
  Value *src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
613
12.0k
  IRBuilder<> Builder(CI);
614
615
12.0k
  Value *triOp =
616
12.0k
      TrivialDxilTrinaryOperation(opcode, src0, src1, src2, hlslOP, Builder);
617
12.0k
  return triOp;
618
12.0k
}
619
620
// Lowers a unary test on a floating-point operand to the dxil operation
// `opcode`. The result type is built here rather than taken from `CI`: i1 for
// a scalar operand, or a vector of i1 with the operand's element count.
Value *TrivialIsSpecialFloat(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                             HLOperationLowerHelper &helper,
                             HLObjectOperationLowerHelper *pObjHelper,
                             bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  IRBuilder<> Builder(CI);

  Value *operand = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  Type *operandTy = operand->getType();

  Type *resultTy = Type::getInt1Ty(CI->getContext());
  if (operandTy->isVectorTy())
    resultTy = VectorType::get(resultTy, operandTy->getVectorNumElements());

  Value *callArgs[] = {hlslOP->GetU32Const((unsigned)opcode), operand};
  return TrivialDxilOperation(opcode, callArgs, operandTy, resultTy, hlslOP,
                              Builder);
}
638
639
120
// Returns true when the GEP `I` points to an HLSL resource, or to an array
// whose element type is one.
bool IsResourceGEP(GetElementPtrInst *I) {
  Type *PointeeTy = I->getType()->getPointerElementType();
  // Only mark on GEP which point to resource.
  return dxilutil::IsHLSLResourceType(dxilutil::GetArrayEltTy(PointeeTy));
}
645
646
// Lowers NonUniformResourceIndex: marks the users of the call as non-uniform
// and then replaces the call with its operand. Marked users are resource GEPs
// and handle-typed calls, reached either directly or through one cast of the
// call's result. Always returns nullptr.
Value *TranslateNonUniformResourceIndex(
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
    bool &Translated) {
  Value *Index = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  Type *HandleTy = helper.hlslOP.GetHandleType();

  // Only mark on GEP which point to resource.
  auto MarkResourceGEP = [](GetElementPtrInst *GEP) {
    if (IsResourceGEP(GEP))
      DxilMDHelper::MarkNonUniform(GEP);
  };
  // Only mark calls that produce a handle.
  auto MarkHandleCall = [HandleTy](CallInst *Call) {
    if (Call->getType() == HandleTy)
      DxilMDHelper::MarkNonUniform(Call);
  };

  for (User *U : CI->users()) {
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      MarkResourceGEP(GEP);
    } else if (CastInst *Cast = dyn_cast<CastInst>(U)) {
      // Look one level through the cast at its users.
      for (User *CastUser : Cast->users()) {
        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CastUser))
          MarkResourceGEP(GEP);
        else if (CallInst *Call = dyn_cast<CallInst>(CastUser))
          MarkHandleCall(Call);
      }
    } else if (CallInst *Call = dyn_cast<CallInst>(U)) {
      MarkHandleCall(Call);
    }
  }

  CI->replaceAllUsesWith(Index);
  return nullptr;
}
676
677
// Lowers the memory-barrier intrinsics to a single dxil Barrier call placed
// before `CI`. The barrier mode operand is a bitwise OR of DXIL::BarrierMode
// values selected by `IOP`. Always returns nullptr.
Value *TrivialBarrier(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper,
                      HLObjectOperationLowerHelper *pObjHelper,
                      bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Function *barrierFn = hlslOP->GetOpFunc(OP::OpCode::Barrier, CI->getType());
  Constant *opcodeArg = hlslOP->GetU32Const((unsigned)OP::OpCode::Barrier);

  // Mode bits used below; DXIL::BarrierMode::UAVFenceThreadGroup is not used.
  const unsigned uavGlobalFence =
      static_cast<unsigned>(DXIL::BarrierMode::UAVFenceGlobal);
  const unsigned tgsmFence =
      static_cast<unsigned>(DXIL::BarrierMode::TGSMFence);
  const unsigned groupSync =
      static_cast<unsigned>(DXIL::BarrierMode::SyncThreadGroup);

  unsigned mode = 0;
  switch (IOP) {
  case IntrinsicOp::IOP_AllMemoryBarrier:
    mode = uavGlobalFence | tgsmFence;
    break;
  case IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync:
    mode = uavGlobalFence | tgsmFence | groupSync;
    break;
  case IntrinsicOp::IOP_GroupMemoryBarrier:
    mode = tgsmFence;
    break;
  case IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync:
    mode = tgsmFence | groupSync;
    break;
  case IntrinsicOp::IOP_DeviceMemoryBarrier:
    mode = uavGlobalFence;
    break;
  case IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync:
    mode = uavGlobalFence | groupSync;
    break;
  default:
    DXASSERT(0, "invalid opcode for barrier");
    break;
  }

  Value *callArgs[] = {opcodeArg, hlslOP->GetU32Const(mode)};

  IRBuilder<> Builder(CI);
  Builder.CreateCall(barrierFn, callArgs);
  return nullptr;
}
723
724
// Lowers D3DCOLORtoUBYTE4: multiplies the operand by 255.001953 and converts
// the product to the call's integer type with fptosi. A vector operand must
// have four elements and is first swizzled to .zyxw.
Value *TranslateD3DColorToUByte4(CallInst *CI, IntrinsicOp IOP,
                                 OP::OpCode opcode,
                                 HLOperationLowerHelper &helper,
                                 HLObjectOperationLowerHelper *pObjHelper,
                                 bool &Translated) {
  IRBuilder<> Builder(CI);
  Value *color = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  Type *colorTy = color->getType();

  // Use the same scaling factor used by FXC (i.e., 255.001953)
  // Excerpt from stackoverflow discussion:
  // "Built-in rounding, necessary because of truncation. 0.001953 * 256 = 0.5"
  Constant *scale = ConstantFP::get(colorTy->getScalarType(), 255.001953);

  if (colorTy->isVectorTy()) {
    static constexpr int supportedVecElemCount = 4;
    if (colorTy->getVectorNumElements() != supportedVecElemCount) {
      llvm_unreachable(
          "Unsupported input type for intrinsic D3DColorToUByte4.");
      return UndefValue::get(CI->getType());
    }

    scale = ConstantVector::getSplat(supportedVecElemCount, scale);
    // Swizzle the input val -> val.zyxw
    SmallVector<int, 4> zyxwMask{2, 1, 0, 3};
    color = Builder.CreateShuffleVector(color, color, zyxwMask);
  }

  Value *scaled = Builder.CreateFMul(scale, color);
  return Builder.CreateCast(Instruction::CastOps::FPToSI, scaled,
                            CI->getType());
}
755
756
// Returns true if pow can be implemented using Fxc's mul-only code gen pattern.
757
// Fxc uses the below rules when choosing mul-only code gen pattern to implement
758
// pow function. Rule 1: Applicable only to power values in the range
759
// [INT32_MIN, INT32_MAX] Rule 2: The maximum number of mul ops needed shouldn't
760
// exceed (2n+1) or (n+1) based on whether the power
761
//         is a positive or a negative value. Here "n" is the number of scalar
762
//         elements in power.
763
// Rule 3: Power must be an exact value.
764
// +----------+---------------------+------------------+
765
// | BaseType | IsExponentPositive  | MaxMulOpsAllowed |
766
// +----------+---------------------+------------------+
767
// | float4x4 | True                |               33 |
768
// | float4x4 | False               |               17 |
769
// | float4x2 | True                |               17 |
770
// | float4x2 | False               |                9 |
771
// | float2x4 | True                |               17 |
772
// | float2x4 | False               |                9 |
773
// | float4   | True                |                9 |
774
// | float4   | False               |                5 |
775
// | float2   | True                |                5 |
776
// | float2   | False               |                3 |
777
// | float    | True                |                3 |
778
// | float    | False               |                2 |
779
// +----------+---------------------+------------------+
780
781
bool CanUseFxcMulOnlyPatternForPow(IRBuilder<> &Builder, Value *x, Value *pow,
782
1.45k
                                   int32_t &powI) {
783
  // Applicable only when power is a literal.
784
1.45k
  if (!isa<ConstantDataVector>(pow) && 
!isa<ConstantFP>(pow)262
) {
785
74
    return false;
786
74
  }
787
788
  // Only apply this code gen on splat values.
789
1.38k
  if (ConstantDataVector *cdv = dyn_cast<ConstantDataVector>(pow)) {
790
1.19k
    if (!hlsl::dxilutil::IsSplat(cdv)) {
791
8
      return false;
792
8
    }
793
1.19k
  }
794
795
  // Only apply on aggregates of 16 or fewer elements,
796
  // representing the max 4x4 matrix size.
797
1.37k
  Type *Ty = x->getType();
798
1.37k
  if (Ty->isVectorTy() && 
Ty->getVectorNumElements() > 161.18k
)
799
0
    return false;
800
801
1.37k
  APFloat powAPF = isa<ConstantDataVector>(pow)
802
1.37k
                       ? 
cast<ConstantDataVector>(pow)->getElementAsAPFloat(0)1.18k
803
1.37k
                       : // should be a splat value
804
1.37k
                       
cast<ConstantFP>(pow)->getValueAPF()188
;
805
1.37k
  APSInt powAPS(32, false);
806
1.37k
  bool isExact = false;
807
  // Try converting float value of power to integer and also check if the float
808
  // value is exact.
809
1.37k
  APFloat::opStatus status =
810
1.37k
      powAPF.convertToInteger(powAPS, APFloat::rmTowardZero, &isExact);
811
1.37k
  if (status == APFloat::opStatus::opOK && 
isExact348
) {
812
340
    powI = powAPS.getExtValue();
813
340
    uint32_t powU = abs(powI);
814
340
    int setBitCount = 0;
815
340
    int maxBitSetPos = -1;
816
11.2k
    for (int i = 0; i < 32; 
i++10.8k
) {
817
10.8k
      if ((powU >> i) & 1) {
818
548
        setBitCount++;
819
548
        maxBitSetPos = i;
820
548
      }
821
10.8k
    }
822
823
340
    DXASSERT(maxBitSetPos <= 30, "msb should always be zero.");
824
340
    unsigned numElem =
825
340
        isa<ConstantDataVector>(pow) ? 
x->getType()->getVectorNumElements()152
:
1188
;
826
340
    int mulOpThreshold = powI < 0 ? 
numElem + 132
:
2 * numElem + 1308
;
827
340
    int mulOpNeeded = maxBitSetPos + setBitCount - 1;
828
340
    return mulOpNeeded <= mulOpThreshold;
829
340
  }
830
831
1.03k
  return false;
832
1.37k
}
833
834
// Emits x^y for an integer exponent y using only multiplies (plus one divide
// for negative exponents): x is squared repeatedly, and the squarings that
// correspond to set bits of |y| are multiplied together.
// Returns the constant 1 of x's type when y is zero.
Value *TranslatePowUsingFxcMulOnlyPattern(IRBuilder<> &Builder, Value *x,
                                          const int32_t y) {
  // Compute |y| in unsigned arithmetic; abs(INT32_MIN) is undefined behavior.
  const uint32_t absY =
      y < 0 ? 0u - static_cast<uint32_t>(y) : static_cast<uint32_t>(y);
  // If y is zero then always return 1.
  if (absY == 0) {
    return ConstantFP::get(x->getType(), 1);
  }

  int lastSetPos = -1;
  Value *result = nullptr;
  Value *mul = nullptr; // Holds x^(2^k) for the most recently reached bit k.
  for (int i = 0; i < 32; i++) {
    if ((absY >> i) & 1) {
      // Advance mul from bit lastSetPos to bit i. The very first step only
      // seeds mul with x; every later step squares it.
      for (int j = i; j > lastSetPos; j--) {
        mul = mul ? Builder.CreateFMul(mul, mul) : x;
      }

      result = result ? Builder.CreateFMul(result, mul) : mul;
      lastSetPos = i;
    }
  }

  // Compute reciprocal for negative power values.
  if (y < 0) {
    Value *constOne = ConstantFP::get(x->getType(), 1);
    result = Builder.CreateFDiv(constOne, result);
  }

  return result;
}
868
869
// Lowers pow(x, y). When y qualifies for the Fxc mul-only pattern, FXC compat
// mode expands it fully with multiplies; otherwise only y == 2 becomes x * x.
// Everything else uses exp(y * log(x)).
Value *TranslatePowImpl(hlsl::OP *hlslOP, IRBuilder<> &Builder, Value *x,
                        Value *y, bool isFXCCompatMode = false) {
  // As applicable implement pow using only mul ops as done by Fxc.
  int32_t exponent = 0;
  const bool mulOnlyOk =
      CanUseFxcMulOnlyPatternForPow(Builder, x, y, exponent);

  if (mulOnlyOk && isFXCCompatMode)
    return TranslatePowUsingFxcMulOnlyPattern(Builder, x, exponent);

  // Only take care 2 for it will not affect register pressure.
  if (mulOnlyOk && exponent == 2)
    return Builder.CreateFMul(x, x);

  // Default to log-mul-exp pattern if previous scenarios don't apply.
  // t = log(x); t = y * t; pow = exp(t);
  Value *logOfX =
      TrivialDxilUnaryOperation(DXIL::OpCode::Log, x, hlslOP, Builder);
  Value *scaledLog = Builder.CreateFMul(logOfX, y);
  return TrivialDxilUnaryOperation(DXIL::OpCode::Exp, scaledLog, hlslOP,
                                   Builder);
}
890
891
// Lowers AddUint64 on uint2/uint4 operands. Each (low, high) pair of 32-bit
// lanes is added as one 64-bit value: the low halves go through UAddc and the
// carry is added into the sum of the high halves. Any other operand type emits
// an error on CI and yields undef.
Value *TranslateAddUint64(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                          HLOperationLowerHelper &helper,
                          HLObjectOperationLowerHelper *pObjHelper,
                          bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  IRBuilder<> Builder(CI);
  Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  Type *Ty = val->getType();

  // A non-vector operand is treated as width 0, which fails the check below.
  VectorType *VT = dyn_cast<VectorType>(Ty);
  const unsigned size = VT ? VT->getNumElements() : 0;
  if (size != 2 && size != 4) {
    dxilutil::EmitErrorOnInstruction(
        CI, "AddUint64 can only be applied to uint2 and uint4 operands.");
    return UndefValue::get(Ty);
  }

  Value *lhs = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *rhs = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);

  Value *sum = UndefValue::get(Ty);

  Function *AddC = hlslOP->GetOpFunc(DXIL::OpCode::UAddc, helper.i32Ty);
  Value *opArg = Builder.getInt32(static_cast<unsigned>(DXIL::OpCode::UAddc));
  for (unsigned lo = 0; lo < size; lo += 2) {
    const unsigned hi = lo + 1;

    // Low 32 bits: add with carry-out.
    Value *lhsLo = Builder.CreateExtractElement(lhs, lo);
    Value *rhsLo = Builder.CreateExtractElement(rhs, lo);
    Value *loWithCarry = Builder.CreateCall(AddC, {opArg, lhsLo, rhsLo});
    Value *loSum = Builder.CreateExtractValue(loWithCarry, 0);
    sum = Builder.CreateInsertElement(sum, loSum, lo);

    // Ext i1 to i32
    Value *carry = Builder.CreateExtractValue(loWithCarry, 1);
    carry = Builder.CreateZExt(carry, helper.i32Ty);

    // High 32 bits: plain add, then fold in the carry.
    Value *lhsHi = Builder.CreateExtractElement(lhs, hi);
    Value *rhsHi = Builder.CreateExtractElement(rhs, hi);
    Value *hiSum = Builder.CreateAdd(lhsHi, rhsHi);
    hiSum = Builder.CreateAdd(hiSum, carry);
    sum = Builder.CreateInsertElement(sum, hiSum, hi);
  }
  return sum;
}
938
939
936
// Returns true when V is a call whose opcode operand is the constant
// DXIL::OpCode::LoadInput. Null values, non-call values, calls without an
// opcode operand, and calls whose opcode operand is not a constant integer
// are all rejected rather than asserting.
// TODO: report this error on front-end
bool IsValidLoadInput(Value *V) {
  // Must be load input.
  CallInst *CI = dyn_cast_or_null<CallInst>(V);
  if (!CI) {
    return false;
  }
  // The value may be an arbitrary call; make sure the opcode operand exists
  // before reading it.
  if (CI->getNumArgOperands() <= DXIL::OperandIndex::kOpcodeIdx) {
    return false;
  }
  // Must be immediate.
  ConstantInt *opArg =
      dyn_cast<ConstantInt>(CI->getArgOperand(DXIL::OperandIndex::kOpcodeIdx));
  if (!opArg) {
    return false;
  }
  DXIL::OpCode op = static_cast<DXIL::OpCode>(opArg->getLimitedValue());
  return op == DXIL::OpCode::LoadInput;
}
955
956
// Tunnel through insert/extract element and shuffle to find original source
957
// of scalar value, or specified element (vecIdx) of vector value.
958
936
Value *FindScalarSource(Value *src, unsigned vecIdx = 0) {
959
936
  Type *srcTy = src->getType()->getScalarType();
960
6.16k
  while (src && !isa<UndefValue>(src)) {
961
6.16k
    if (src->getType()->isVectorTy()) {
962
5.10k
      if (InsertElementInst *IE = dyn_cast<InsertElementInst>(src)) {
963
4.18k
        unsigned curIdx = (unsigned)cast<ConstantInt>(IE->getOperand(2))
964
4.18k
                              ->getUniqueInteger()
965
4.18k
                              .getLimitedValue();
966
4.18k
        src = IE->getOperand((curIdx == vecIdx) ? 
1938
:
03.25k
);
967
4.18k
      } else 
if (ShuffleVectorInst *916
SV916
= dyn_cast<ShuffleVectorInst>(src)) {
968
904
        int newIdx = SV->getMaskValue(vecIdx);
969
904
        if (newIdx < 0)
970
0
          return UndefValue::get(srcTy);
971
904
        vecIdx = (unsigned)newIdx;
972
904
        src = SV->getOperand(0);
973
904
        unsigned numElt = src->getType()->getVectorNumElements();
974
904
        if (numElt <= vecIdx) {
975
0
          vecIdx -= numElt;
976
0
          src = SV->getOperand(1);
977
0
        }
978
904
      } else {
979
12
        return UndefValue::get(srcTy); // Didn't find it.
980
12
      }
981
5.10k
    } else {
982
1.06k
      if (ExtractElementInst *EE = dyn_cast<ExtractElementInst>(src)) {
983
56
        vecIdx = (unsigned)cast<ConstantInt>(EE->getIndexOperand())
984
56
                     ->getUniqueInteger()
985
56
                     .getLimitedValue();
986
56
        src = EE->getVectorOperand();
987
1.00k
      } else if (hlsl::dxilutil::IsConvergentMarker(src)) {
988
80
        src = hlsl::dxilutil::GetConvergentSource(src);
989
924
      } else {
990
924
        break; // Found it.
991
924
      }
992
1.06k
    }
993
6.16k
  }
994
924
  return src;
995
936
}
996
997
// Finds corresponding inputs, calls translation for each, and returns
998
// resulting vector or scalar.
999
// Uses functor that takes (inputElemID, rowIdx, colIdx), and returns
1000
// translation for one input scalar.
1001
Value *TranslateEvalHelper(
1002
    CallInst *CI, Value *val, IRBuilder<> &Builder,
1003
266
    std::function<Value *(Value *, Value *, Value *)> fnTranslateScalarInput) {
1004
266
  Type *Ty = CI->getType();
1005
266
  Value *result = UndefValue::get(Ty);
1006
266
  if (Ty->isVectorTy()) {
1007
1.10k
    for (unsigned i = 0; i < Ty->getVectorNumElements(); 
++i882
) {
1008
894
      Value *InputEl = FindScalarSource(val, i);
1009
894
      if (!IsValidLoadInput(InputEl)) {
1010
12
        dxilutil::EmitErrorOnInstruction(
1011
12
            CI, "attribute evaluation can only be done "
1012
12
                "on values taken directly from inputs.");
1013
12
        return result;
1014
12
      }
1015
882
      CallInst *loadInput = cast<CallInst>(InputEl);
1016
882
      Value *inputElemID =
1017
882
          loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
1018
882
      Value *rowIdx =
1019
882
          loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
1020
882
      Value *colIdx =
1021
882
          loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
1022
882
      Value *Elt = fnTranslateScalarInput(inputElemID, rowIdx, colIdx);
1023
882
      result = Builder.CreateInsertElement(result, Elt, i);
1024
882
    }
1025
224
  } else {
1026
42
    Value *InputEl = FindScalarSource(val);
1027
42
    if (!IsValidLoadInput(InputEl)) {
1028
0
      dxilutil::EmitErrorOnInstruction(CI,
1029
0
                                       "attribute evaluation can only be done "
1030
0
                                       "on values taken directly from inputs.");
1031
0
      return result;
1032
0
    }
1033
42
    CallInst *loadInput = cast<CallInst>(InputEl);
1034
42
    Value *inputElemID =
1035
42
        loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
1036
42
    Value *rowIdx =
1037
42
        loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
1038
42
    Value *colIdx =
1039
42
        loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
1040
42
    result = fnTranslateScalarInput(inputElemID, rowIdx, colIdx);
1041
42
  }
1042
254
  return result;
1043
266
}
1044
1045
// Lowers an attribute evaluation at a sample index: emits one EvalSampleIndex
// call per input scalar, passing the sample index operand through unchanged.
Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                           HLOperationLowerHelper &helper,
                           HLObjectOperationLowerHelper *pObjHelper,
                           bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  const OP::OpCode evalOpcode = OP::OpCode::EvalSampleIndex;

  Value *attribute = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *sampleIdx = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(evalOpcode));
  Function *evalFunc =
      hlslOP->GetOpFunc(evalOpcode, CI->getType()->getScalarType());

  auto EmitEval = [&](Value *inputElemID, Value *rowIdx,
                      Value *colIdx) -> Value * {
    Value *evalArgs[] = {opArg, inputElemID, rowIdx, colIdx, sampleIdx};
    return Builder.CreateCall(evalFunc, evalArgs);
  };
  return TranslateEvalHelper(CI, attribute, Builder, EmitEval);
}
1065
1066
// Lowers an attribute evaluation at a snapped offset: splits the offset
// operand into its first two components and emits one EvalSnapped call per
// input scalar.
Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                            HLOperationLowerHelper &helper,
                            HLObjectOperationLowerHelper *pObjHelper,
                            bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  const OP::OpCode evalOpcode = OP::OpCode::EvalSnapped;

  Value *attribute = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *offset = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  Value *offsetX = Builder.CreateExtractElement(offset, (uint64_t)0);
  Value *offsetY = Builder.CreateExtractElement(offset, (uint64_t)1);
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(evalOpcode));
  Function *evalFunc =
      hlslOP->GetOpFunc(evalOpcode, CI->getType()->getScalarType());

  auto EmitEval = [&](Value *inputElemID, Value *rowIdx,
                      Value *colIdx) -> Value * {
    Value *evalArgs[] = {opArg,  inputElemID, rowIdx,
                         colIdx, offsetX,     offsetY};
    return Builder.CreateCall(evalFunc, evalArgs);
  };
  return TranslateEvalHelper(CI, attribute, Builder, EmitEval);
}
1088
1089
// Lowers an attribute evaluation at the centroid: emits one EvalCentroid call
// per input scalar.
Value *TranslateEvalCentroid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                             HLOperationLowerHelper &helper,
                             HLObjectOperationLowerHelper *pObjHelper,
                             bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  const OP::OpCode evalOpcode = OP::OpCode::EvalCentroid;

  // NOTE(review): this indexes the HL call with a DXIL::OperandIndex constant
  // while sibling lowerings use HLOperandIndex; presumably the values match.
  Value *attribute = CI->getArgOperand(DXIL::OperandIndex::kUnarySrc0OpIdx);
  IRBuilder<> Builder(CI);
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(evalOpcode));
  Function *evalFunc =
      hlslOP->GetOpFunc(evalOpcode, CI->getType()->getScalarType());

  auto EmitEval = [&](Value *inputElemID, Value *rowIdx,
                      Value *colIdx) -> Value * {
    Value *evalArgs[] = {opArg, inputElemID, rowIdx, colIdx};
    return Builder.CreateCall(evalFunc, evalArgs);
  };
  return TranslateEvalHelper(CI, attribute, Builder, EmitEval);
}
1108
1109
/*
1110
HLSL: bool RWDispatchNodeInputRecord<recordType>::FinishedCrossGroupSharing()
1111
DXIL: i1 @dx.op.finishedCrossGroupSharing(i32 %Opcode,
1112
%dx.types.NodeRecordHandle %NodeInputRecordHandle)
1113
*/
1114
Value *TranslateNodeFinishedCrossGroupSharing(
1115
    CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
1116
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
1117
8
    bool &Translated) {
1118
8
  hlsl::OP *OP = &helper.hlslOP;
1119
1120
8
  Function *dxilFunc = OP->GetOpFunc(op, Type::getVoidTy(CI->getContext()));
1121
8
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
1122
8
  DXASSERT_NOMSG(handle->getType() == OP->GetNodeRecordHandleType());
1123
8
  Value *opArg = OP->GetU32Const((unsigned)op);
1124
1125
8
  IRBuilder<> Builder(CI);
1126
8
  return Builder.CreateCall(dxilFunc, {opArg, handle});
1127
8
}
1128
1129
/*
1130
HLSL:
1131
    bool NodeOutput<recordType>::IsValid()
1132
    bool EmptyNodeOutput::IsValid()
1133
DXIL:
1134
  i1 @dx.op.nodeOutputIsValid(i32 %Opcode, %dx.types.NodeHandle
1135
%NodeOutputHandle)
1136
*/
1137
Value *TranslateNodeOutputIsValid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
1138
                                  HLOperationLowerHelper &helper,
1139
                                  HLObjectOperationLowerHelper *pObjHelper,
1140
48
                                  bool &Translated) {
1141
48
  hlsl::OP *OP = &helper.hlslOP;
1142
48
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
1143
48
  Function *dxilFunc = OP->GetOpFunc(op, Type::getVoidTy(CI->getContext()));
1144
48
  Value *opArg = OP->GetU32Const((unsigned)op);
1145
1146
48
  IRBuilder<> Builder(CI);
1147
48
  return Builder.CreateCall(dxilFunc, {opArg, handle});
1148
48
}
1149
1150
// Lowers GetAttributeAtVertex: truncates the vertex index to i8 and emits one
// AttributeAtVertex call per input scalar of the attribute operand.
Value *TranslateGetAttributeAtVertex(CallInst *CI, IntrinsicOp IOP,
                                     OP::OpCode op,
                                     HLOperationLowerHelper &helper,
                                     HLObjectOperationLowerHelper *pObjHelper,
                                     bool &Translated) {
  DXASSERT(op == OP::OpCode::AttributeAtVertex, "Wrong opcode to translate");
  hlsl::OP *hlslOP = &helper.hlslOP;
  IRBuilder<> Builder(CI);

  Value *attribute = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc0OpIdx);
  Value *vertexIdx = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc1OpIdx);
  Type *i8Ty = Type::getInt8Ty(CI->getContext());
  Value *vertexI8Idx = Builder.CreateTrunc(vertexIdx, i8Ty);

  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(op));
  // The DXIL function is overloaded on the attribute's scalar type.
  Function *evalFunc =
      hlslOP->GetOpFunc(op, attribute->getType()->getScalarType());

  auto EmitEval = [&](Value *inputElemID, Value *rowIdx,
                      Value *colIdx) -> Value * {
    Value *evalArgs[] = {opArg, inputElemID, rowIdx, colIdx, vertexI8Idx};
    return Builder.CreateCall(evalFunc, evalArgs);
  };
  return TranslateEvalHelper(CI, attribute, Builder, EmitEval);
}
1172
/*
1173
1174
HLSL:
1175
void Barrier(uint MemoryTypeFlags, uint SemanticFlags)
1176
void Barrier(Object o, uint SemanticFlags)
1177
1178
All UAVs and/or Node Records by types:
1179
void @dx.op.barrierByMemoryType(i32 %Opcode,
1180
  i32 %MemoryTypeFlags, i32 %SemanticFlags)
1181
1182
UAV by handle:
1183
void @dx.op.barrierByMemoryHandle(i32 %Opcode,
1184
  %dx.types.Handle %Object, i32 %SemanticFlags)
1185
1186
Node Record by handle:
1187
void @dx.op.barrierByMemoryHandle(i32 %Opcode,
1188
  %dx.types.NodeRecordHandle %Object, i32 %SemanticFlags)
1189
*/
1190
1191
Value *TranslateBarrier(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
1192
                        HLOperationLowerHelper &helper,
1193
                        HLObjectOperationLowerHelper *pObjHelper,
1194
242
                        bool &Translated) {
1195
242
  hlsl::OP *OP = &helper.hlslOP;
1196
242
  Value *HandleOrMemoryFlags =
1197
242
      CI->getArgOperand(HLOperandIndex::kBarrierMemoryTypeFlagsOpIdx);
1198
242
  Value *SemanticFlags =
1199
242
      CI->getArgOperand(HLOperandIndex::kBarrierSemanticFlagsOpIdx);
1200
242
  IRBuilder<> Builder(CI);
1201
1202
242
  if (HandleOrMemoryFlags->getType()->isIntegerTy()) {
1203
86
    op = OP::OpCode::BarrierByMemoryType;
1204
156
  } else if (HandleOrMemoryFlags->getType() == OP->GetHandleType()) {
1205
80
    op = OP::OpCode::BarrierByMemoryHandle;
1206
80
  } else 
if (76
HandleOrMemoryFlags->getType() == OP->GetNodeRecordHandleType()76
) {
1207
76
    op = OP::OpCode::BarrierByNodeRecordHandle;
1208
76
  } else {
1209
0
    DXASSERT(false, "Shouldn't get here");
1210
0
  }
1211
1212
242
  Function *dxilFunc = OP->GetOpFunc(op, CI->getType());
1213
242
  Constant *opArg = OP->GetU32Const((unsigned)op);
1214
1215
242
  Value *args[] = {opArg, HandleOrMemoryFlags, SemanticFlags};
1216
1217
242
  Builder.CreateCall(dxilFunc, args);
1218
242
  return nullptr;
1219
242
}
1220
1221
// Shared lowering for the group and thread node-output-record allocators:
// emits the DXIL call for `op` with (handle, record count, per-thread flag).
Value *TranslateGetGroupOrThreadNodeOutputRecords(
    CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
    bool isPerThreadRecord, bool &Translated) {
  IRBuilder<> Builder(CI);
  hlsl::OP *hlslOP = &helper.hlslOP;

  Value *outputHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  Function *dxilFunc = hlslOP->GetOpFunc(op, Builder.getVoidTy());
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(op));
  Value *numRecords =
      CI->getArgOperand(HLOperandIndex::kAllocateRecordNumRecordsIdx);
  Value *perThread = hlslOP->GetI1Const(isPerThreadRecord);

  Value *callArgs[] = {opArg, outputHandle, numRecords, perThread};
  return Builder.CreateCall(dxilFunc, callArgs);
}
1238
1239
/*
1240
HLSL:
1241
GroupNodeOutputRecords<recordType>
1242
NodeOutput<recordType>::GetGroupNodeOutputRecords(uint numRecords); DXIL:
1243
%dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 %Opcode,
1244
%dx.types.NodeHandle %NodeOutputHandle, i32 %NumRecords, i1 %PerThread)
1245
*/
1246
Value *
1247
TranslateGetGroupNodeOutputRecords(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
1248
                                   HLOperationLowerHelper &helper,
1249
                                   HLObjectOperationLowerHelper *pObjHelper,
1250
144
                                   bool &Translated) {
1251
144
  return TranslateGetGroupOrThreadNodeOutputRecords(
1252
144
      CI, IOP, op, helper, pObjHelper, /* isPerThreadRecord */ false,
1253
144
      Translated);
1254
144
}
1255
1256
/*
1257
HLSL:
1258
ThreadNodeOutputRecords<recordType>
1259
NodeOutput<recordType>::GetThreadNodeOutputRecords(uint numRecords) DXIL:
1260
%dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 %Opcode,
1261
%dx.types.NodeHandle %NodeOutputHandle, i32 %NumRecords, i1 %PerThread)
1262
*/
1263
Value *TranslateGetThreadNodeOutputRecords(
1264
    CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
1265
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
1266
128
    bool &Translated) {
1267
128
  return TranslateGetGroupOrThreadNodeOutputRecords(
1268
128
      CI, IOP, op, helper, pObjHelper, /* isPerThreadRecord */ true,
1269
128
      Translated);
1270
128
}
1271
1272
/*
1273
HLSL:
1274
uint EmptyNodeInput::Count()
1275
uint GroupNodeInputRecords<recordType>::Count()
1276
uint RWGroupNodeInputRecords<recordType>::Count()
1277
1278
DXIL:
1279
i32 @dx.op.getInputRecordCount(i32 %Opcode, %dx.types.NodeRecordHandle
1280
%NodeInputHandle)
1281
*/
1282
Value *
1283
TranslateNodeGetInputRecordCount(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
1284
                                 HLOperationLowerHelper &helper,
1285
                                 HLObjectOperationLowerHelper *pObjHelper,
1286
30
                                 bool &Translated) {
1287
30
  hlsl::OP *OP = &helper.hlslOP;
1288
1289
30
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
1290
30
  DXASSERT_NOMSG(handle->getType() == OP->GetNodeRecordHandleType());
1291
30
  Function *dxilFunc = OP->GetOpFunc(op, Type::getVoidTy(CI->getContext()));
1292
30
  Value *opArg = OP->GetU32Const((unsigned)op);
1293
30
  Value *args[] = {opArg, handle};
1294
1295
30
  IRBuilder<> Builder(CI);
1296
30
  return Builder.CreateCall(dxilFunc, args);
1297
30
}
1298
1299
// Lowers an intrinsic that takes no operands: emits the DXIL op for `opcode`
// with only the opcode argument, using void for both type arguments.
Value *TrivialNoArgOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                             HLOperationLowerHelper &helper,
                             HLObjectOperationLowerHelper *pObjHelper,
                             bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Type *VoidTy = Type::getVoidTy(CI->getContext());

  Value *callArgs[] = {hlslOP->GetU32Const(static_cast<unsigned>(opcode))};
  IRBuilder<> Builder(CI);
  return TrivialDxilOperation(opcode, callArgs, VoidTy, VoidTy, hlslOP,
                              Builder);
}
1313
1314
// Lowers an intrinsic that takes no operands but produces a value: emits the
// DXIL op for `opcode` with only the opcode argument, using the call's result
// type for both type arguments.
Value *TrivialNoArgWithRetOperation(CallInst *CI, IntrinsicOp IOP,
                                    OP::OpCode opcode,
                                    HLOperationLowerHelper &helper,
                                    HLObjectOperationLowerHelper *pObjHelper,
                                    bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Type *ResultTy = CI->getType();

  Value *callArgs[] = {hlslOP->GetU32Const(static_cast<unsigned>(opcode))};
  IRBuilder<> Builder(CI);
  return TrivialDxilOperation(opcode, callArgs, ResultTy, ResultTy, hlslOP,
                              Builder);
}
1329
1330
// Lowers GetRenderTargetSamplePosition: emits RenderTargetGetSamplePosition
// for the sample-index operand and repacks the first two fields of its
// aggregate result into a vector of the call's result type.
Value *TranslateGetRTSamplePos(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                               HLOperationLowerHelper &helper,
                               HLObjectOperationLowerHelper *pObjHelper,
                               bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  const OP::OpCode samplePosOpcode =
      OP::OpCode::RenderTargetGetSamplePosition;
  IRBuilder<> Builder(CI);

  Type *VoidTy = Type::getVoidTy(CI->getContext());
  Value *sampleIndex = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);

  Value *callArgs[] = {
      hlslOP->GetU32Const(static_cast<unsigned>(samplePosOpcode)),
      sampleIndex};
  Value *samplePos = TrivialDxilOperation(samplePosOpcode, callArgs, VoidTy,
                                          VoidTy, hlslOP, Builder);

  Value *packed = UndefValue::get(CI->getType());
  Value *posX = Builder.CreateExtractValue(samplePos, 0);
  Value *posY = Builder.CreateExtractValue(samplePos, 1);
  packed = Builder.CreateInsertElement(packed, posX, (uint64_t)0);
  packed = Builder.CreateInsertElement(packed, posY, (uint64_t)1);
  return packed;
}
1354
1355
// val QuadReadLaneAt(val, uint);
1356
Value *TranslateQuadReadLaneAt(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1357
                               HLOperationLowerHelper &helper,
1358
                               HLObjectOperationLowerHelper *pObjHelper,
1359
66
                               bool &Translated) {
1360
66
  hlsl::OP *hlslOP = &helper.hlslOP;
1361
66
  Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)};
1362
66
  return TrivialDxilOperation(DXIL::OpCode::QuadReadLaneAt, refArgs,
1363
66
                              CI->getOperand(1)->getType(), CI, hlslOP);
1364
66
}
1365
1366
// Quad intrinsics of the form fn(val,QuadOpKind)->val
1367
Value *TranslateQuadAnyAll(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1368
                           HLOperationLowerHelper &helper,
1369
                           HLObjectOperationLowerHelper *pObjHelper,
1370
22
                           bool &Translated) {
1371
22
  hlsl::OP *hlslOP = &helper.hlslOP;
1372
22
  DXIL::QuadVoteOpKind opKind;
1373
22
  switch (IOP) {
1374
10
  case IntrinsicOp::IOP_QuadAll:
1375
10
    opKind = DXIL::QuadVoteOpKind::All;
1376
10
    break;
1377
12
  case IntrinsicOp::IOP_QuadAny:
1378
12
    opKind = DXIL::QuadVoteOpKind::Any;
1379
12
    break;
1380
0
  default:
1381
0
    llvm_unreachable(
1382
22
        "QuadAny/QuadAll translation called with wrong isntruction");
1383
22
  }
1384
22
  Constant *OpArg = hlslOP->GetI8Const((unsigned)opKind);
1385
22
  Value *refArgs[] = {nullptr, CI->getOperand(1), OpArg};
1386
22
  return TrivialDxilOperation(DXIL::OpCode::QuadVote, refArgs,
1387
22
                              CI->getOperand(1)->getType(), CI, hlslOP);
1388
22
}
1389
1390
// Wave intrinsics of the form fn(val,QuadOpKind)->val
1391
Value *TranslateQuadReadAcross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1392
                               HLOperationLowerHelper &helper,
1393
                               HLObjectOperationLowerHelper *pObjHelper,
1394
102
                               bool &Translated) {
1395
102
  hlsl::OP *hlslOP = &helper.hlslOP;
1396
102
  DXIL::QuadOpKind opKind;
1397
102
  switch (IOP) {
1398
34
  case IntrinsicOp::IOP_QuadReadAcrossX:
1399
34
    opKind = DXIL::QuadOpKind::ReadAcrossX;
1400
34
    break;
1401
32
  case IntrinsicOp::IOP_QuadReadAcrossY:
1402
32
    opKind = DXIL::QuadOpKind::ReadAcrossY;
1403
32
    break;
1404
0
  default:
1405
0
    DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_QuadReadAcrossDiagonal);
1406
0
    LLVM_FALLTHROUGH;
1407
36
  case IntrinsicOp::IOP_QuadReadAcrossDiagonal:
1408
36
    opKind = DXIL::QuadOpKind::ReadAcrossDiagonal;
1409
36
    break;
1410
102
  }
1411
102
  Constant *OpArg = hlslOP->GetI8Const((unsigned)opKind);
1412
102
  Value *refArgs[] = {nullptr, CI->getOperand(1), OpArg};
1413
102
  return TrivialDxilOperation(DXIL::OpCode::QuadOp, refArgs,
1414
102
                              CI->getOperand(1)->getType(), CI, hlslOP);
1415
102
}
1416
1417
// WaveAllEqual(val<n>)->bool<n>
1418
Value *TranslateWaveAllEqual(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1419
                             HLOperationLowerHelper &helper,
1420
                             HLObjectOperationLowerHelper *pObjHelper,
1421
80
                             bool &Translated) {
1422
80
  hlsl::OP *hlslOP = &helper.hlslOP;
1423
80
  Value *src = CI->getArgOperand(HLOperandIndex::kWaveAllEqualValueOpIdx);
1424
80
  IRBuilder<> Builder(CI);
1425
1426
80
  Type *Ty = src->getType();
1427
80
  Type *RetTy = Type::getInt1Ty(CI->getContext());
1428
80
  if (Ty->isVectorTy())
1429
4
    RetTy = VectorType::get(RetTy, Ty->getVectorNumElements());
1430
1431
80
  Constant *opArg =
1432
80
      hlslOP->GetU32Const((unsigned)DXIL::OpCode::WaveActiveAllEqual);
1433
80
  Value *args[] = {opArg, src};
1434
1435
80
  return TrivialDxilOperation(DXIL::OpCode::WaveActiveAllEqual, args, Ty, RetTy,
1436
80
                              hlslOP, Builder);
1437
80
}
1438
1439
// WaveMatch(val<n>)->uint4
1440
Value *TranslateWaveMatch(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opc,
1441
                          HLOperationLowerHelper &Helper,
1442
                          HLObjectOperationLowerHelper *ObjHelper,
1443
46
                          bool &Translated) {
1444
46
  hlsl::OP *Op = &Helper.hlslOP;
1445
46
  IRBuilder<> Builder(CI);
1446
1447
  // Generate a dx.op.waveMatch call for each scalar in the input, and perform
1448
  // a bitwise AND between each result to derive the final bitmask in the case
1449
  // of vector inputs.
1450
1451
  // (1) Collect the list of all scalar inputs (e.g. decompose vectors)
1452
46
  SmallVector<Value *, 4> ScalarInputs;
1453
1454
46
  Value *Val = CI->getArgOperand(1);
1455
46
  Type *ValTy = Val->getType();
1456
46
  Type *EltTy = ValTy->getScalarType();
1457
1458
46
  if (ValTy->isVectorTy()) {
1459
78
    for (uint64_t i = 0, e = ValTy->getVectorNumElements(); i != e; 
++i64
) {
1460
64
      Value *Elt = Builder.CreateExtractElement(Val, i);
1461
64
      ScalarInputs.push_back(Elt);
1462
64
    }
1463
32
  } else {
1464
32
    ScalarInputs.push_back(Val);
1465
32
  }
1466
1467
46
  Value *Res = nullptr;
1468
46
  Constant *OpcArg = Op->GetU32Const((unsigned)DXIL::OpCode::WaveMatch);
1469
46
  Value *Fn = Op->GetOpFunc(OP::OpCode::WaveMatch, EltTy);
1470
1471
  // (2) For each scalar, emit a call to dx.op.waveMatch. If this is not the
1472
  // first scalar, then AND the result with the accumulator.
1473
142
  for (unsigned i = 0, e = ScalarInputs.size(); i != e; 
++i96
) {
1474
96
    Value *Args[] = {OpcArg, ScalarInputs[i]};
1475
96
    Value *Call = Builder.CreateCall(Fn, Args);
1476
1477
96
    if (Res) {
1478
      // Generate bitwise AND of the components
1479
250
      for (unsigned j = 0; j != 4; 
++j200
) {
1480
200
        Value *ResVal = Builder.CreateExtractValue(Res, j);
1481
200
        Value *CallVal = Builder.CreateExtractValue(Call, j);
1482
200
        Value *And = Builder.CreateAnd(ResVal, CallVal);
1483
200
        Res = Builder.CreateInsertValue(Res, And, j);
1484
200
      }
1485
50
    } else {
1486
46
      Res = Call;
1487
46
    }
1488
96
  }
1489
1490
  // (3) Convert the final aggregate into a vector to make the types match
1491
46
  Value *ResVec = UndefValue::get(CI->getType());
1492
230
  for (unsigned i = 0; i != 4; 
++i184
) {
1493
184
    Value *Elt = Builder.CreateExtractValue(Res, i);
1494
184
    ResVec = Builder.CreateInsertElement(ResVec, Elt, i);
1495
184
  }
1496
1497
46
  return ResVec;
1498
46
}
1499
1500
// Wave intrinsics of the form fn(valA)->valB, where no overloading takes place
1501
Value *TranslateWaveA2B(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1502
                        HLOperationLowerHelper &helper,
1503
                        HLObjectOperationLowerHelper *pObjHelper,
1504
162
                        bool &Translated) {
1505
162
  hlsl::OP *hlslOP = &helper.hlslOP;
1506
162
  Value *refArgs[] = {nullptr, CI->getOperand(1)};
1507
162
  return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
1508
162
}
1509
// Wave ballot intrinsic.
1510
Value *TranslateWaveBallot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1511
                           HLOperationLowerHelper &helper,
1512
                           HLObjectOperationLowerHelper *pObjHelper,
1513
32
                           bool &Translated) {
1514
  // The high-level operation is uint4 ballot(i1).
1515
  // The DXIL operation is struct.u4 ballot(i1).
1516
  // To avoid updating users with more than a simple replace, we translate into
1517
  // a call into struct.u4, then reassemble the vector.
1518
  // Scalarization and constant propagation take care of cleanup.
1519
32
  IRBuilder<> B(CI);
1520
1521
  // Make the DXIL call itself.
1522
32
  hlsl::OP *hlslOP = &helper.hlslOP;
1523
32
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
1524
32
  Value *refArgs[] = {opArg, CI->getOperand(1)};
1525
32
  Function *dxilFunc =
1526
32
      hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
1527
32
  Value *dxilVal =
1528
32
      B.CreateCall(dxilFunc, refArgs, hlslOP->GetOpCodeName(opcode));
1529
1530
  // Assign from the call results into a vector.
1531
32
  Type *ResTy = CI->getType();
1532
32
  DXASSERT_NOMSG(ResTy->isVectorTy() && ResTy->getVectorNumElements() == 4);
1533
32
  DXASSERT_NOMSG(dxilVal->getType()->isStructTy() &&
1534
32
                 dxilVal->getType()->getNumContainedTypes() == 4);
1535
1536
  // 'x' component is the first vector element, highest bits.
1537
32
  Value *ResVal = llvm::UndefValue::get(ResTy);
1538
160
  for (unsigned Idx = 0; Idx < 4; 
++Idx128
) {
1539
128
    ResVal = B.CreateInsertElement(
1540
128
        ResVal, B.CreateExtractValue(dxilVal, ArrayRef<unsigned>(Idx)), Idx);
1541
128
  }
1542
1543
32
  return ResVal;
1544
32
}
1545
1546
670
static bool WaveIntrinsicNeedsSign(OP::OpCode opcode) {
1547
670
  return opcode == OP::OpCode::WaveActiveOp ||
1548
670
         
opcode == OP::OpCode::WavePrefixOp288
;
1549
670
}
1550
1551
946
static unsigned WaveIntrinsicToSignedOpKind(IntrinsicOp IOP) {
1552
946
  if (IOP == IntrinsicOp::IOP_WaveActiveUMax ||
1553
946
      
IOP == IntrinsicOp::IOP_WaveActiveUMin908
||
1554
946
      
IOP == IntrinsicOp::IOP_WaveActiveUSum870
||
1555
946
      
IOP == IntrinsicOp::IOP_WaveActiveUProduct840
||
1556
946
      
IOP == IntrinsicOp::IOP_WaveMultiPrefixUProduct834
||
1557
946
      
IOP == IntrinsicOp::IOP_WaveMultiPrefixUSum820
||
1558
946
      
IOP == IntrinsicOp::IOP_WavePrefixUSum806
||
1559
946
      
IOP == IntrinsicOp::IOP_WavePrefixUProduct776
)
1560
176
    return (unsigned)DXIL::SignedOpKind::Unsigned;
1561
770
  return (unsigned)DXIL::SignedOpKind::Signed;
1562
946
}
1563
1564
946
static unsigned WaveIntrinsicToOpKind(IntrinsicOp IOP) {
1565
946
  switch (IOP) {
1566
  // Bit operations.
1567
28
  case IntrinsicOp::IOP_WaveActiveBitOr:
1568
28
    return (unsigned)DXIL::WaveBitOpKind::Or;
1569
62
  case IntrinsicOp::IOP_WaveActiveBitAnd:
1570
62
    return (unsigned)DXIL::WaveBitOpKind::And;
1571
44
  case IntrinsicOp::IOP_WaveActiveBitXor:
1572
44
    return (unsigned)DXIL::WaveBitOpKind::Xor;
1573
  // Prefix operations.
1574
44
  case IntrinsicOp::IOP_WavePrefixSum:
1575
74
  case IntrinsicOp::IOP_WavePrefixUSum:
1576
74
    return (unsigned)DXIL::WaveOpKind::Sum;
1577
74
  case IntrinsicOp::IOP_WavePrefixProduct:
1578
80
  case IntrinsicOp::IOP_WavePrefixUProduct:
1579
80
    return (unsigned)DXIL::WaveOpKind::Product;
1580
    // Numeric operations.
1581
46
  case IntrinsicOp::IOP_WaveActiveMax:
1582
84
  case IntrinsicOp::IOP_WaveActiveUMax:
1583
84
    return (unsigned)DXIL::WaveOpKind::Max;
1584
60
  case IntrinsicOp::IOP_WaveActiveMin:
1585
98
  case IntrinsicOp::IOP_WaveActiveUMin:
1586
98
    return (unsigned)DXIL::WaveOpKind::Min;
1587
90
  case IntrinsicOp::IOP_WaveActiveSum:
1588
120
  case IntrinsicOp::IOP_WaveActiveUSum:
1589
120
    return (unsigned)DXIL::WaveOpKind::Sum;
1590
74
  case IntrinsicOp::IOP_WaveActiveProduct:
1591
80
  case IntrinsicOp::IOP_WaveActiveUProduct:
1592
  // MultiPrefix operations
1593
124
  case IntrinsicOp::IOP_WaveMultiPrefixBitAnd:
1594
124
    return (unsigned)DXIL::WaveMultiPrefixOpKind::And;
1595
44
  case IntrinsicOp::IOP_WaveMultiPrefixBitOr:
1596
44
    return (unsigned)DXIL::WaveMultiPrefixOpKind::Or;
1597
44
  case IntrinsicOp::IOP_WaveMultiPrefixBitXor:
1598
44
    return (unsigned)DXIL::WaveMultiPrefixOpKind::Xor;
1599
58
  case IntrinsicOp::IOP_WaveMultiPrefixProduct:
1600
72
  case IntrinsicOp::IOP_WaveMultiPrefixUProduct:
1601
72
    return (unsigned)DXIL::WaveMultiPrefixOpKind::Product;
1602
58
  case IntrinsicOp::IOP_WaveMultiPrefixSum:
1603
72
  case IntrinsicOp::IOP_WaveMultiPrefixUSum:
1604
72
    return (unsigned)DXIL::WaveMultiPrefixOpKind::Sum;
1605
0
  default:
1606
0
    DXASSERT(IOP == IntrinsicOp::IOP_WaveActiveProduct ||
1607
0
                 IOP == IntrinsicOp::IOP_WaveActiveUProduct,
1608
0
             "else caller passed incorrect value");
1609
0
    return (unsigned)DXIL::WaveOpKind::Product;
1610
946
  }
1611
946
}
1612
1613
// Wave intrinsics of the form fn(valA)->valA
1614
Value *TranslateWaveA2A(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1615
                        HLOperationLowerHelper &helper,
1616
                        HLObjectOperationLowerHelper *pObjHelper,
1617
670
                        bool &Translated) {
1618
670
  hlsl::OP *hlslOP = &helper.hlslOP;
1619
1620
670
  Constant *kindValInt = hlslOP->GetI8Const(WaveIntrinsicToOpKind(IOP));
1621
670
  Constant *signValInt = hlslOP->GetI8Const(WaveIntrinsicToSignedOpKind(IOP));
1622
670
  Value *refArgs[] = {nullptr, CI->getOperand(1), kindValInt, signValInt};
1623
670
  unsigned refArgCount = _countof(refArgs);
1624
670
  if (!WaveIntrinsicNeedsSign(opcode))
1625
134
    refArgCount--;
1626
670
  return TrivialDxilOperation(opcode,
1627
670
                              llvm::ArrayRef<Value *>(refArgs, refArgCount),
1628
670
                              CI->getOperand(1)->getType(), CI, hlslOP);
1629
670
}
1630
1631
// WaveMultiPrefixOP(val<n>, mask) -> val<n>
1632
Value *TranslateWaveMultiPrefix(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opc,
1633
                                HLOperationLowerHelper &Helper,
1634
                                HLObjectOperationLowerHelper *ObjHelper,
1635
276
                                bool &Translated) {
1636
276
  hlsl::OP *Op = &Helper.hlslOP;
1637
1638
276
  Constant *KindValInt = Op->GetI8Const(WaveIntrinsicToOpKind(IOP));
1639
276
  Constant *SignValInt = Op->GetI8Const(WaveIntrinsicToSignedOpKind(IOP));
1640
1641
  // Decompose mask into scalars
1642
276
  IRBuilder<> Builder(CI);
1643
276
  Value *Mask = CI->getArgOperand(2);
1644
276
  Value *Mask0 = Builder.CreateExtractElement(Mask, (uint64_t)0);
1645
276
  Value *Mask1 = Builder.CreateExtractElement(Mask, (uint64_t)1);
1646
276
  Value *Mask2 = Builder.CreateExtractElement(Mask, (uint64_t)2);
1647
276
  Value *Mask3 = Builder.CreateExtractElement(Mask, (uint64_t)3);
1648
1649
276
  Value *Args[] = {nullptr, CI->getOperand(1), Mask0,     Mask1, Mask2,
1650
276
                   Mask3,   KindValInt,        SignValInt};
1651
1652
276
  return TrivialDxilOperation(Opc, Args, CI->getOperand(1)->getType(), CI, Op);
1653
276
}
1654
1655
// WaveMultiPrefixBitCount(i1, mask) -> i32
1656
Value *TranslateWaveMultiPrefixBitCount(CallInst *CI, IntrinsicOp IOP,
1657
                                        OP::OpCode Opc,
1658
                                        HLOperationLowerHelper &Helper,
1659
                                        HLObjectOperationLowerHelper *ObjHelper,
1660
40
                                        bool &Translated) {
1661
40
  hlsl::OP *Op = &Helper.hlslOP;
1662
1663
  // Decompose mask into scalars
1664
40
  IRBuilder<> Builder(CI);
1665
40
  Value *Mask = CI->getArgOperand(2);
1666
40
  Value *Mask0 = Builder.CreateExtractElement(Mask, (uint64_t)0);
1667
40
  Value *Mask1 = Builder.CreateExtractElement(Mask, (uint64_t)1);
1668
40
  Value *Mask2 = Builder.CreateExtractElement(Mask, (uint64_t)2);
1669
40
  Value *Mask3 = Builder.CreateExtractElement(Mask, (uint64_t)3);
1670
1671
40
  Value *Args[] = {nullptr, CI->getOperand(1), Mask0, Mask1, Mask2, Mask3};
1672
1673
40
  return TrivialDxilOperation(Opc, Args, Helper.voidTy, CI, Op);
1674
40
}
1675
1676
// Wave intrinsics of the form fn()->val
1677
Value *TranslateWaveToVal(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1678
                          HLOperationLowerHelper &helper,
1679
                          HLObjectOperationLowerHelper *pObjHelper,
1680
96
                          bool &Translated) {
1681
96
  hlsl::OP *hlslOP = &helper.hlslOP;
1682
96
  Value *refArgs[] = {nullptr};
1683
96
  return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
1684
96
}
1685
1686
// Wave intrinsics of the form fn(val,lane)->val
1687
Value *TranslateWaveReadLaneAt(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1688
                               HLOperationLowerHelper &helper,
1689
                               HLObjectOperationLowerHelper *pObjHelper,
1690
98
                               bool &Translated) {
1691
98
  hlsl::OP *hlslOP = &helper.hlslOP;
1692
98
  Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)};
1693
98
  return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneAt, refArgs,
1694
98
                              CI->getOperand(1)->getType(), CI, hlslOP);
1695
98
}
1696
1697
// Wave intrinsics of the form fn(val)->val
1698
Value *TranslateWaveReadLaneFirst(CallInst *CI, IntrinsicOp IOP,
1699
                                  OP::OpCode opcode,
1700
                                  HLOperationLowerHelper &helper,
1701
                                  HLObjectOperationLowerHelper *pObjHelper,
1702
274
                                  bool &Translated) {
1703
274
  hlsl::OP *hlslOP = &helper.hlslOP;
1704
274
  Value *refArgs[] = {nullptr, CI->getOperand(1)};
1705
274
  return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneFirst, refArgs,
1706
274
                              CI->getOperand(1)->getType(), CI, hlslOP);
1707
274
}
1708
1709
Value *TranslateAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1710
                    HLOperationLowerHelper &helper,
1711
                    HLObjectOperationLowerHelper *pObjHelper,
1712
950
                    bool &Translated) {
1713
950
  hlsl::OP *hlslOP = &helper.hlslOP;
1714
950
  Type *pOverloadTy = CI->getType()->getScalarType();
1715
950
  if (pOverloadTy->isFloatingPointTy()) {
1716
804
    Value *refArgs[] = {nullptr, CI->getOperand(1)};
1717
804
    return TrivialDxilOperation(DXIL::OpCode::FAbs, refArgs, CI->getType(), CI,
1718
804
                                hlslOP);
1719
804
  }
1720
1721
146
  Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
1722
146
  IRBuilder<> Builder(CI);
1723
146
  Value *neg = Builder.CreateNeg(src);
1724
146
  return TrivialDxilBinaryOperation(DXIL::OpCode::IMax, src, neg, hlslOP,
1725
146
                                    Builder);
1726
950
}
1727
1728
Value *TranslateUAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1729
                     HLOperationLowerHelper &helper,
1730
                     HLObjectOperationLowerHelper *pObjHelper,
1731
24
                     bool &Translated) {
1732
24
  return CI->getOperand(HLOperandIndex::kUnaryOpSrc0Idx); // No-op
1733
24
}
1734
1735
312
Value *GenerateCmpNEZero(Value *val, IRBuilder<> Builder) {
1736
312
  Type *Ty = val->getType();
1737
312
  Type *EltTy = Ty->getScalarType();
1738
1739
312
  Constant *zero = nullptr;
1740
312
  if (EltTy->isFloatingPointTy())
1741
36
    zero = ConstantFP::get(EltTy, 0);
1742
276
  else
1743
276
    zero = ConstantInt::get(EltTy, 0);
1744
1745
312
  if (Ty != EltTy)
1746
270
    zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
1747
1748
312
  if (EltTy->isFloatingPointTy())
1749
36
    return Builder.CreateFCmpUNE(val, zero);
1750
1751
276
  return Builder.CreateICmpNE(val, zero);
1752
312
}
1753
1754
144
Value *TranslateAllForValue(Value *val, IRBuilder<> &Builder) {
1755
144
  Value *cond = GenerateCmpNEZero(val, Builder);
1756
1757
144
  Type *Ty = val->getType();
1758
144
  Type *EltTy = Ty->getScalarType();
1759
1760
144
  if (Ty == EltTy)
1761
24
    return cond;
1762
1763
120
  Value *Result = Builder.CreateExtractElement(cond, (uint64_t)0);
1764
560
  for (unsigned i = 1; i < Ty->getVectorNumElements(); 
i++440
) {
1765
440
    Value *Elt = Builder.CreateExtractElement(cond, i);
1766
440
    Result = Builder.CreateAnd(Result, Elt);
1767
440
  }
1768
1769
120
  return Result;
1770
144
}
1771
1772
Value *TranslateAll(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1773
                    HLOperationLowerHelper &helper,
1774
                    HLObjectOperationLowerHelper *pObjHelper,
1775
144
                    bool &Translated) {
1776
144
  Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
1777
144
  IRBuilder<> Builder(CI);
1778
144
  return TranslateAllForValue(val, Builder);
1779
144
}
1780
1781
Value *TranslateAny(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1782
                    HLOperationLowerHelper &helper,
1783
                    HLObjectOperationLowerHelper *pObjHelper,
1784
168
                    bool &Translated) {
1785
168
  Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
1786
1787
168
  IRBuilder<> Builder(CI);
1788
1789
168
  Value *cond = GenerateCmpNEZero(val, Builder);
1790
1791
168
  Type *Ty = val->getType();
1792
168
  Type *EltTy = Ty->getScalarType();
1793
1794
168
  if (Ty == EltTy)
1795
18
    return cond;
1796
1797
150
  Value *Result = Builder.CreateExtractElement(cond, (uint64_t)0);
1798
688
  for (unsigned i = 1; i < Ty->getVectorNumElements(); 
i++538
) {
1799
538
    Value *Elt = Builder.CreateExtractElement(cond, i);
1800
538
    Result = Builder.CreateOr(Result, Elt);
1801
538
  }
1802
150
  return Result;
1803
168
}
1804
1805
Value *TranslateBitcast(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1806
                        HLOperationLowerHelper &helper,
1807
                        HLObjectOperationLowerHelper *pObjHelper,
1808
1.83k
                        bool &Translated) {
1809
1.83k
  Type *Ty = CI->getType();
1810
1.83k
  Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
1811
1.83k
  IRBuilder<> Builder(CI);
1812
1.83k
  return Builder.CreateBitCast(op, Ty);
1813
1.83k
}
1814
1815
Value *TranslateDoubleAsUint(Value *x, Value *lo, Value *hi,
1816
32
                             IRBuilder<> &Builder, hlsl::OP *hlslOP) {
1817
32
  Type *Ty = x->getType();
1818
32
  Type *outTy = lo->getType()->getPointerElementType();
1819
32
  DXIL::OpCode opcode = DXIL::OpCode::SplitDouble;
1820
1821
32
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
1822
32
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
1823
1824
32
  if (Ty->isVectorTy()) {
1825
8
    Value *retValLo = llvm::UndefValue::get(outTy);
1826
8
    Value *retValHi = llvm::UndefValue::get(outTy);
1827
8
    unsigned vecSize = Ty->getVectorNumElements();
1828
1829
24
    for (unsigned i = 0; i < vecSize; 
i++16
) {
1830
16
      Value *Elt = Builder.CreateExtractElement(x, i);
1831
16
      Value *EltOP = Builder.CreateCall(dxilFunc, {opArg, Elt},
1832
16
                                        hlslOP->GetOpCodeName(opcode));
1833
16
      Value *EltLo = Builder.CreateExtractValue(EltOP, 0);
1834
16
      retValLo = Builder.CreateInsertElement(retValLo, EltLo, i);
1835
16
      Value *EltHi = Builder.CreateExtractValue(EltOP, 1);
1836
16
      retValHi = Builder.CreateInsertElement(retValHi, EltHi, i);
1837
16
    }
1838
8
    Builder.CreateStore(retValLo, lo);
1839
8
    Builder.CreateStore(retValHi, hi);
1840
24
  } else {
1841
24
    Value *retVal =
1842
24
        Builder.CreateCall(dxilFunc, {opArg, x}, hlslOP->GetOpCodeName(opcode));
1843
24
    Value *retValLo = Builder.CreateExtractValue(retVal, 0);
1844
24
    Value *retValHi = Builder.CreateExtractValue(retVal, 1);
1845
24
    Builder.CreateStore(retValLo, lo);
1846
24
    Builder.CreateStore(retValHi, hi);
1847
24
  }
1848
1849
32
  return nullptr;
1850
32
}
1851
1852
Value *TranslateAsUint(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1853
                       HLOperationLowerHelper &helper,
1854
                       HLObjectOperationLowerHelper *pObjHelper,
1855
600
                       bool &Translated) {
1856
600
  if (CI->getNumArgOperands() == 2)
1857
568
    return TranslateBitcast(CI, IOP, opcode, helper, pObjHelper, Translated);
1858
1859
32
  DXASSERT_NOMSG(CI->getNumArgOperands() == 4);
1860
32
  hlsl::OP *hlslOP = &helper.hlslOP;
1861
32
  Value *x = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
1862
32
  DXASSERT_NOMSG(x->getType()->getScalarType()->isDoubleTy());
1863
32
  Value *lo = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
1864
32
  Value *hi = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
1865
32
  IRBuilder<> Builder(CI);
1866
32
  return TranslateDoubleAsUint(x, lo, hi, Builder, hlslOP);
1867
600
}
1868
1869
Value *TranslateAsDouble(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1870
                         HLOperationLowerHelper &helper,
1871
                         HLObjectOperationLowerHelper *pObjHelper,
1872
66
                         bool &Translated) {
1873
66
  hlsl::OP *hlslOP = &helper.hlslOP;
1874
66
  Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
1875
66
  Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
1876
1877
66
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
1878
66
  IRBuilder<> Builder(CI);
1879
66
  return TrivialDxilOperation(opcode, {opArg, x, y}, CI->getType(),
1880
66
                              CI->getType(), hlslOP, Builder);
1881
66
}
1882
1883
Value *TranslateAtan2(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1884
                      HLOperationLowerHelper &helper,
1885
                      HLObjectOperationLowerHelper *pObjHelper,
1886
56
                      bool &Translated) {
1887
56
  hlsl::OP *hlslOP = &helper.hlslOP;
1888
56
  Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
1889
56
  Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
1890
1891
56
  IRBuilder<> Builder(CI);
1892
56
  Value *tan = Builder.CreateFDiv(y, x);
1893
1894
56
  Value *atan =
1895
56
      TrivialDxilUnaryOperation(OP::OpCode::Atan, tan, hlslOP, Builder);
1896
  // Modify atan result based on https://en.wikipedia.org/wiki/Atan2.
1897
56
  Type *Ty = x->getType();
1898
56
  Constant *pi = ConstantFP::get(Ty->getScalarType(), M_PI);
1899
56
  Constant *halfPi = ConstantFP::get(Ty->getScalarType(), M_PI / 2);
1900
56
  Constant *negHalfPi = ConstantFP::get(Ty->getScalarType(), -M_PI / 2);
1901
56
  Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
1902
56
  if (Ty->isVectorTy()) {
1903
22
    unsigned vecSize = Ty->getVectorNumElements();
1904
22
    pi = ConstantVector::getSplat(vecSize, pi);
1905
22
    halfPi = ConstantVector::getSplat(vecSize, halfPi);
1906
22
    negHalfPi = ConstantVector::getSplat(vecSize, negHalfPi);
1907
22
    zero = ConstantVector::getSplat(vecSize, zero);
1908
22
  }
1909
56
  Value *atanAddPi = Builder.CreateFAdd(atan, pi);
1910
56
  Value *atanSubPi = Builder.CreateFSub(atan, pi);
1911
1912
  // x > 0 -> atan.
1913
56
  Value *result = atan;
1914
56
  Value *xLt0 = Builder.CreateFCmpOLT(x, zero);
1915
56
  Value *xEq0 = Builder.CreateFCmpOEQ(x, zero);
1916
1917
56
  Value *yGe0 = Builder.CreateFCmpOGE(y, zero);
1918
56
  Value *yLt0 = Builder.CreateFCmpOLT(y, zero);
1919
  // x < 0, y >= 0 -> atan + pi.
1920
56
  Value *xLt0AndyGe0 = Builder.CreateAnd(xLt0, yGe0);
1921
56
  result = Builder.CreateSelect(xLt0AndyGe0, atanAddPi, result);
1922
1923
  // x < 0, y < 0 -> atan - pi.
1924
56
  Value *xLt0AndYLt0 = Builder.CreateAnd(xLt0, yLt0);
1925
56
  result = Builder.CreateSelect(xLt0AndYLt0, atanSubPi, result);
1926
1927
  // x == 0, y < 0 -> -pi/2
1928
56
  Value *xEq0AndYLt0 = Builder.CreateAnd(xEq0, yLt0);
1929
56
  result = Builder.CreateSelect(xEq0AndYLt0, negHalfPi, result);
1930
  // x == 0, y > 0 -> pi/2
1931
56
  Value *xEq0AndYGe0 = Builder.CreateAnd(xEq0, yGe0);
1932
56
  result = Builder.CreateSelect(xEq0AndYGe0, halfPi, result);
1933
1934
56
  return result;
1935
56
}
1936
1937
Value *TranslateClamp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1938
                      HLOperationLowerHelper &helper,
1939
                      HLObjectOperationLowerHelper *pObjHelper,
1940
764
                      bool &Translated) {
1941
764
  hlsl::OP *hlslOP = &helper.hlslOP;
1942
764
  Type *Ty = CI->getType();
1943
764
  Type *EltTy = Ty->getScalarType();
1944
764
  DXIL::OpCode maxOp = DXIL::OpCode::FMax;
1945
764
  DXIL::OpCode minOp = DXIL::OpCode::FMin;
1946
764
  if (IOP == IntrinsicOp::IOP_uclamp) {
1947
56
    maxOp = DXIL::OpCode::UMax;
1948
56
    minOp = DXIL::OpCode::UMin;
1949
708
  } else if (EltTy->isIntegerTy()) {
1950
48
    maxOp = DXIL::OpCode::IMax;
1951
48
    minOp = DXIL::OpCode::IMin;
1952
48
  }
1953
1954
764
  Value *x = CI->getArgOperand(HLOperandIndex::kClampOpXIdx);
1955
764
  Value *maxVal = CI->getArgOperand(HLOperandIndex::kClampOpMaxIdx);
1956
764
  Value *minVal = CI->getArgOperand(HLOperandIndex::kClampOpMinIdx);
1957
1958
764
  IRBuilder<> Builder(CI);
1959
  // min(max(x, minVal), maxVal).
1960
764
  Value *maxXMinVal =
1961
764
      TrivialDxilBinaryOperation(maxOp, x, minVal, hlslOP, Builder);
1962
764
  return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder);
1963
764
}
1964
1965
Value *TranslateClip(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1966
                     HLOperationLowerHelper &helper,
1967
                     HLObjectOperationLowerHelper *pObjHelper,
1968
110
                     bool &Translated) {
1969
110
  hlsl::OP *hlslOP = &helper.hlslOP;
1970
110
  Function *discard =
1971
110
      hlslOP->GetOpFunc(OP::OpCode::Discard, Type::getVoidTy(CI->getContext()));
1972
110
  IRBuilder<> Builder(CI);
1973
110
  Value *cond = nullptr;
1974
110
  Value *arg = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
1975
110
  if (VectorType *VT = dyn_cast<VectorType>(arg->getType())) {
1976
14
    Value *elt = Builder.CreateExtractElement(arg, (uint64_t)0);
1977
14
    cond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0));
1978
50
    for (unsigned i = 1; i < VT->getNumElements(); 
i++36
) {
1979
36
      Value *elt = Builder.CreateExtractElement(arg, i);
1980
36
      Value *eltCond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0));
1981
36
      cond = Builder.CreateOr(cond, eltCond);
1982
36
    }
1983
14
  } else
1984
96
    cond = Builder.CreateFCmpOLT(arg, hlslOP->GetFloatConst(0));
1985
1986
  /*If discard condition evaluates to false at compile-time, then
1987
  don't emit the discard instruction.*/
1988
110
  if (ConstantInt *constCond = dyn_cast<ConstantInt>(cond))
1989
78
    if (!constCond->getLimitedValue())
1990
10
      return nullptr;
1991
1992
100
  Constant *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::Discard);
1993
100
  Builder.CreateCall(discard, {opArg, cond});
1994
100
  return nullptr;
1995
110
}
1996
1997
Value *TranslateCross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1998
                      HLOperationLowerHelper &helper,
1999
                      HLObjectOperationLowerHelper *pObjHelper,
2000
104
                      bool &Translated) {
2001
104
  VectorType *VT = cast<VectorType>(CI->getType());
2002
104
  DXASSERT_NOMSG(VT->getNumElements() == 3);
2003
2004
104
  Value *op0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
2005
104
  Value *op1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
2006
2007
104
  IRBuilder<> Builder(CI);
2008
104
  Value *op0_x = Builder.CreateExtractElement(op0, (uint64_t)0);
2009
104
  Value *op0_y = Builder.CreateExtractElement(op0, 1);
2010
104
  Value *op0_z = Builder.CreateExtractElement(op0, 2);
2011
2012
104
  Value *op1_x = Builder.CreateExtractElement(op1, (uint64_t)0);
2013
104
  Value *op1_y = Builder.CreateExtractElement(op1, 1);
2014
104
  Value *op1_z = Builder.CreateExtractElement(op1, 2);
2015
2016
312
  auto MulSub = [&](Value *x0, Value *y0, Value *x1, Value *y1) -> Value * {
2017
312
    Value *xy = Builder.CreateFMul(x0, y1);
2018
312
    Value *yx = Builder.CreateFMul(y0, x1);
2019
312
    return Builder.CreateFSub(xy, yx);
2020
312
  };
2021
2022
104
  Value *yz_zy = MulSub(op0_y, op0_z, op1_y, op1_z);
2023
104
  Value *zx_xz = MulSub(op0_z, op0_x, op1_z, op1_x);
2024
104
  Value *xy_yx = MulSub(op0_x, op0_y, op1_x, op1_y);
2025
2026
104
  Value *cross = UndefValue::get(VT);
2027
104
  cross = Builder.CreateInsertElement(cross, yz_zy, (uint64_t)0);
2028
104
  cross = Builder.CreateInsertElement(cross, zx_xz, 1);
2029
104
  cross = Builder.CreateInsertElement(cross, xy_yx, 2);
2030
104
  return cross;
2031
104
}
2032
2033
Value *TranslateDegrees(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2034
                        HLOperationLowerHelper &helper,
2035
                        HLObjectOperationLowerHelper *pObjHelper,
2036
32
                        bool &Translated) {
2037
32
  IRBuilder<> Builder(CI);
2038
32
  Type *Ty = CI->getType();
2039
32
  Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2040
  // 180/pi.
2041
32
  Constant *toDegreeConst = ConstantFP::get(Ty->getScalarType(), 180 / M_PI);
2042
32
  if (Ty != Ty->getScalarType()) {
2043
16
    toDegreeConst =
2044
16
        ConstantVector::getSplat(Ty->getVectorNumElements(), toDegreeConst);
2045
16
  }
2046
32
  return Builder.CreateFMul(toDegreeConst, val);
2047
32
}
2048
2049
Value *TranslateDst(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2050
                    HLOperationLowerHelper &helper,
2051
                    HLObjectOperationLowerHelper *pObjHelper,
2052
16
                    bool &Translated) {
2053
16
  Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
2054
16
  Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
2055
16
  Type *Ty = src1->getType();
2056
16
  IRBuilder<> Builder(CI);
2057
16
  Value *Result = UndefValue::get(Ty);
2058
16
  Constant *oneConst = ConstantFP::get(Ty->getScalarType(), 1);
2059
  // dest.x = 1;
2060
16
  Result = Builder.CreateInsertElement(Result, oneConst, (uint64_t)0);
2061
  // dest.y = src0.y * src1.y;
2062
16
  Value *src0_y = Builder.CreateExtractElement(src0, 1);
2063
16
  Value *src1_y = Builder.CreateExtractElement(src1, 1);
2064
16
  Value *yMuly = Builder.CreateFMul(src0_y, src1_y);
2065
16
  Result = Builder.CreateInsertElement(Result, yMuly, 1);
2066
  // dest.z = src0.z;
2067
16
  Value *src0_z = Builder.CreateExtractElement(src0, 2);
2068
16
  Result = Builder.CreateInsertElement(Result, src0_z, 2);
2069
  // dest.w = src1.w;
2070
16
  Value *src1_w = Builder.CreateExtractElement(src1, 3);
2071
16
  Result = Builder.CreateInsertElement(Result, src1_w, 3);
2072
16
  return Result;
2073
16
}
2074
2075
Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2076
                           HLOperationLowerHelper &helper,
2077
                           HLObjectOperationLowerHelper *pObjHelper,
2078
204
                           bool &Translated) {
2079
204
  hlsl::OP *OP = &helper.hlslOP;
2080
204
  IRBuilder<> Builder(CI);
2081
204
  Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2082
2083
204
  Type *Ty = Src->getType();
2084
204
  Type *RetTy = Type::getInt32Ty(CI->getContext());
2085
204
  unsigned NumElements = 0;
2086
204
  if (Ty->isVectorTy()) {
2087
38
    NumElements = Ty->getVectorNumElements();
2088
38
    RetTy = VectorType::get(RetTy, NumElements);
2089
38
  }
2090
2091
204
  Constant *OpArg = OP->GetU32Const((unsigned)opcode);
2092
204
  Value *Args[] = {OpArg, Src};
2093
2094
204
  Value *FirstbitHi =
2095
204
      TrivialDxilOperation(opcode, Args, Ty, RetTy, OP, Builder);
2096
2097
204
  IntegerType *EltTy = cast<IntegerType>(Ty->getScalarType());
2098
204
  Constant *Neg1 = Builder.getInt32(-1);
2099
204
  Constant *BitWidth = Builder.getInt32(EltTy->getBitWidth() - 1);
2100
2101
204
  if (NumElements > 0) {
2102
38
    Neg1 = ConstantVector::getSplat(NumElements, Neg1);
2103
38
    BitWidth = ConstantVector::getSplat(NumElements, BitWidth);
2104
38
  }
2105
2106
204
  Value *Sub = Builder.CreateSub(BitWidth, FirstbitHi);
2107
204
  Value *Cond = Builder.CreateICmpEQ(Neg1, FirstbitHi);
2108
204
  return Builder.CreateSelect(Cond, Neg1, Sub);
2109
204
}
2110
2111
// Lowers firstbitlow to a single call of the DXIL op selected by `opcode`.
// The result type is i32, or a vector of i32 matching the source vector size.
Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                           HLOperationLowerHelper &helper,
                           HLObjectOperationLowerHelper *pObjHelper,
                           bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  IRBuilder<> Builder(CI);
  Value *Operand = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  Type *OperandTy = Operand->getType();

  Type *Int32Ty = Type::getInt32Ty(CI->getContext());
  Type *ResultTy =
      OperandTy->isVectorTy()
          ? VectorType::get(Int32Ty, OperandTy->getVectorNumElements())
          : Int32Ty;

  Value *Args[] = {hlslOP->GetU32Const((unsigned)opcode), Operand};
  return TrivialDxilOperation(opcode, Args, OperandTy, ResultTy, hlslOP,
                              Builder);
}
2132
2133
// Lowers lit(n_dot_l, n_dot_h, m) to a 4-component vector
// (ambient, diffuse, specular, 1):
//   ambient  = 1
//   diffuse  = (n_dot_l < 0) ? 0 : n_dot_l
//   specular = ((n_dot_l < 0) || (n_dot_h < 0)) ? 0 : pow(n_dot_h, m)
// The power is produced by TranslatePowImpl, which is told whether the module
// was compiled in FXC compatibility mode.
Value *TranslateLit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                    HLOperationLowerHelper &helper,
                    HLObjectOperationLowerHelper *pObjHelper,
                    bool &Translated) {
  Value *NdotL = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  Value *NdotH = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  Value *Exponent = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  IRBuilder<> Builder(CI);

  Type *EltTy = Exponent->getType();
  Constant *One = ConstantFP::get(EltTy, 1);
  Constant *Zero = ConstantFP::get(EltTy, 0);

  // Components x (ambient) and w are both the constant 1.
  Value *Lit = UndefValue::get(VectorType::get(EltTy, 4));
  Lit = Builder.CreateInsertElement(Lit, One, (uint64_t)0);
  Lit = Builder.CreateInsertElement(Lit, One, 3);

  // y: diffuse term, clamped at zero from below.
  Value *NdotLNegative = Builder.CreateFCmpOLT(NdotL, Zero);
  Value *Diffuse = Builder.CreateSelect(NdotLNegative, Zero, NdotL);
  Lit = Builder.CreateInsertElement(Lit, Diffuse, 1);

  // z: specular term, forced to zero when either dot product is negative.
  Value *NdotHNegative = Builder.CreateFCmpOLT(NdotH, Zero);
  Value *NoSpecular = Builder.CreateOr(NdotLNegative, NdotHNegative);
  const bool FxcCompat =
      CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode;
  Value *Power =
      TranslatePowImpl(&helper.hlslOP, Builder, NdotH, Exponent, FxcCompat);
  Value *Specular = Builder.CreateSelect(NoSpecular, Zero, Power);
  Lit = Builder.CreateInsertElement(Lit, Specular, 2);
  return Lit;
}
2166
2167
// Lowers radians(x) to a floating-point multiply by pi/180. The constant is
// splatted when the result type is a vector.
Value *TranslateRadians(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                        HLOperationLowerHelper &helper,
                        HLObjectOperationLowerHelper *pObjHelper,
                        bool &Translated) {
  IRBuilder<> Builder(CI);
  Type *ResultTy = CI->getType();
  Type *EltTy = ResultTy->getScalarType();
  Value *Degrees = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);

  Constant *DegToRad = ConstantFP::get(EltTy, M_PI / 180);
  if (ResultTy != EltTy)
    DegToRad =
        ConstantVector::getSplat(ResultTy->getVectorNumElements(), DegToRad);

  return Builder.CreateFMul(DegToRad, Degrees);
}
2182
2183
// Lowers f16tof32 to the DXIL op selected by `opcode`. The op function is
// looked up with the helper's void type as overload; the source keeps its own
// type and the call's type is used as the result type.
Value *TranslateF16ToF32(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                         HLOperationLowerHelper &helper,
                         HLObjectOperationLowerHelper *pObjHelper,
                         bool &Translated) {
  IRBuilder<> Builder(CI);
  Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  Type *ResultTy = CI->getType();

  Function *ConvertFn = helper.hlslOP.GetOpFunc(opcode, helper.voidTy);
  Value *OpCodeArg = Builder.getInt32(static_cast<unsigned>(opcode));
  return TrivialDxilOperation(ConvertFn, opcode, {OpCodeArg, Src},
                              Src->getType(), ResultTy, &helper.hlslOP,
                              Builder);
}
2197
2198
// Lowers f32tof16 to the DXIL op selected by `opcode`. The op function is
// looked up with the helper's void type as overload; the source keeps its own
// type and the call's type is used as the result type.
Value *TranslateF32ToF16(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                         HLOperationLowerHelper &helper,
                         HLObjectOperationLowerHelper *pObjHelper,
                         bool &Translated) {
  IRBuilder<> Builder(CI);
  Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  Type *ResultTy = CI->getType();

  Function *ConvertFn = helper.hlslOP.GetOpFunc(opcode, helper.voidTy);
  Value *OpCodeArg = Builder.getInt32(static_cast<unsigned>(opcode));
  return TrivialDxilOperation(ConvertFn, opcode, {OpCodeArg, Src},
                              Src->getType(), ResultTy, &helper.hlslOP,
                              Builder);
}
2212
2213
282
// Emits the length of `val` before `CI`.
//   - vector with more than one element: Sqrt of the sum of squared elements
//   - one-element vector or scalar:      FAbs of the single value
Value *TranslateLength(CallInst *CI, Value *val, hlsl::OP *hlslOP) {
  IRBuilder<> Builder(CI);

  if (VectorType *VecTy = dyn_cast<VectorType>(val->getType())) {
    Value *Comp = Builder.CreateExtractElement(val, (uint64_t)0);
    const unsigned NumComps = VecTy->getNumElements();
    if (NumComps <= 1) {
      // Degenerate vector: fall through to the scalar path below.
      val = Comp;
    } else {
      // Accumulate x*x + y*y + ... one component at a time.
      Value *SumSq = Builder.CreateFMul(Comp, Comp);
      for (unsigned Idx = 1; Idx != NumComps; ++Idx) {
        Comp = Builder.CreateExtractElement(val, Idx);
        Value *Sq = Builder.CreateFMul(Comp, Comp);
        SumSq = Builder.CreateFAdd(SumSq, Sq);
      }
      const DXIL::OpCode SqrtOp = DXIL::OpCode::Sqrt;
      Function *SqrtFn = hlslOP->GetOpFunc(SqrtOp, VecTy->getElementType());
      Value *SqrtOpArg = hlslOP->GetI32Const((unsigned)SqrtOp);
      return Builder.CreateCall(SqrtFn, {SqrtOpArg, SumSq},
                                hlslOP->GetOpCodeName(SqrtOp));
    }
  }

  // Scalar length is just the absolute value.
  const DXIL::OpCode FAbsOp = DXIL::OpCode::FAbs;
  Function *FAbsFn = hlslOP->GetOpFunc(FAbsOp, val->getType());
  Value *FAbsOpArg = hlslOP->GetI32Const((unsigned)FAbsOp);
  return Builder.CreateCall(FAbsFn, {FAbsOpArg, val},
                            hlslOP->GetOpCodeName(FAbsOp));
}
2240
2241
// Intrinsic entry point for length(x): forwards the call's single source
// operand to the value-based TranslateLength overload.
Value *TranslateLength(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                       HLOperationLowerHelper &helper,
                       HLObjectOperationLowerHelper *pObjHelper,
                       bool &Translated) {
  Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  return TranslateLength(CI, Src, &helper.hlslOP);
}
2249
2250
// Lowers modf(x, out ip). The integer part is produced by the Round_z DXIL op
// and stored through the second operand (a pointer); the returned fractional
// part is x minus that integer part.
Value *TranslateModF(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                     HLOperationLowerHelper &helper,
                     HLObjectOperationLowerHelper *pObjHelper,
                     bool &Translated) {
  Value *Src = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *IntPartPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);

  Value *IntPart = TrivialDxilUnaryOperation(OP::OpCode::Round_z, Src,
                                             &helper.hlslOP, Builder);
  Value *FracPart = Builder.CreateFSub(Src, IntPart);
  Builder.CreateStore(IntPart, IntPartPtr);
  return FracPart;
}
2264
2265
// Lowers distance(a, b) as length(a - b), reusing the value-based
// TranslateLength overload for the length computation.
Value *TranslateDistance(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                         HLOperationLowerHelper &helper,
                         HLObjectOperationLowerHelper *pObjHelper,
                         bool &Translated) {
  Value *PointA = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *PointB = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  Value *Delta = Builder.CreateFSub(PointA, PointB);
  return TranslateLength(CI, Delta, &helper.hlslOP);
}
2276
2277
// Lowers exp(x) by scaling the input with log2(e) and applying the DXIL Exp
// op. NOTE(review): the scale factor implies the DXIL Exp op is base 2;
// confirm against the DXIL specification.
Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                    HLOperationLowerHelper &helper,
                    HLObjectOperationLowerHelper *pObjHelper,
                    bool &Translated) {
  IRBuilder<> Builder(CI);
  Type *ResultTy = CI->getType();
  Type *EltTy = ResultTy->getScalarType();
  Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);

  Constant *Log2E = ConstantFP::get(EltTy, M_LOG2E);
  if (ResultTy != EltTy)
    Log2E = ConstantVector::getSplat(ResultTy->getVectorNumElements(), Log2E);

  Value *Scaled = Builder.CreateFMul(Log2E, Src);
  return TrivialDxilUnaryOperation(OP::OpCode::Exp, Scaled, &helper.hlslOP,
                                   Builder);
}
2294
2295
// Lowers log(x) (natural log) as ln(2) * Log(x), where Log is the DXIL op.
// NOTE(review): the ln(2) factor implies the DXIL Log op is base 2; confirm
// against the DXIL specification.
Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                    HLOperationLowerHelper &helper,
                    HLObjectOperationLowerHelper *pObjHelper,
                    bool &Translated) {
  IRBuilder<> Builder(CI);
  Type *ResultTy = CI->getType();
  Type *EltTy = ResultTy->getScalarType();
  Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);

  Constant *Ln2 = ConstantFP::get(EltTy, M_LN2);
  if (ResultTy != EltTy)
    Ln2 = ConstantVector::getSplat(ResultTy->getVectorNumElements(), Ln2);

  Value *DxilLog = TrivialDxilUnaryOperation(OP::OpCode::Log, Src,
                                             &helper.hlslOP, Builder);
  return Builder.CreateFMul(Ln2, DxilLog);
}
2311
2312
// Lowers log10(x) as (ln(2) / ln(10)) * Log(x), where Log is the DXIL op.
// The scale factor equals log10(2). NOTE(review): this implies the DXIL Log
// op is base 2; confirm against the DXIL specification.
Value *TranslateLog10(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper,
                      HLObjectOperationLowerHelper *pObjHelper,
                      bool &Translated) {
  IRBuilder<> Builder(CI);
  Type *ResultTy = CI->getType();
  Type *EltTy = ResultTy->getScalarType();
  Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);

  Constant *Log10Of2 = ConstantFP::get(EltTy, M_LN2 / M_LN10);
  if (ResultTy != EltTy)
    Log10Of2 =
        ConstantVector::getSplat(ResultTy->getVectorNumElements(), Log10Of2);

  Value *DxilLog = TrivialDxilUnaryOperation(OP::OpCode::Log, Src,
                                             &helper.hlslOP, Builder);
  return Builder.CreateFMul(Log10Of2, DxilLog);
}
2329
2330
// Lowers fmod(a, b):
//   q      = a / b
//   f      = Frc(FAbs(q))
//   signed = (q >= -q) ? f : -f      // restore the sign of the quotient
//   result = signed * b
Value *TranslateFMod(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                     HLOperationLowerHelper &helper,
                     HLObjectOperationLowerHelper *pObjHelper,
                     bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *Dividend = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *Divisor = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);

  Value *Quot = Builder.CreateFDiv(Dividend, Divisor);
  Value *NegQuot = Builder.CreateFNeg(Quot);
  Value *QuotNonNeg = Builder.CreateFCmpOGE(Quot, NegQuot);

  Value *AbsQuot =
      TrivialDxilUnaryOperation(OP::OpCode::FAbs, Quot, hlslOP, Builder);
  Value *Frac =
      TrivialDxilUnaryOperation(OP::OpCode::Frc, AbsQuot, hlslOP, Builder);
  Value *NegFrac = Builder.CreateFNeg(Frac);

  Value *SignedFrac = Builder.CreateSelect(QuotNonNeg, Frac, NegFrac);
  return Builder.CreateFMul(SignedFrac, Divisor);
}
2349
2350
// Lowers a binary intrinsic that has float and integer flavours (min/max).
// When the call returns a floating-point type, the incoming opcode is
// replaced by FMax (for IOP_max) or FMin (asserted to be IOP_min); otherwise
// the incoming opcode is used unchanged. The call is then lowered as a
// trivial binary operation.
Value *TranslateFUIBinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                          HLOperationLowerHelper &helper,
                          HLObjectOperationLowerHelper *pObjHelper,
                          bool &Translated) {
  if (CI->getType()->getScalarType()->isFloatingPointTy()) {
    if (IOP == IntrinsicOp::IOP_max) {
      opcode = OP::OpCode::FMax;
    } else {
      DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_min);
      opcode = OP::OpCode::FMin;
    }
  }
  return TrivialBinaryOperation(CI, IOP, opcode, helper, pObjHelper,
                                Translated);
}
2370
2371
// Lowers a ternary intrinsic that has float and integer flavours (mad).
// When the call returns a floating-point type the opcode is replaced by FMad
// (the intrinsic is asserted to be IOP_mad); otherwise the incoming opcode is
// used unchanged. The call is then lowered as a trivial ternary operation.
Value *TranslateFUITrinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                           HLOperationLowerHelper &helper,
                           HLObjectOperationLowerHelper *pObjHelper,
                           bool &Translated) {
  if (CI->getType()->getScalarType()->isFloatingPointTy()) {
    DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_mad);
    opcode = OP::OpCode::FMad;
  }
  return TrivialTrinaryOperation(CI, IOP, opcode, helper, pObjHelper,
                                 Translated);
}
2388
2389
// Lowers frexp(x, out exp) with integer bit manipulation on the value's
// 32-bit pattern. The exponent is stored (converted to floating point)
// through the second operand; the mantissa is returned.
//
//   nz       = (x != 0) ? 0xffffffff : 0
//   bits     = asint(x)
//   exp      = float((((bits & 0x7f800000) - 0x3f000000) & nz) >> 23)
//   mantissa = asfloat(((bits & 0x007fffff) | 0x3f000000) & nz)
//
// NOTE(review): the masks and the float zero constant assume 32-bit float
// elements; confirm no other element types reach this lowering.
Value *TranslateFrexp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper,
                      HLObjectOperationLowerHelper *pObjHelper,
                      bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *Src = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *ExpOutPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);

  Type *SrcTy = Src->getType();
  Type *i32Ty = Type::getInt32Ty(CI->getContext());
  const bool IsVector = SrcTy->isVectorTy();
  const unsigned VecSize = IsVector ? SrcTy->getVectorNumElements() : 0;
  Type *IntTy = IsVector ? VectorType::get(i32Ty, VecSize) : i32Ty;

  // Broadcast a scalar constant to the operand's vector width when needed.
  auto Widen = [&](Constant *C) -> Constant * {
    return IsVector ? ConstantVector::getSplat(VecSize, C) : C;
  };
  Constant *ExponentMask = Widen(ConstantInt::get(i32Ty, 0x7f800000));
  Constant *MantissaMask = Widen(ConstantInt::get(i32Ty, 0x007fffff));
  Constant *ExponentShift = Widen(ConstantInt::get(i32Ty, 23));
  Constant *MantissaOr = Widen(ConstantInt::get(i32Ty, 0x3f000000));
  Constant *ExponentBias = Widen(ConstantInt::get(i32Ty, -(int)0x3f000000));
  Constant *FloatZero = Widen(hlslOP->GetFloatConst(0));

  // All-ones mask where the input is non-zero, all-zeros otherwise.
  Value *NonZeroMask = Builder.CreateFCmpUNE(Src, FloatZero);
  NonZeroMask = Builder.CreateSExt(NonZeroMask, IntTy);

  Value *Bits = Builder.CreateBitCast(Src, IntTy);

  // Exponent: isolate the field, remove the bias, zero it for zero inputs,
  // then shift it down to an integer.
  Value *ExpBits = Builder.CreateAnd(Bits, ExponentMask);
  ExpBits = Builder.CreateAdd(ExpBits, ExponentBias);
  ExpBits = Builder.CreateAnd(ExpBits, NonZeroMask);
  ExpBits = Builder.CreateAShr(ExpBits, ExponentShift);
  Value *Exponent = Builder.CreateSIToFP(ExpBits, SrcTy);
  Builder.CreateStore(Exponent, ExpOutPtr);

  // Mantissa: keep the fraction bits, install the fixed exponent pattern,
  // and zero everything for zero inputs.
  Value *MantBits = Builder.CreateAnd(Bits, MantissaMask);
  MantBits = Builder.CreateOr(MantBits, MantissaOr);
  MantBits = Builder.CreateAnd(MantBits, NonZeroMask);
  return Builder.CreateBitCast(MantBits, SrcTy);
}
2442
2443
// Lowers ldexp(x, e) as Exp(e) * x, where Exp is the DXIL op.
// NOTE(review): this matches ldexp only if the DXIL Exp op is base 2; confirm
// against the DXIL specification.
Value *TranslateLdExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper,
                      HLObjectOperationLowerHelper *pObjHelper,
                      bool &Translated) {
  Value *Mantissa = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *Exponent = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
  Value *Scale = TrivialDxilUnaryOperation(OP::OpCode::Exp, Exponent,
                                           &helper.hlslOP, Builder);
  return Builder.CreateFMul(Scale, Mantissa);
}
2455
2456
// Lowers fwidth(x) as FAbs(DerivCoarseX(x)) + FAbs(DerivCoarseY(x)).
Value *TranslateFWidth(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                       HLOperationLowerHelper &helper,
                       HLObjectOperationLowerHelper *pObjHelper,
                       bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  IRBuilder<> Builder(CI);

  // X derivative magnitude.
  Value *DerivX =
      TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseX, Src, hlslOP, Builder);
  Value *MagX =
      TrivialDxilUnaryOperation(OP::OpCode::FAbs, DerivX, hlslOP, Builder);

  // Y derivative magnitude.
  Value *DerivY =
      TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseY, Src, hlslOP, Builder);
  Value *MagY =
      TrivialDxilUnaryOperation(OP::OpCode::FAbs, DerivY, hlslOP, Builder);

  return Builder.CreateFAdd(MagX, MagY);
}
2473
2474
// Lowers lerp(x, y, s) as x + s * (y - x) using plain floating-point
// subtract, multiply and add instructions.
Value *TranslateLerp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                     HLOperationLowerHelper &helper,
                     HLObjectOperationLowerHelper *pObjHelper,
                     bool &Translated) {
  Value *From = CI->getArgOperand(HLOperandIndex::kLerpOpXIdx);
  Value *To = CI->getArgOperand(HLOperandIndex::kLerpOpYIdx);
  Value *Weight = CI->getArgOperand(HLOperandIndex::kLerpOpSIdx);

  IRBuilder<> Builder(CI);
  Value *Span = Builder.CreateFSub(To, From);
  Value *Offset = Builder.CreateFMul(Weight, Span);
  return Builder.CreateFAdd(From, Offset);
}
2487
2488
// Emits a call to the DXIL dot op `opcode`, overloaded on the scalar type of
// src0. The argument list is the opcode constant, then every element of src0,
// then every element of src1 (both extracted using src0's vector size).
Value *TrivialDotOperation(OP::OpCode opcode, Value *src0, Value *src1,
                           hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  Type *VecTy = src0->getType();
  Function *DotFn = hlslOP->GetOpFunc(opcode, VecTy->getScalarType());

  SmallVector<Value *, 9> CallArgs;
  CallArgs.push_back(hlslOP->GetU32Const((unsigned)opcode));

  // Scalarize both operands, src0 first.
  const unsigned NumLanes = VecTy->getVectorNumElements();
  Value *Operands[] = {src0, src1};
  for (Value *Operand : Operands)
    for (unsigned Lane = 0; Lane != NumLanes; ++Lane)
      CallArgs.push_back(Builder.CreateExtractElement(Operand, Lane));

  return Builder.CreateCall(DotFn, CallArgs);
}
2507
2508
// Instead of using a DXIL intrinsic, implement a dot product operation using
2509
// multiply and add operations. Used for integer dots and long vectors.
2510
Value *ExpandDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP,
2511
                 IRBuilder<> &Builder,
2512
376
                 DXIL::OpCode MadOpCode = DXIL::OpCode::IMad) {
2513
376
  Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0);
2514
376
  Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0);
2515
376
  Value *Result;
2516
376
  if (Elt0->getType()->isFloatingPointTy())
2517
6
    Result = Builder.CreateFMul(Elt0, Elt1);
2518
370
  else
2519
370
    Result = Builder.CreateMul(Elt0, Elt1);
2520
1.31k
  for (unsigned Elt = 1; Elt < vecSize; 
++Elt938
) {
2521
938
    Elt0 = Builder.CreateExtractElement(arg0, Elt);
2522
938
    Elt1 = Builder.CreateExtractElement(arg1, Elt);
2523
938
    Result = TrivialDxilTrinaryOperation(MadOpCode, Elt0, Elt1, Result, hlslOP,
2524
938
                                         Builder);
2525
938
  }
2526
2527
376
  return Result;
2528
376
}
2529
2530
// Emits a floating-point dot product of arg0 and arg1.
//   vecSize 2/3/4 - a single Dot2/Dot3/Dot4 DXIL op
//   vecSize 1     - (asserted) a vector FMul followed by extraction of
//                   element 0
// Every path returns directly, so no statements follow the returns.
Value *TranslateFDot(Value *arg0, Value *arg1, unsigned vecSize,
                     hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  switch (vecSize) {
  case 2:
    return TrivialDotOperation(OP::OpCode::Dot2, arg0, arg1, hlslOP, Builder);
  case 3:
    return TrivialDotOperation(OP::OpCode::Dot3, arg0, arg1, hlslOP, Builder);
  case 4:
    return TrivialDotOperation(OP::OpCode::Dot4, arg0, arg1, hlslOP, Builder);
  default:
    break;
  }

  // Only single-element vectors are expected to reach this point.
  DXASSERT(vecSize == 1, "wrong vector size");
  Value *vecMul = Builder.CreateFMul(arg0, arg1);
  return Builder.CreateExtractElement(vecMul, (uint64_t)0);
}
2551
2552
// Lowers dot(a, b).
//   - floating-point vectors with at most 4 elements use the DXIL dot ops via
//     TranslateFDot
//   - everything else is expanded by ExpandDot into a multiply plus a
//     multiply-add chain: UMad for IOP_udot, FMad for other floating-point
//     inputs, IMad otherwise
Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                    HLOperationLowerHelper &helper,
                    HLObjectOperationLowerHelper *pObjHelper,
                    bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *Lhs = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *Rhs = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);

  Type *VecTy = Lhs->getType();
  const unsigned NumElts = VecTy->getVectorNumElements();
  const bool IsFloat = VecTy->getScalarType()->isFloatingPointTy();

  IRBuilder<> Builder(CI);
  if (IsFloat && NumElts <= 4)
    return TranslateFDot(Lhs, Rhs, NumElts, hlslOP, Builder);

  DXIL::OpCode MadOpCode;
  if (IOP == IntrinsicOp::IOP_udot)
    MadOpCode = DXIL::OpCode::UMad;
  else if (IsFloat)
    MadOpCode = DXIL::OpCode::FMad;
  else
    MadOpCode = DXIL::OpCode::IMad;

  return ExpandDot(Lhs, Rhs, NumElts, hlslOP, Builder, MadOpCode);
}
2573
2574
// Lowers normalize(v) as v * Rsqrt(dot(v, v)). The scalar Rsqrt result is
// broadcast to a vector with one insertelement per lane before the multiply.
// The call's result type must be a vector (enforced by cast<>).
Value *TranslateNormalize(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                          HLOperationLowerHelper &helper,
                          HLObjectOperationLowerHelper *pObjHelper,
                          bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *Vec = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  VectorType *VecTy = cast<VectorType>(CI->getType());
  const unsigned NumLanes = VecTy->getNumElements();

  IRBuilder<> Builder(CI);
  Value *LenSq = TranslateFDot(Vec, Vec, NumLanes, hlslOP, Builder);

  // 1 / sqrt(dot(v, v))
  const DXIL::OpCode RsqrtOp = DXIL::OpCode::Rsqrt;
  Function *RsqrtFn = hlslOP->GetOpFunc(RsqrtOp, VecTy->getElementType());
  Value *InvLen = Builder.CreateCall(
      RsqrtFn, {hlslOP->GetI32Const((unsigned)RsqrtOp), LenSq},
      hlslOP->GetOpCodeName(RsqrtOp));

  // Broadcast the scalar to every lane.
  Value *InvLenVec = UndefValue::get(VecTy);
  for (unsigned Lane = 0; Lane < VecTy->getNumElements(); ++Lane)
    InvLenVec = Builder.CreateInsertElement(InvLenVec, InvLen, Lane);

  return Builder.CreateFMul(Vec, InvLenVec);
}
2597
2598
// Lowers reflect(i, n) as i - 2 * dot(i, n) * n. The incident operand must be
// a vector (enforced by cast<>).
Value *TranslateReflect(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                        HLOperationLowerHelper &helper,
                        HLObjectOperationLowerHelper *pObjHelper,
                        bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  IRBuilder<> Builder(CI);
  Value *Incident = CI->getArgOperand(HLOperandIndex::kReflectOpIIdx);
  Value *Normal = CI->getArgOperand(HLOperandIndex::kReflectOpNIdx);

  const unsigned NumLanes =
      cast<VectorType>(Incident->getType())->getNumElements();

  // Scalar factor 2 * dot(i, n).
  Value *IdotN = TranslateFDot(Incident, Normal, NumLanes, hlslOP, Builder);
  Value *TwiceDot =
      Builder.CreateFMul(ConstantFP::get(IdotN->getType(), 2.0), IdotN);

  // Broadcast the factor, scale the normal, subtract from the incident.
  Value *TwiceDotVec = Builder.CreateVectorSplat(NumLanes, TwiceDot);
  Value *ScaledNormal = Builder.CreateFMul(TwiceDotVec, Normal);
  return Builder.CreateFSub(Incident, ScaledNormal);
}
2619
2620
// Lowers refract(i, n, eta):
//   d    = dot(i, n);
//   t    = 1 - eta * eta * (1 - d * d);
//   cond = t >= 0;
//   r    = eta * i - (eta * d + sqrt(t)) * n;
//   return cond ? r : 0;
// The incident operand must be a vector (enforced by cast<>); eta is used as
// a scalar and broadcast where a vector is needed.
Value *TranslateRefract(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                        HLOperationLowerHelper &helper,
                        HLObjectOperationLowerHelper *pObjHelper,
                        bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  IRBuilder<> Builder(CI);
  Value *i = CI->getArgOperand(HLOperandIndex::kRefractOpIIdx);
  Value *n = CI->getArgOperand(HLOperandIndex::kRefractOpNIdx);
  Value *eta = CI->getArgOperand(HLOperandIndex::kRefractOpEtaIdx);

  VectorType *VT = cast<VectorType>(i->getType());
  unsigned vecSize = VT->getNumElements();
  Value *dot = TranslateFDot(i, n, vecSize, hlslOP, Builder);
  // eta * eta;
  Value *eta2 = Builder.CreateFMul(eta, eta);
  // d * d;
  Value *dot2 = Builder.CreateFMul(dot, dot);
  Constant *one = ConstantFP::get(eta->getType(), 1);
  Constant *zero = ConstantFP::get(eta->getType(), 0);
  // 1 - d * d;
  dot2 = Builder.CreateFSub(one, dot2);
  // eta * eta * (1 - d * d);
  eta2 = Builder.CreateFMul(dot2, eta2);
  // t = 1 - eta * eta * (1 - d * d);
  Value *t = Builder.CreateFSub(one, eta2);
  // cond = t >= 0;
  Value *cond = Builder.CreateFCmpOGE(t, zero);
  // eta * i; eta is broadcast lane by lane. The index is named `lane` so it
  // does not shadow the incident vector `i`.
  Value *vecEta = UndefValue::get(VT);
  for (unsigned lane = 0; lane < vecSize; lane++)
    vecEta = Builder.CreateInsertElement(vecEta, eta, lane);
  Value *etaMulI = Builder.CreateFMul(i, vecEta);
  // sqrt(t);
  Value *sqrt = TrivialDxilUnaryOperation(OP::OpCode::Sqrt, t, hlslOP, Builder);
  // eta * d;
  Value *etaMulD = Builder.CreateFMul(eta, dot);
  // eta * d + sqrt(t);
  Value *etaSqrt = Builder.CreateFAdd(etaMulD, sqrt);
  // (eta * d + sqrt(t)) * n;
  Value *vecEtaSqrt = Builder.CreateVectorSplat(vecSize, etaSqrt);
  Value *r = Builder.CreateFMul(vecEtaSqrt, n);
  // r = eta * i - (eta * d + sqrt(t)) * n;
  r = Builder.CreateFSub(etaMulI, r);
  // When t is negative the result is the zero vector.
  Value *refract =
      Builder.CreateSelect(cond, r, ConstantVector::getSplat(vecSize, zero));
  return refract;
}
2672
2673
// Lowers smoothstep(min, max, x):
//   s      = Saturate((x - min) / (max - min))
//   result = s * s * (3 - 2 * s)
Value *TranslateSmoothStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                           HLOperationLowerHelper &helper,
                           HLObjectOperationLowerHelper *pObjHelper,
                           bool &Translated) {
  IRBuilder<> Builder(CI);
  Value *Lo = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMinIdx);
  Value *Hi = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMaxIdx);
  Value *X = CI->getArgOperand(HLOperandIndex::kSmoothStepOpXIdx);

  // Normalized, clamped position of x within [min, max].
  Value *Range = Builder.CreateFSub(Hi, Lo);
  Value *Offset = Builder.CreateFSub(X, Lo);
  Value *Ratio = Builder.CreateFDiv(Offset, Range);
  Value *S = TrivialDxilUnaryOperation(DXIL::OpCode::Saturate, Ratio,
                                       &helper.hlslOP, Builder);

  // Cubic polynomial s * s * (3 - 2 * s).
  Constant *Two = ConstantFP::get(CI->getType(), 2);
  Constant *Three = ConstantFP::get(CI->getType(), 3);
  Value *TwoS = Builder.CreateFMul(S, Two);
  Value *Poly = Builder.CreateFSub(Three, TwoS);
  Poly = Builder.CreateFMul(S, Poly);
  return Builder.CreateFMul(S, Poly);
}
2699
2700
// Lowers msad4(ref, src, accum).
//   1. `ref` is broadcast into all four lanes of a vector.
//   2. A 4-lane source vector is built from src.x and src.y: lane 0 is src.x;
//      lane k (k = 1..3) is src.x shifted right by 8*k bits, passed through
//      the Bfi DXIL op with width 8*k, offset 32 - 8*k and src.y as the
//      inserted value (i.e. y[0 .. 8k-1] placed above x[31 : 8k]).
//   3. The Msad DXIL op is applied to (ref vector, source vector, accum).
Value *TranslateMSad4(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper,
                      HLObjectOperationLowerHelper *pObjHelper,
                      bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *Ref = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  Value *Src = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  Value *Accum = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  Type *VecTy = CI->getType();
  IRBuilder<> Builder(CI);

  // Broadcast the reference value to all four lanes.
  Value *RefVec = UndefValue::get(VecTy);
  for (unsigned Lane = 0; Lane < 4; ++Lane)
    RefVec = Builder.CreateInsertElement(RefVec, Ref, Lane);

  Value *SrcX = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value *SrcY = Builder.CreateExtractElement(Src, 1);

  // Lane 0 of the byte-shifted source is src.x itself.
  Value *ByteSrc = UndefValue::get(VecTy);
  ByteSrc = Builder.CreateInsertElement(ByteSrc, SrcX, (uint64_t)0);

  Value *BfiOpArg =
      hlslOP->GetU32Const(static_cast<unsigned>(DXIL::OpCode::Bfi));
  Value *Shift8 = hlslOP->GetU32Const(8);
  Type *EltTy = Ref->getType();

  // Lanes 1..3: shift x down one more byte each time and fill the vacated
  // high bits from the low bits of y.
  Value *Shifted = SrcX;
  for (unsigned Lane = 1; Lane < 4; ++Lane) {
    const unsigned Width = 8 * Lane;
    Shifted = Builder.CreateLShr(Shifted, Shift8);
    Value *BfiArgs[] = {BfiOpArg, hlslOP->GetU32Const(Width),
                        hlslOP->GetU32Const(32 - Width), SrcY, Shifted};
    Value *Packed = TrivialDxilOperation(DXIL::OpCode::Bfi, BfiArgs, EltTy,
                                         EltTy, hlslOP, Builder);
    ByteSrc = Builder.CreateInsertElement(ByteSrc, Packed, Lane);
  }

  // Msad on the broadcast reference and the byte-shifted source.
  return TrivialDxilTrinaryOperation(DXIL::OpCode::Msad, RefVec, ByteSrc,
                                     Accum, hlslOP, Builder);
}
2756
2757
// Lowers rcp(x) to a floating-point divide 1.0 / x. The constant 1.0 is
// splatted when the result type is a vector.
Value *TranslateRCP(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                    HLOperationLowerHelper &helper,
                    HLObjectOperationLowerHelper *pObjHelper,
                    bool &Translated) {
  Type *ResultTy = CI->getType();
  Type *EltTy = ResultTy->getScalarType();
  Value *Denominator = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  IRBuilder<> Builder(CI);

  Constant *Numerator = ConstantFP::get(EltTy, 1.0);
  if (ResultTy != EltTy)
    Numerator =
        ConstantVector::getSplat(ResultTy->getVectorNumElements(), Numerator);

  return Builder.CreateFDiv(Numerator, Denominator);
}
2770
2771
// Lowers sign(x) as zext(0 < x) - zext(x < 0), computed in the call's result
// type. Integer inputs use signed integer compares; all other inputs use
// ordered floating-point compares.
Value *TranslateSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                     HLOperationLowerHelper &helper,
                     HLObjectOperationLowerHelper *pObjHelper,
                     bool &Translated) {
  Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  Type *SrcTy = Src->getType();
  IRBuilder<> Builder(CI);
  Constant *Zero = Constant::getNullValue(SrcTy);

  Value *IsPositive;
  Value *IsNegative;
  if (SrcTy->getScalarType()->isIntegerTy()) {
    IsPositive = Builder.CreateICmpSLT(Zero, Src);
    IsNegative = Builder.CreateICmpSLT(Src, Zero);
  } else {
    IsPositive = Builder.CreateFCmpOLT(Zero, Src);
    IsNegative = Builder.CreateFCmpOLT(Src, Zero);
  }

  // Widen the i1 flags to the result type; their difference is -1, 0 or 1.
  IsPositive = Builder.CreateZExt(IsPositive, CI->getType());
  IsNegative = Builder.CreateZExt(IsNegative, CI->getType());
  return Builder.CreateSub(IsPositive, IsNegative);
}
2789
2790
// Lowers the unsigned sign intrinsic to zext(x != 0) in the call's result
// type.
Value *TranslateUSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                      HLOperationLowerHelper &helper,
                      HLObjectOperationLowerHelper *pObjHelper,
                      bool &Translated) {
  IRBuilder<> Builder(CI);
  Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  Constant *zero = Constant::getNullValue(src->getType());

  Value *isNonZero = Builder.CreateICmpNE(src, zero);
  return Builder.CreateZExt(isNonZero, CI->getType());
}
2802
2803
// Lowers step(edge, x) to select(x < edge, 0.0, 1.0) using an ordered
// less-than compare. The constants are splatted for vector results.
Value *TranslateStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                     HLOperationLowerHelper &helper,
                     HLObjectOperationLowerHelper *pObjHelper,
                     bool &Translated) {
  IRBuilder<> Builder(CI);
  Type *resultTy = CI->getType();
  Type *eltTy = resultTy->getScalarType();
  Value *edgeArg = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *xArg = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);

  Constant *fpOne = ConstantFP::get(eltTy, 1.0);
  Constant *fpZero = ConstantFP::get(eltTy, 0);
  Value *xBelowEdge = Builder.CreateFCmpOLT(xArg, edgeArg);

  const bool isVector = resultTy != eltTy;
  if (isVector) {
    const unsigned numElts = resultTy->getVectorNumElements();
    fpOne = ConstantVector::getSplat(numElts, fpOne);
    fpZero = ConstantVector::getSplat(numElts, fpZero);
  }

  return Builder.CreateSelect(xBelowEdge, fpZero, fpOne);
}
2823
2824
// Lowers pow(x, y) by forwarding both operands to TranslatePowImpl, together
// with the module's FXC-compatibility option.
Value *TranslatePow(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                    HLOperationLowerHelper &helper,
                    HLObjectOperationLowerHelper *pObjHelper,
                    bool &Translated) {
  Value *base = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *exponent = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  const bool fxcCompat =
      CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode;

  IRBuilder<> Builder(CI);
  return TranslatePowImpl(&helper.hlslOP, Builder, base, exponent, fxcCompat);
}
2836
2837
// printf is not supported: reports an error on the call instruction, marks
// the call as not translated and produces no replacement value.
Value *TranslatePrintf(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
                       HLOperationLowerHelper &helper,
                       HLObjectOperationLowerHelper *pObjHelper,
                       bool &Translated) {
  Translated = false;
  dxilutil::EmitErrorOnInstruction(CI,
                                   "use of unsupported identifier 'printf'");
  return nullptr;
}
2846
2847
// Lowers faceforward(n, i, ng): yields n when dot(i, ng) < 0 (ordered
// compare) and -n otherwise.
Value *TranslateFaceforward(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                            HLOperationLowerHelper &helper,
                            HLObjectOperationLowerHelper *pObjHelper,
                            bool &Translated) {
  Type *resultTy = CI->getType();
  Value *normal = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  Value *incident = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  Value *geomNormal = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);

  IRBuilder<> Builder(CI);

  // -n x sign(dot(i, ng)).
  Value *dotValue =
      TranslateFDot(incident, geomNormal, resultTy->getVectorNumElements(),
                    &helper.hlslOP, Builder);
  Constant *fpZero = ConstantFP::get(resultTy->getScalarType(), 0);
  Value *dotIsNegative = Builder.CreateFCmpOLT(dotValue, fpZero);

  Value *flippedNormal = Builder.CreateFNeg(normal);
  return Builder.CreateSelect(dotIsNegative, normal, flippedNormal);
}
2870
2871
// Emits the DXIL call for SetMeshOutputCounts: (opcode, src0, src1) using the
// void overload of `op`. The HL call yields no value, so nullptr is returned.
Value *TrivialSetMeshOutputCounts(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                                  HLOperationLowerHelper &helper,
                                  HLObjectOperationLowerHelper *pObjHelper,
                                  bool &Translated) {
  hlsl::OP *dxilOps = &helper.hlslOP;
  IRBuilder<> Builder(CI);

  Value *callArgs[] = {
      dxilOps->GetU32Const((unsigned)op),
      CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx),
      CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx),
  };
  Function *callee =
      dxilOps->GetOpFunc(op, Type::getVoidTy(CI->getContext()));

  Builder.CreateCall(callee, callArgs);
  return nullptr;
}
2886
2887
// Emits the DXIL call for DispatchMesh: (opcode, threadX, threadY, threadZ,
// payload). The DXIL function is overloaded on the payload operand's type.
// The HL call yields no value, so nullptr is returned.
Value *TrivialDispatchMesh(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                           HLOperationLowerHelper &helper,
                           HLObjectOperationLowerHelper *pObjHelper,
                           bool &Translated) {
  hlsl::OP *dxilOps = &helper.hlslOP;
  IRBuilder<> Builder(CI);

  Value *threadX = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadX);
  Value *threadY = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadY);
  Value *threadZ = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadZ);
  Value *payload = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpPayload);

  Value *callArgs[] = {dxilOps->GetU32Const((unsigned)op), threadX, threadY,
                       threadZ, payload};
  Function *callee = dxilOps->GetOpFunc(op, payload->getType());

  Builder.CreateCall(callee, callArgs);
  return nullptr;
}
2904
} // namespace
2905
2906
// MOP intrinsics
2907
namespace {
2908
2909
// Lowers GetSamplePosition to Texture2DMSGetSamplePosition(handle, sampleIdx)
// and repacks fields 0 and 1 of the returned aggregate into elements 0 and 1
// of a vector of the call's result type.
Value *TranslateGetSamplePosition(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                                  HLOperationLowerHelper &helper,
                                  HLObjectOperationLowerHelper *pObjHelper,
                                  bool &Translated) {
  hlsl::OP *dxilOps = &helper.hlslOP;
  const OP::OpCode dxilOpcode = OP::OpCode::Texture2DMSGetSamplePosition;

  Value *texHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  Value *sampleIndex =
      CI->getArgOperand(HLOperandIndex::kGetSamplePositionSampleIdxOpIndex);

  IRBuilder<> Builder(CI);
  llvm::Constant *opcodeArg = dxilOps->GetU32Const((unsigned)dxilOpcode);
  Function *callee =
      dxilOps->GetOpFunc(dxilOpcode, Type::getVoidTy(CI->getContext()));

  Value *callArgs[] = {opcodeArg, texHandle, sampleIndex};
  Value *posAggregate = Builder.CreateCall(callee, callArgs);

  Value *posX = Builder.CreateExtractValue(posAggregate, 0);
  Value *posY = Builder.CreateExtractValue(posAggregate, 1);

  Value *posVec = UndefValue::get(CI->getType());
  posVec = Builder.CreateInsertElement(posVec, posX, (uint64_t)0);
  posVec = Builder.CreateInsertElement(posVec, posY, 1);
  return posVec;
}
2935
2936
// Lowers the GetDimensions method to the DXIL GetDimensions op and stores
// each queried component through the matching output-pointer argument.
//
// - The texture kinds listed in the switch may carry an explicit mip level
//   argument; when that argument slot holds a pointer instead, no mip level
//   was supplied and level 0 is queried.
// - Structured buffers store the width followed by the element stride,
//   computed from the data layout.
// - Outputs whose pointee type is floating point receive the value converted
//   with a signed int-to-float cast.
// Returns nullptr; all results are written through the output pointers.
Value *TranslateGetDimensions(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                              HLOperationLowerHelper &helper,
                              HLObjectOperationLowerHelper *pObjHelper,
                              bool &Translated) {
  hlsl::OP *dxilOps = &helper.hlslOP;

  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  const DxilResource::Kind RK = pObjHelper->GetRK(handle);

  IRBuilder<> Builder(CI);
  const OP::OpCode dxilOpcode = OP::OpCode::GetDimensions;
  llvm::Constant *opcodeArg = dxilOps->GetU32Const((unsigned)dxilOpcode);
  Function *callee =
      dxilOps->GetOpFunc(dxilOpcode, Type::getVoidTy(CI->getContext()));

  // Only these resource kinds can take a mip level argument.
  bool kindMayHaveMip = false;
  switch (RK) {
  case DxilResource::Kind::Texture1D:
  case DxilResource::Kind::Texture1DArray:
  case DxilResource::Kind::Texture2D:
  case DxilResource::Kind::Texture2DArray:
  case DxilResource::Kind::TextureCube:
  case DxilResource::Kind::TextureCubeArray:
  case DxilResource::Kind::Texture3D:
    kindMayHaveMip = true;
    break;
  default:
    break;
  }

  Type *i32Ty = Type::getInt32Ty(CI->getContext());
  Value *mipLevel = UndefValue::get(i32Ty);
  unsigned widthOpIdx = HLOperandIndex::kGetDimensionsNoMipWidthOpIndex;
  if (kindMayHaveMip) {
    Value *mipArg =
        CI->getArgOperand(HLOperandIndex::kGetDimensionsMipLevelOpIndex);
    // mipLevel is in parameter, should not be pointer.
    if (mipArg->getType()->isPointerTy()) {
      // No mip level.
      mipLevel = ConstantInt::get(i32Ty, 0);
    } else {
      mipLevel = mipArg;
      widthOpIdx = HLOperandIndex::kGetDimensionsMipWidthOpIndex;
    }
  }

  Value *callArgs[] = {opcodeArg, handle, mipLevel};
  Value *dims = Builder.CreateCall(callee, callArgs);

  // Extracts component `dimIdx` of the result, converts it when the
  // destination is floating point, and stores it through `dstPtr`.
  auto storeDimension = [&Builder, dims](unsigned dimIdx, Value *dstPtr) {
    Value *dim = Builder.CreateExtractValue(dims, dimIdx);
    Type *dstEltTy = dstPtr->getType()->getPointerElementType();
    if (dstEltTy->isFloatingPointTy())
      dim = Builder.CreateSIToFP(dim, dstEltTy);
    Builder.CreateStore(dim, dstPtr);
  };

  unsigned dimensionIdx = 0;
  storeDimension(dimensionIdx++, CI->getArgOperand(widthOpIdx));

  if (DXIL::IsStructuredBuffer(RK)) {
    // Set stride.
    Value *stridePtr = CI->getArgOperand(widthOpIdx + 1);
    const DataLayout &DL = helper.dataLayout;
    Type *bufTy = pObjHelper->GetResourceType(handle);
    Type *bufRetTy = bufTy->getStructElementType(0);
    unsigned stride = DL.getTypeAllocSize(bufRetTy);
    Builder.CreateStore(dxilOps->GetU32Const(stride), stridePtr);
    return nullptr;
  }

  const unsigned numArgs = CI->getNumArgOperands();
  const bool lastOutputIsW =
      widthOpIdx == HLOperandIndex::kGetDimensionsMipWidthOpIndex ||
      // Samples is in w channel too.
      RK == DXIL::ResourceKind::Texture2DMS;

  if (lastOutputIsW) {
    // Has mip.
    const unsigned lastArgIdx = numArgs - 1;
    for (unsigned argIdx = widthOpIdx + 1; argIdx < lastArgIdx; ++argIdx)
      storeDimension(dimensionIdx++, CI->getArgOperand(argIdx));
    // NumOfLevel is in w channel.
    storeDimension(3, CI->getArgOperand(lastArgIdx));
  } else {
    for (unsigned argIdx = widthOpIdx + 1; argIdx < numArgs; ++argIdx)
      storeDimension(dimensionIdx++, CI->getArgOperand(argIdx));
  }
  return nullptr;
}
3035
3036
// Lowers IncrementCounter / DecrementCounter to BufferUpdateCounter with an
// i8 delta of +1 for MOP_IncrementCounter and -1 otherwise. The resource
// behind the handle is first marked as having a counter.
Value *GenerateUpdateCounter(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                             HLOperationLowerHelper &helper,
                             HLObjectOperationLowerHelper *pObjHelper,
                             bool &Translated) {
  hlsl::OP *dxilOps = &helper.hlslOP;
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);

  pObjHelper->MarkHasCounter(handle, helper.i8Ty);

  const bool isIncrement = IOP == IntrinsicOp::MOP_IncrementCounter;
  const OP::OpCode counterOp = OP::OpCode::BufferUpdateCounter;

  IRBuilder<> Builder(CI);
  Value *opcodeArg = dxilOps->GetU32Const((unsigned)counterOp);
  Value *delta = dxilOps->GetI8Const(isIncrement ? 1 : -1);

  // Create BufferUpdateCounter call.
  Value *callArgs[] = {opcodeArg, handle, delta};
  Function *callee =
      dxilOps->GetOpFunc(counterOp, Type::getVoidTy(handle->getContext()));
  return Builder.CreateCall(callee, callArgs);
}
3058
3059
// Extracts the value part of a DXIL ResRet aggregate as a value of type
// RetTy: field 0 for a scalar RetTy, otherwise fields [0, N) gathered into an
// N-element vector.
static Value *ScalarizeResRet(Type *RetTy, Value *ResRet,
                              IRBuilder<> &Builder) {
  if (!RetTy->isVectorTy())
    return Builder.CreateExtractValue(ResRet, 0);

  Value *packed = llvm::UndefValue::get(RetTy);
  for (unsigned eltIdx = 0; eltIdx < RetTy->getVectorNumElements(); ++eltIdx) {
    Value *component = Builder.CreateExtractValue(ResRet, eltIdx);
    packed = Builder.CreateInsertElement(packed, component, eltIdx);
  }
  return packed;
}
3073
3074
void UpdateStatus(Value *ResRet, Value *status, IRBuilder<> &Builder,
3075
                  hlsl::OP *hlslOp,
3076
19.7k
                  unsigned StatusIndex = DXIL::kResRetStatusIndex) {
3077
19.7k
  if (status && 
!isa<UndefValue>(status)2.24k
) {
3078
2.24k
    Value *statusVal = Builder.CreateExtractValue(ResRet, StatusIndex);
3079
2.24k
    Value *checkAccessOp = hlslOp->GetI32Const(
3080
2.24k
        static_cast<unsigned>(DXIL::OpCode::CheckAccessFullyMapped));
3081
2.24k
    Function *checkAccessFn = hlslOp->GetOpFunc(
3082
2.24k
        DXIL::OpCode::CheckAccessFullyMapped, statusVal->getType());
3083
    // CheckAccess on status.
3084
2.24k
    Value *bStatus =
3085
2.24k
        Builder.CreateCall(checkAccessFn, {checkAccessOp, statusVal});
3086
2.24k
    Value *extStatus =
3087
2.24k
        Builder.CreateZExt(bStatus, Type::getInt32Ty(status->getContext()));
3088
2.24k
    Builder.CreateStore(extStatus, status);
3089
2.24k
  }
3090
19.7k
}
3091
3092
3.15k
// Builds a vector of type DstTy with every element set to Elt, using one
// insertelement per lane starting from undef.
Value *SplatToVector(Value *Elt, Type *DstTy, IRBuilder<> &Builder) {
  Value *splat = UndefValue::get(DstTy);
  unsigned lane = 0;
  while (lane < DstTy->getVectorNumElements()) {
    splat = Builder.CreateInsertElement(splat, Elt, lane);
    ++lane;
  }
  return splat;
}
3098
3099
// Lowers the non-matrix forms of mul():
//   mul(vector, vector) -> dot product (FDot for floats, IMad/UMad expansion
//                          for integers; UMad when IOP is IOP_umul)
//   mul(vector, scalar) -> vector * splat(scalar)
//   mul(scalar, vector) -> splat(scalar) * vector
//   mul(scalar, scalar) -> scalar * scalar
Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                    HLOperationLowerHelper &helper,
                    HLObjectOperationLowerHelper *pObjHelper,
                    bool &Translated) {
  hlsl::OP *dxilOps = &helper.hlslOP;
  Value *lhs = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *rhs = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  Type *lhsTy = lhs->getType();
  Type *rhsTy = rhs->getType();
  const bool lhsIsVector = lhsTy->isVectorTy();
  const bool rhsIsVector = rhsTy->isVectorTy();
  const bool isFloat = lhsTy->getScalarType()->isFloatingPointTy();
  IRBuilder<> Builder(CI);

  if (lhsIsVector && rhsIsVector) {
    // mul(vector, vector) == dot(vector, vector)
    const unsigned vecSize = lhsTy->getVectorNumElements();
    if (isFloat)
      return TranslateFDot(lhs, rhs, vecSize, dxilOps, Builder);

    const DXIL::OpCode madOp = (IOP == IntrinsicOp::IOP_umul)
                                   ? DXIL::OpCode::UMad
                                   : DXIL::OpCode::IMad;
    return ExpandDot(lhs, rhs, vecSize, dxilOps, Builder, madOp);
  }

  // Bring a lone scalar operand up to the vector operand's type.
  if (lhsIsVector)
    rhs = SplatToVector(rhs, lhsTy, Builder);
  else if (rhsIsVector)
    lhs = SplatToVector(lhs, rhsTy, Builder);

  // create fmul/mul for the pair of vectors or scalars
  if (isFloat)
    return Builder.CreateFMul(lhs, rhs);
  return Builder.CreateMul(lhs, rhs);
}
3141
3142
// Sample intrinsics.
3143
struct SampleHelper {
3144
  SampleHelper(CallInst *CI, OP::OpCode op,
3145
               HLObjectOperationLowerHelper *pObjHelper);
3146
3147
  OP::OpCode opcode = OP::OpCode::NumOpCodes;
3148
  DXIL::ResourceKind resourceKind = DXIL::ResourceKind::Invalid;
3149
  Value *sampledTexHandle = nullptr;
3150
  Value *texHandle = nullptr;
3151
  Value *samplerHandle = nullptr;
3152
  static const unsigned kMaxCoordDimensions = 4;
3153
  unsigned coordDimensions = 0;
3154
  Value *coord[kMaxCoordDimensions];
3155
  Value *compareValue = nullptr;
3156
  Value *bias = nullptr;
3157
  Value *lod = nullptr;
3158
  // SampleGrad only.
3159
  static const unsigned kMaxDDXYDimensions = 3;
3160
  Value *ddx[kMaxDDXYDimensions];
3161
  Value *ddy[kMaxDDXYDimensions];
3162
  // Optional.
3163
  static const unsigned kMaxOffsetDimensions = 3;
3164
  unsigned offsetDimensions = 0;
3165
  Value *offset[kMaxOffsetDimensions];
3166
  Value *clamp = nullptr;
3167
  Value *status = nullptr;
3168
  unsigned maxHLOperandRead = 0;
3169
19.3k
  Value *ReadHLOperand(CallInst *CI, unsigned opIdx) {
3170
19.3k
    if (CI->getNumArgOperands() > opIdx) {
3171
8.85k
      maxHLOperandRead = std::max(maxHLOperandRead, opIdx);
3172
8.85k
      return CI->getArgOperand(opIdx);
3173
8.85k
    }
3174
10.4k
    return nullptr;
3175
19.3k
  }
3176
4.80k
  void TranslateCoord(CallInst *CI, unsigned coordIdx) {
3177
4.80k
    Value *coordArg = ReadHLOperand(CI, coordIdx);
3178
4.80k
    DXASSERT_NOMSG(coordArg);
3179
4.80k
    DXASSERT(coordArg->getType()->getVectorNumElements() == coordDimensions,
3180
4.80k
             "otherwise, HL coordinate dimensions mismatch");
3181
4.80k
    IRBuilder<> Builder(CI);
3182
15.3k
    for (unsigned i = 0; i < coordDimensions; 
i++10.5k
)
3183
10.5k
      coord[i] = Builder.CreateExtractElement(coordArg, i);
3184
4.80k
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
3185
13.5k
    for (unsigned i = coordDimensions; i < kMaxCoordDimensions; 
i++8.69k
)
3186
8.69k
      coord[i] = undefF;
3187
4.80k
  }
3188
4.34k
  void TranslateOffset(CallInst *CI, unsigned offsetIdx) {
3189
4.34k
    IntegerType *i32Ty = Type::getInt32Ty(CI->getContext());
3190
4.34k
    if (Value *offsetArg = ReadHLOperand(CI, offsetIdx)) {
3191
670
      DXASSERT(offsetArg->getType()->getVectorNumElements() == offsetDimensions,
3192
670
               "otherwise, HL coordinate dimensions mismatch");
3193
670
      IRBuilder<> Builder(CI);
3194
1.96k
      for (unsigned i = 0; i < offsetDimensions; 
i++1.29k
)
3195
1.29k
        offset[i] = Builder.CreateExtractElement(offsetArg, i);
3196
3.67k
    } else {
3197
      // Use zeros for offsets when not specified, not undef.
3198
3.67k
      Value *zero = ConstantInt::get(i32Ty, (uint64_t)0);
3199
10.1k
      for (unsigned i = 0; i < offsetDimensions; 
i++6.48k
)
3200
6.48k
        offset[i] = zero;
3201
3.67k
    }
3202
    // Use undef for components that should not be used for this resource dim.
3203
4.34k
    Value *undefI = UndefValue::get(i32Ty);
3204
9.60k
    for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; 
i++5.25k
)
3205
5.25k
      offset[i] = undefI;
3206
4.34k
  }
3207
292
  void SetBias(CallInst *CI, unsigned biasIdx) {
3208
    // Clamp bias for immediate.
3209
292
    bias = ReadHLOperand(CI, biasIdx);
3210
292
    DXASSERT_NOMSG(bias);
3211
292
    if (ConstantFP *FP = dyn_cast<ConstantFP>(bias)) {
3212
208
      float v = FP->getValueAPF().convertToFloat();
3213
208
      if (v > DXIL::kMaxMipLodBias)
3214
16
        bias = ConstantFP::get(FP->getType(), DXIL::kMaxMipLodBias);
3215
208
      if (v < DXIL::kMinMipLodBias)
3216
24
        bias = ConstantFP::get(FP->getType(), DXIL::kMinMipLodBias);
3217
208
    }
3218
292
  }
3219
1.21k
  void SetLOD(CallInst *CI, unsigned lodIdx) {
3220
1.21k
    lod = ReadHLOperand(CI, lodIdx);
3221
1.21k
    DXASSERT_NOMSG(lod);
3222
1.21k
  }
3223
634
  void SetCompareValue(CallInst *CI, unsigned cmpIdx) {
3224
634
    compareValue = ReadHLOperand(CI, cmpIdx);
3225
634
    DXASSERT_NOMSG(compareValue);
3226
634
  }
3227
3.28k
  void SetClamp(CallInst *CI, unsigned clampIdx) {
3228
3.28k
    if ((clamp = ReadHLOperand(CI, clampIdx))) {
3229
492
      if (clamp->getType()->isVectorTy()) {
3230
0
        IRBuilder<> Builder(CI);
3231
0
        clamp = Builder.CreateExtractElement(clamp, (uint64_t)0);
3232
0
      }
3233
492
    } else
3234
2.79k
      clamp = UndefValue::get(Type::getFloatTy(CI->getContext()));
3235
3.28k
  }
3236
4.34k
  void SetStatus(CallInst *CI, unsigned statusIdx) {
3237
4.34k
    status = ReadHLOperand(CI, statusIdx);
3238
4.34k
  }
3239
200
  void SetDDX(CallInst *CI, unsigned ddxIdx) {
3240
200
    SetDDXY(CI, ddx, ReadHLOperand(CI, ddxIdx));
3241
200
  }
3242
200
  void SetDDY(CallInst *CI, unsigned ddyIdx) {
3243
200
    SetDDXY(CI, ddy, ReadHLOperand(CI, ddyIdx));
3244
200
  }
3245
400
  void SetDDXY(CallInst *CI, MutableArrayRef<Value *> ddxy, Value *ddxyArg) {
3246
400
    DXASSERT_NOMSG(ddxyArg);
3247
400
    IRBuilder<> Builder(CI);
3248
400
    unsigned ddxySize = ddxyArg->getType()->getVectorNumElements();
3249
1.32k
    for (unsigned i = 0; i < ddxySize; 
i++928
)
3250
928
      ddxy[i] = Builder.CreateExtractElement(ddxyArg, i);
3251
400
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
3252
672
    for (unsigned i = ddxySize; i < kMaxDDXYDimensions; 
i++272
)
3253
272
      ddxy[i] = undefF;
3254
400
  }
3255
};
3256
3257
// Reads every HL operand that `op` uses from CI.
//
// If the resource kind of the texture handle is invalid, `opcode` is reset to
// NumOpCodes and nothing else is read. Otherwise handles and coordinates are
// always read, followed by the opcode-specific operands. For cube textures
// the offset operand is not read (kInvalidIdx is passed instead) and the
// clamp/status indices are shifted down by one.
SampleHelper::SampleHelper(CallInst *CI, OP::OpCode op,
                           HLObjectOperationLowerHelper *pObjHelper)
    : opcode(op) {

  texHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  resourceKind = pObjHelper->GetRK(texHandle);
  if (resourceKind == DXIL::ResourceKind::Invalid) {
    opcode = DXIL::OpCode::NumOpCodes;
    return;
  }

  if (opcode == DXIL::OpCode::CalculateLOD)
    coordDimensions = DxilResource::GetNumDimensionsForCalcLOD(resourceKind);
  else
    coordDimensions = DxilResource::GetNumCoords(resourceKind);
  offsetDimensions = DxilResource::GetNumOffsets(resourceKind);

  // Feedback ops carry an extra sampled-texture handle and use their own
  // sampler/coordinate operand positions.
  const bool isFeedback = hlsl::OP::IsDxilOpFeedback(op);
  unsigned samplerArgIdx = HLOperandIndex::kSampleSamplerArgIndex;
  unsigned coordArgIdx = HLOperandIndex::kSampleCoordArgIndex;
  if (isFeedback) {
    sampledTexHandle = CI->getArgOperand(
        HLOperandIndex::kWriteSamplerFeedbackSampledArgIndex);
    samplerArgIdx = HLOperandIndex::kWriteSamplerFeedbackSamplerArgIndex;
    coordArgIdx = HLOperandIndex::kWriteSamplerFeedbackCoordArgIndex;
  } else {
    sampledTexHandle = nullptr;
  }
  samplerHandle = CI->getArgOperand(samplerArgIdx);

  TranslateCoord(CI, coordArgIdx);

  // TextureCube does not support offsets, shifting each subsequent arg index
  // down by 1
  const bool isCube = resourceKind == DXIL::ResourceKind::TextureCube ||
                      resourceKind == DXIL::ResourceKind::TextureCubeArray;
  const unsigned cubeShift = isCube ? 1 : 0;
  // Offset operand index to read: none for cubes, `idx` otherwise.
  auto offsetArgIdx = [isCube](unsigned idx) -> unsigned {
    return isCube ? HLOperandIndex::kInvalidIdx : idx;
  };

  switch (op) {
  case OP::OpCode::Sample:
    TranslateOffset(CI, offsetArgIdx(HLOperandIndex::kSampleOffsetArgIndex));
    SetClamp(CI, HLOperandIndex::kSampleClampArgIndex - cubeShift);
    SetStatus(CI, HLOperandIndex::kSampleStatusArgIndex - cubeShift);
    break;
  case OP::OpCode::SampleLevel:
    SetLOD(CI, HLOperandIndex::kSampleLLevelArgIndex);
    TranslateOffset(CI, offsetArgIdx(HLOperandIndex::kSampleLOffsetArgIndex));
    SetStatus(CI, HLOperandIndex::kSampleLStatusArgIndex - cubeShift);
    break;
  case OP::OpCode::SampleBias:
    SetBias(CI, HLOperandIndex::kSampleBBiasArgIndex);
    TranslateOffset(CI, offsetArgIdx(HLOperandIndex::kSampleBOffsetArgIndex));
    SetClamp(CI, HLOperandIndex::kSampleBClampArgIndex - cubeShift);
    SetStatus(CI, HLOperandIndex::kSampleBStatusArgIndex - cubeShift);
    break;
  case OP::OpCode::SampleCmp:
    SetCompareValue(CI, HLOperandIndex::kSampleCmpCmpValArgIndex);
    TranslateOffset(CI,
                    offsetArgIdx(HLOperandIndex::kSampleCmpOffsetArgIndex));
    SetClamp(CI, HLOperandIndex::kSampleCmpClampArgIndex - cubeShift);
    SetStatus(CI, HLOperandIndex::kSampleCmpStatusArgIndex - cubeShift);
    break;
  case OP::OpCode::SampleCmpBias:
    SetBias(CI, HLOperandIndex::kSampleCmpBBiasArgIndex);
    SetCompareValue(CI, HLOperandIndex::kSampleCmpBCmpValArgIndex);
    TranslateOffset(CI,
                    offsetArgIdx(HLOperandIndex::kSampleCmpBOffsetArgIndex));
    SetClamp(CI, HLOperandIndex::kSampleCmpBClampArgIndex - cubeShift);
    SetStatus(CI, HLOperandIndex::kSampleCmpBStatusArgIndex - cubeShift);
    break;
  case OP::OpCode::SampleCmpGrad:
    SetDDX(CI, HLOperandIndex::kSampleCmpGDDXArgIndex);
    SetDDY(CI, HLOperandIndex::kSampleCmpGDDYArgIndex);
    SetCompareValue(CI, HLOperandIndex::kSampleCmpGCmpValArgIndex);
    TranslateOffset(CI,
                    offsetArgIdx(HLOperandIndex::kSampleCmpGOffsetArgIndex));
    SetClamp(CI, HLOperandIndex::kSampleCmpGClampArgIndex - cubeShift);
    SetStatus(CI, HLOperandIndex::kSampleCmpGStatusArgIndex - cubeShift);
    break;
  case OP::OpCode::SampleCmpLevel:
    SetCompareValue(CI, HLOperandIndex::kSampleCmpCmpValArgIndex);
    TranslateOffset(CI,
                    offsetArgIdx(HLOperandIndex::kSampleCmpLOffsetArgIndex));
    SetLOD(CI, HLOperandIndex::kSampleCmpLLevelArgIndex);
    SetStatus(CI, HLOperandIndex::kSampleCmpStatusArgIndex - cubeShift);
    break;
  case OP::OpCode::SampleCmpLevelZero:
    SetCompareValue(CI, HLOperandIndex::kSampleCmpLZCmpValArgIndex);
    TranslateOffset(CI,
                    offsetArgIdx(HLOperandIndex::kSampleCmpLZOffsetArgIndex));
    SetStatus(CI, HLOperandIndex::kSampleCmpLZStatusArgIndex - cubeShift);
    break;
  case OP::OpCode::SampleGrad:
    SetDDX(CI, HLOperandIndex::kSampleGDDXArgIndex);
    SetDDY(CI, HLOperandIndex::kSampleGDDYArgIndex);
    TranslateOffset(CI, offsetArgIdx(HLOperandIndex::kSampleGOffsetArgIndex));
    SetClamp(CI, HLOperandIndex::kSampleGClampArgIndex - cubeShift);
    SetStatus(CI, HLOperandIndex::kSampleGStatusArgIndex - cubeShift);
    break;
  case OP::OpCode::CalculateLOD:
    // Only need coord for LOD calculation.
    break;
  case OP::OpCode::WriteSamplerFeedback:
    SetClamp(CI, HLOperandIndex::kWriteSamplerFeedback_ClampArgIndex);
    break;
  case OP::OpCode::WriteSamplerFeedbackBias:
    SetBias(CI, HLOperandIndex::kWriteSamplerFeedbackBias_BiasArgIndex);
    SetClamp(CI, HLOperandIndex::kWriteSamplerFeedbackBias_ClampArgIndex);
    break;
  case OP::OpCode::WriteSamplerFeedbackGrad:
    SetDDX(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_DdxArgIndex);
    SetDDY(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_DdyArgIndex);
    SetClamp(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_ClampArgIndex);
    break;
  case OP::OpCode::WriteSamplerFeedbackLevel:
    SetLOD(CI, HLOperandIndex::kWriteSamplerFeedbackLevel_LodArgIndex);
    break;
  default:
    DXASSERT(0, "invalid opcode for Sample");
    break;
  }
  DXASSERT(maxHLOperandRead == CI->getNumArgOperands() - 1,
           "otherwise, unused HL arguments for Sample op");
}
3385
3386
// Lowers CalculateLevelOfDetail / its unclamped variant to the DXIL
// CalculateLOD op. The `clamped` flag is true only for
// MOP_CalculateLevelOfDetail. Sets Translated to false and returns nullptr
// when the sample helper could not resolve the resource.
Value *TranslateCalculateLOD(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                             HLOperationLowerHelper &helper,
                             HLObjectOperationLowerHelper *pObjHelper,
                             bool &Translated) {
  hlsl::OP *dxilOps = &helper.hlslOP;
  const OP::OpCode lodOp = OP::OpCode::CalculateLOD;

  SampleHelper sampleHelper(CI, lodOp, pObjHelper);
  if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
    Translated = false;
    return nullptr;
  }

  const bool isClamped = IOP == IntrinsicOp::MOP_CalculateLevelOfDetail;
  IRBuilder<> Builder(CI);
  Value *opcodeArg = dxilOps->GetU32Const(static_cast<unsigned>(lodOp));
  Value *clampedArg = dxilOps->GetI1Const(isClamped);

  Value *callArgs[] = {opcodeArg,
                       sampleHelper.texHandle,
                       sampleHelper.samplerHandle,
                       sampleHelper.coord[0],
                       sampleHelper.coord[1],
                       sampleHelper.coord[2],
                       clampedArg};
  Function *callee =
      dxilOps->GetOpFunc(lodOp, Type::getFloatTy(opcodeArg->getContext()));
  return Builder.CreateCall(callee, callArgs);
}
3415
3416
// Translate CheckAccess into uint->bool, later optimization should remove it.
// Real checkaccess is generated in UpdateStatus.
Value *TranslateCheckAccess(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                            HLOperationLowerHelper &helper,
                            HLObjectOperationLowerHelper *pObjHelper,
                            bool &Translated) {
  Value *statusArg = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  IRBuilder<> Builder(CI);
  return Builder.CreateTrunc(statusArg, helper.i1Ty);
}
3426
3427
void GenerateDxilSample(CallInst *CI, Function *F, ArrayRef<Value *> sampleArgs,
3428
4.34k
                        Value *status, hlsl::OP *hlslOp) {
3429
4.34k
  IRBuilder<> Builder(CI);
3430
3431
4.34k
  CallInst *call = Builder.CreateCall(F, sampleArgs);
3432
3433
4.34k
  dxilutil::MigrateDebugValue(CI, call);
3434
3435
  // extract value part
3436
4.34k
  Value *retVal = ScalarizeResRet(CI->getType(), call, Builder);
3437
3438
  // Replace ret val.
3439
4.34k
  CI->replaceAllUsesWith(retVal);
3440
3441
  // get status
3442
4.34k
  if (status) {
3443
340
    UpdateStatus(call, status, Builder, hlslOp);
3444
340
  }
3445
4.34k
}
3446
3447
// Lowers an HL Sample* method call to the DXIL sample operation `opcode`.
//
// Every sample op starts with the same operands (opcode constant, texture
// handle, sampler handle, four coordinates, three offsets); the operands that
// follow depend on the opcode. The DXIL call is emitted by GenerateDxilSample,
// which also replaces the uses of CI, so this function always returns nullptr.
// Translated is set to false when SampleHelper leaves its opcode set to
// NumOpCodes.
Value *TranslateSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                       HLOperationLowerHelper &helper,
                       HLObjectOperationLowerHelper *pObjHelper,
                       bool &Translated) {
  SampleHelper sampleHelper(CI, opcode, pObjHelper);
  if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
    Translated = false;
    return nullptr;
  }

  hlsl::OP *hlslOP = &helper.hlslOP;
  Function *F = hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType());
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);

  // Operands shared by every sample opcode.
  Value *leadingArgs[] = {
      opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
      // Coord.
      sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
      sampleHelper.coord[3],
      // Offset.
      sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2]};
  SmallVector<Value *, 18> sampleArgs(std::begin(leadingArgs),
                                      std::end(leadingArgs));

  // Appends ddx[0..2] followed by ddy[0..2].
  auto appendGradients = [&sampleArgs, &sampleHelper]() {
    for (unsigned i = 0; i < 3; ++i)
      sampleArgs.push_back(sampleHelper.ddx[i]);
    for (unsigned i = 0; i < 3; ++i)
      sampleArgs.push_back(sampleHelper.ddy[i]);
  };

  // Opcode-specific trailing operands.
  switch (opcode) {
  case OP::OpCode::Sample:
    sampleArgs.push_back(sampleHelper.clamp);
    break;
  case OP::OpCode::SampleLevel:
    sampleArgs.push_back(sampleHelper.lod);
    break;
  case OP::OpCode::SampleGrad:
    appendGradients();
    sampleArgs.push_back(sampleHelper.clamp);
    break;
  case OP::OpCode::SampleBias:
    sampleArgs.push_back(sampleHelper.bias);
    sampleArgs.push_back(sampleHelper.clamp);
    break;
  case OP::OpCode::SampleCmpBias:
    sampleArgs.push_back(sampleHelper.compareValue);
    sampleArgs.push_back(sampleHelper.bias);
    sampleArgs.push_back(sampleHelper.clamp);
    break;
  case OP::OpCode::SampleCmpGrad:
    sampleArgs.push_back(sampleHelper.compareValue);
    appendGradients();
    sampleArgs.push_back(sampleHelper.clamp);
    break;
  case OP::OpCode::SampleCmp:
    sampleArgs.push_back(sampleHelper.compareValue);
    sampleArgs.push_back(sampleHelper.clamp);
    break;
  case OP::OpCode::SampleCmpLevel:
    sampleArgs.push_back(sampleHelper.compareValue);
    sampleArgs.push_back(sampleHelper.lod);
    break;
  case OP::OpCode::SampleCmpLevelZero:
  default:
    // Any opcode not listed above is treated as SampleCmpLevelZero.
    DXASSERT(opcode == OP::OpCode::SampleCmpLevelZero, "invalid sample opcode");
    sampleArgs.push_back(sampleHelper.compareValue);
    break;
  }

  GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
  // CI is replaced in GenerateDxilSample.
  return nullptr;
}
3599
3600
// Gather intrinsics.
3601
struct GatherHelper {
3602
  enum class GatherChannel {
3603
    GatherAll,
3604
    GatherRed,
3605
    GatherGreen,
3606
    GatherBlue,
3607
    GatherAlpha,
3608
  };
3609
3610
  GatherHelper(CallInst *CI, OP::OpCode op,
3611
               HLObjectOperationLowerHelper *pObjHelper,
3612
               GatherHelper::GatherChannel ch);
3613
3614
  OP::OpCode opcode;
3615
  Value *texHandle;
3616
  Value *samplerHandle;
3617
  static const unsigned kMaxCoordDimensions = 4;
3618
  Value *coord[kMaxCoordDimensions];
3619
  unsigned channel;
3620
  Value *special; // For CompareValue, Bias, LOD.
3621
  // Optional.
3622
  static const unsigned kMaxOffsetDimensions = 2;
3623
  Value *offset[kMaxOffsetDimensions];
3624
  // For the overload send different offset for each sample.
3625
  // Only save 3 sampleOffsets because use offset for normal overload as first
3626
  // sample offset.
3627
  static const unsigned kSampleOffsetDimensions = 3;
3628
  Value *sampleOffsets[kSampleOffsetDimensions][kMaxOffsetDimensions];
3629
  Value *status;
3630
3631
  bool hasSampleOffsets;
3632
3633
  unsigned maxHLOperandRead = 0;
3634
6.79k
  Value *ReadHLOperand(CallInst *CI, unsigned opIdx) {
3635
6.79k
    if (CI->getNumArgOperands() > opIdx) {
3636
4.58k
      maxHLOperandRead = std::max(maxHLOperandRead, opIdx);
3637
4.58k
      return CI->getArgOperand(opIdx);
3638
4.58k
    }
3639
2.20k
    return nullptr;
3640
6.79k
  }
3641
  void TranslateCoord(CallInst *CI, unsigned coordIdx,
3642
1.73k
                      unsigned coordDimensions) {
3643
1.73k
    Value *coordArg = ReadHLOperand(CI, coordIdx);
3644
1.73k
    DXASSERT_NOMSG(coordArg);
3645
1.73k
    DXASSERT(coordArg->getType()->getVectorNumElements() == coordDimensions,
3646
1.73k
             "otherwise, HL coordinate dimensions mismatch");
3647
1.73k
    IRBuilder<> Builder(CI);
3648
5.80k
    for (unsigned i = 0; i < coordDimensions; 
i++4.06k
)
3649
4.06k
      coord[i] = Builder.CreateExtractElement(coordArg, i);
3650
1.73k
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
3651
4.62k
    for (unsigned i = coordDimensions; i < kMaxCoordDimensions; 
i++2.88k
)
3652
2.88k
      coord[i] = undefF;
3653
1.73k
  }
3654
1.73k
  void SetStatus(CallInst *CI, unsigned statusIdx) {
3655
1.73k
    status = ReadHLOperand(CI, statusIdx);
3656
1.73k
  }
3657
  void TranslateOffset(CallInst *CI, unsigned offsetIdx,
3658
1.73k
                       unsigned offsetDimensions) {
3659
1.73k
    IntegerType *i32Ty = Type::getInt32Ty(CI->getContext());
3660
1.73k
    if (Value *offsetArg = ReadHLOperand(CI, offsetIdx)) {
3661
804
      DXASSERT(offsetArg->getType()->getVectorNumElements() == offsetDimensions,
3662
804
               "otherwise, HL coordinate dimensions mismatch");
3663
804
      IRBuilder<> Builder(CI);
3664
2.41k
      for (unsigned i = 0; i < offsetDimensions; 
i++1.60k
)
3665
1.60k
        offset[i] = Builder.CreateExtractElement(offsetArg, i);
3666
934
    } else {
3667
      // Use zeros for offsets when not specified, not undef.
3668
934
      Value *zero = ConstantInt::get(i32Ty, (uint64_t)0);
3669
2.14k
      for (unsigned i = 0; i < offsetDimensions; 
i++1.21k
)
3670
1.21k
        offset[i] = zero;
3671
934
    }
3672
    // Use undef for components that should not be used for this resource dim.
3673
1.73k
    Value *undefI = UndefValue::get(i32Ty);
3674
2.39k
    for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; 
i++656
)
3675
656
      offset[i] = undefI;
3676
1.73k
  }
3677
  void TranslateSampleOffset(CallInst *CI, unsigned offsetIdx,
3678
848
                             unsigned offsetDimensions) {
3679
848
    Value *undefI = UndefValue::get(Type::getInt32Ty(CI->getContext()));
3680
848
    if (CI->getNumArgOperands() >= (offsetIdx + kSampleOffsetDimensions)) {
3681
344
      hasSampleOffsets = true;
3682
344
      IRBuilder<> Builder(CI);
3683
1.37k
      for (unsigned ch = 0; ch < kSampleOffsetDimensions; 
ch++1.03k
) {
3684
1.03k
        Value *offsetArg = ReadHLOperand(CI, offsetIdx + ch);
3685
3.09k
        for (unsigned i = 0; i < offsetDimensions; 
i++2.06k
)
3686
2.06k
          sampleOffsets[ch][i] = Builder.CreateExtractElement(offsetArg, i);
3687
1.03k
        for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; 
i++0
)
3688
0
          sampleOffsets[ch][i] = undefI;
3689
1.03k
      }
3690
344
    }
3691
848
  }
3692
  // Update the offset args for gather with sample offset at sampleIdx.
3693
  void UpdateOffsetInGatherArgs(MutableArrayRef<Value *> gatherArgs,
3694
1.03k
                                unsigned sampleIdx) {
3695
1.03k
    unsigned offsetBase = DXIL::OperandIndex::kTextureGatherOffset0OpIdx;
3696
3.09k
    for (unsigned i = 0; i < kMaxOffsetDimensions; 
i++2.06k
)
3697
      // -1 because offset for sample 0 is in GatherHelper::offset.
3698
2.06k
      gatherArgs[offsetBase + i] = sampleOffsets[sampleIdx - 1][i];
3699
1.03k
  }
3700
};
3701
3702
// Reads the operands of the HL gather call CI for DXIL gather opcode `op`.
//
// Selects the channel index from `ch` (GatherAll and GatherRed both map to
// channel 0), fetches the texture and sampler handles, and then translates
// coordinates, offsets, optional per-sample offsets, the compare value (for
// TextureGatherCmp) and the optional status operand.
//
// If the resource kind of the texture handle is invalid, `opcode` is set to
// DXIL::OpCode::NumOpCodes and the remaining operands are not read.
GatherHelper::GatherHelper(CallInst *CI, OP::OpCode op,
                           HLObjectOperationLowerHelper *pObjHelper,
                           GatherHelper::GatherChannel ch)
    : opcode(op), special(nullptr), hasSampleOffsets(false) {

  // Start from channel 0 so the member is never left unset.
  channel = 0;
  switch (ch) {
  case GatherChannel::GatherAll:
  case GatherChannel::GatherRed:
    break;
  case GatherChannel::GatherGreen:
    channel = 1;
    break;
  case GatherChannel::GatherBlue:
    channel = 2;
    break;
  case GatherChannel::GatherAlpha:
    channel = 3;
    break;
  }

  texHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  samplerHandle = CI->getArgOperand(HLOperandIndex::kSampleSamplerArgIndex);

  DXIL::ResourceKind RK = pObjHelper->GetRK(texHandle);
  if (RK == DXIL::ResourceKind::Invalid) {
    opcode = DXIL::OpCode::NumOpCodes;
    return;
  }
  unsigned coordSize = DxilResource::GetNumCoords(RK);
  unsigned offsetSize = DxilResource::GetNumOffsets(RK);
  bool cube = RK == DXIL::ResourceKind::TextureCube ||
              RK == DXIL::ResourceKind::TextureCubeArray;

  const unsigned kCoordArgIdx = HLOperandIndex::kSampleCoordArgIndex;
  TranslateCoord(CI, kCoordArgIdx, coordSize);

  switch (op) {
  case OP::OpCode::TextureGather: {
    unsigned statusIdx;
    if (cube) {
      // Cube gathers have no offset operand.
      TranslateOffset(CI, HLOperandIndex::kInvalidIdx, offsetSize);
      statusIdx = HLOperandIndex::kGatherCubeStatusArgIndex;
    } else {
      TranslateOffset(CI, HLOperandIndex::kGatherOffsetArgIndex, offsetSize);
      // Gather-all has no overload taking per-sample offsets.
      if (ch != GatherChannel::GatherAll)
        TranslateSampleOffset(CI, HLOperandIndex::kGatherSampleOffsetArgIndex,
                              offsetSize);
      statusIdx = hasSampleOffsets
                      ? HLOperandIndex::kGatherStatusWithSampleOffsetArgIndex
                      : HLOperandIndex::kGatherStatusArgIndex;
    }
    SetStatus(CI, statusIdx);
  } break;
  case OP::OpCode::TextureGatherCmp: {
    special = ReadHLOperand(CI, HLOperandIndex::kGatherCmpCmpValArgIndex);
    unsigned statusIdx;
    if (cube) {
      // Cube gathers have no offset operand.
      TranslateOffset(CI, HLOperandIndex::kInvalidIdx, offsetSize);
      statusIdx = HLOperandIndex::kGatherCmpCubeStatusArgIndex;
    } else {
      TranslateOffset(CI, HLOperandIndex::kGatherCmpOffsetArgIndex, offsetSize);
      // Gather-all has no overload taking per-sample offsets.
      if (ch != GatherChannel::GatherAll)
        TranslateSampleOffset(
            CI, HLOperandIndex::kGatherCmpSampleOffsetArgIndex, offsetSize);
      statusIdx = hasSampleOffsets
                      ? HLOperandIndex::kGatherCmpStatusWithSampleOffsetArgIndex
                      : HLOperandIndex::kGatherCmpStatusArgIndex;
    }
    SetStatus(CI, statusIdx);
  } break;
  case OP::OpCode::TextureGatherRaw: {
    TranslateOffset(CI, HLOperandIndex::kGatherOffsetArgIndex, offsetSize);
    // Raw gather has no overload taking per-sample offsets.
    DXASSERT(ch == GatherChannel::GatherAll,
             "Raw gather must use all channels");
    DXASSERT(!cube, "Raw gather can't be used with cube textures");
    DXASSERT(!hasSampleOffsets,
             "Raw gather doesn't support individual offsets");
    SetStatus(CI, HLOperandIndex::kGatherStatusArgIndex);
  } break;
  default:
    DXASSERT(0, "invalid opcode for Gather");
    break;
  }
  // Every HL operand up to the last one must have been consumed.
  DXASSERT(maxHLOperandRead == CI->getNumArgOperands() - 1,
           "otherwise, unused HL arguments for Gather op");
}
3797
3798
void GenerateDxilGather(CallInst *CI, Function *F,
3799
                        MutableArrayRef<Value *> gatherArgs,
3800
1.73k
                        GatherHelper &helper, hlsl::OP *hlslOp) {
3801
1.73k
  IRBuilder<> Builder(CI);
3802
3803
1.73k
  CallInst *call = Builder.CreateCall(F, gatherArgs);
3804
3805
1.73k
  dxilutil::MigrateDebugValue(CI, call);
3806
3807
1.73k
  Value *retVal;
3808
1.73k
  if (!helper.hasSampleOffsets) {
3809
    // extract value part
3810
1.39k
    retVal = ScalarizeResRet(CI->getType(), call, Builder);
3811
1.39k
  } else {
3812
344
    retVal = UndefValue::get(CI->getType());
3813
344
    Value *elt = Builder.CreateExtractValue(call, (uint64_t)0);
3814
344
    retVal = Builder.CreateInsertElement(retVal, elt, (uint64_t)0);
3815
3816
344
    helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 1);
3817
344
    CallInst *callY = Builder.CreateCall(F, gatherArgs);
3818
344
    elt = Builder.CreateExtractValue(callY, (uint64_t)1);
3819
344
    retVal = Builder.CreateInsertElement(retVal, elt, 1);
3820
3821
344
    helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 2);
3822
344
    CallInst *callZ = Builder.CreateCall(F, gatherArgs);
3823
344
    elt = Builder.CreateExtractValue(callZ, (uint64_t)2);
3824
344
    retVal = Builder.CreateInsertElement(retVal, elt, 2);
3825
3826
344
    helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 3);
3827
344
    CallInst *callW = Builder.CreateCall(F, gatherArgs);
3828
344
    elt = Builder.CreateExtractValue(callW, (uint64_t)3);
3829
344
    retVal = Builder.CreateInsertElement(retVal, elt, 3);
3830
3831
    // TODO: UpdateStatus for each gather call.
3832
344
  }
3833
3834
  // Replace ret val.
3835
1.73k
  CI->replaceAllUsesWith(retVal);
3836
3837
  // Get status
3838
1.73k
  if (helper.status) {
3839
464
    UpdateStatus(call, helper.status, Builder, hlslOp);
3840
464
  }
3841
1.73k
}
3842
3843
// Lowers an HL Gather* method call to the DXIL gather operation `opcode`.
//
// The intrinsic selects which channel is gathered. All gather ops share the
// same leading operands (opcode constant, texture handle, sampler handle, four
// coordinates, two offsets); TextureGather appends the channel and
// TextureGatherCmp appends the channel and the compare value. The DXIL call is
// emitted by GenerateDxilGather, which also replaces the uses of CI, so this
// function always returns nullptr. Translated is set to false when
// GatherHelper leaves its opcode set to NumOpCodes.
Value *TranslateGather(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                       HLOperationLowerHelper &helper,
                       HLObjectOperationLowerHelper *pObjHelper,
                       bool &Translated) {
  typedef GatherHelper::GatherChannel Channel;

  Channel selected = Channel::GatherAll;
  switch (IOP) {
  case IntrinsicOp::MOP_Gather:
  case IntrinsicOp::MOP_GatherCmp:
  case IntrinsicOp::MOP_GatherRaw:
    selected = Channel::GatherAll;
    break;
  case IntrinsicOp::MOP_GatherRed:
  case IntrinsicOp::MOP_GatherCmpRed:
    selected = Channel::GatherRed;
    break;
  case IntrinsicOp::MOP_GatherGreen:
  case IntrinsicOp::MOP_GatherCmpGreen:
    selected = Channel::GatherGreen;
    break;
  case IntrinsicOp::MOP_GatherBlue:
  case IntrinsicOp::MOP_GatherCmpBlue:
    selected = Channel::GatherBlue;
    break;
  case IntrinsicOp::MOP_GatherAlpha:
  case IntrinsicOp::MOP_GatherCmpAlpha:
    selected = Channel::GatherAlpha;
    break;
  default:
    DXASSERT(0, "invalid gather intrinsic");
    break;
  }

  GatherHelper gatherHelper(CI, opcode, pObjHelper, selected);
  if (gatherHelper.opcode == DXIL::OpCode::NumOpCodes) {
    Translated = false;
    return nullptr;
  }

  hlsl::OP *hlslOP = &helper.hlslOP;
  Function *F = hlslOP->GetOpFunc(gatherHelper.opcode,
                                  CI->getType()->getScalarType());
  Constant *opArg = hlslOP->GetU32Const((unsigned)gatherHelper.opcode);
  Value *channelArg = hlslOP->GetU32Const(gatherHelper.channel);

  // Operands shared by every gather opcode.
  Value *leadingArgs[] = {opArg, gatherHelper.texHandle,
                          gatherHelper.samplerHandle,
                          // Coord.
                          gatherHelper.coord[0], gatherHelper.coord[1],
                          gatherHelper.coord[2], gatherHelper.coord[3],
                          // Offset.
                          gatherHelper.offset[0], gatherHelper.offset[1]};
  SmallVector<Value *, 11> gatherArgs(std::begin(leadingArgs),
                                      std::end(leadingArgs));

  switch (opcode) {
  case OP::OpCode::TextureGather:
    gatherArgs.push_back(channelArg);
    break;
  case OP::OpCode::TextureGatherCmp:
    gatherArgs.push_back(channelArg);
    // CmpVal.
    gatherArgs.push_back(gatherHelper.special);
    break;
  case OP::OpCode::TextureGatherRaw:
    // Raw gather takes neither a channel nor a compare value.
    break;
  default:
    DXASSERT(0, "invalid opcode for Gather");
    return nullptr;
  }

  GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP);
  // CI is replaced in GenerateDxilGather.
  return nullptr;
}
3934
3935
// Lowers an HL WriteSamplerFeedback* method call to the DXIL operation
// `opcode` and returns the new call.
//
// All variants share the leading operands (opcode constant, feedback texture
// handle, sampled texture handle, sampler handle, four coordinates); the
// trailing operands depend on the variant. Returns nullptr for an unknown
// opcode, and also (with Translated set to false) when SampleHelper leaves its
// opcode set to NumOpCodes.
static Value *
TranslateWriteSamplerFeedback(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                              HLOperationLowerHelper &helper,
                              HLObjectOperationLowerHelper *pObjHelper,
                              bool &Translated) {
  SampleHelper sampleHelper(CI, opcode, pObjHelper);
  if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
    Translated = false;
    return nullptr;
  }

  hlsl::OP *hlslOP = &helper.hlslOP;
  Function *F = hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType());
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);

  IRBuilder<> Builder(CI);

  // Operands shared by every sampler-feedback opcode.
  Value *leadingArgs[] = {
      opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle,
      sampleHelper.samplerHandle,
      // Coord.
      sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
      sampleHelper.coord[3]};
  SmallVector<Value *, 15> feedbackArgs(std::begin(leadingArgs),
                                        std::end(leadingArgs));

  switch (opcode) {
  case OP::OpCode::WriteSamplerFeedback:
    feedbackArgs.push_back(sampleHelper.clamp);
    break;
  case OP::OpCode::WriteSamplerFeedbackBias:
    feedbackArgs.push_back(sampleHelper.bias);
    feedbackArgs.push_back(sampleHelper.clamp);
    break;
  case OP::OpCode::WriteSamplerFeedbackGrad:
    // Ddx.
    for (unsigned i = 0; i < 3; ++i)
      feedbackArgs.push_back(sampleHelper.ddx[i]);
    // Ddy.
    for (unsigned i = 0; i < 3; ++i)
      feedbackArgs.push_back(sampleHelper.ddy[i]);
    feedbackArgs.push_back(sampleHelper.clamp);
    break;
  case OP::OpCode::WriteSamplerFeedbackLevel:
    feedbackArgs.push_back(sampleHelper.lod);
    break;
  default:
    DXASSERT(false, "otherwise, unknown SamplerFeedback Op");
    return nullptr;
  }

  return Builder.CreateCall(F, feedbackArgs);
}
4012
4013
// Load/Store intrinsics.
4014
21.7k
// Maps a resource kind to the DXIL load opcode used for it:
// raw and structured buffers use RawBufferLoad, typed buffers use BufferLoad,
// and every other kind uses TextureLoad. An Invalid kind asserts and then
// also yields TextureLoad.
OP::OpCode LoadOpFromResKind(DxilResource::Kind RK) {
  if (RK == DxilResource::Kind::RawBuffer ||
      RK == DxilResource::Kind::StructuredBuffer)
    return OP::OpCode::RawBufferLoad;

  if (RK == DxilResource::Kind::TypedBuffer)
    return OP::OpCode::BufferLoad;

  if (RK == DxilResource::Kind::Invalid) {
    DXASSERT(0, "invalid resource kind");
  }
  return OP::OpCode::TextureLoad;
}
4029
4030
struct ResLoadHelper {
4031
  // Default constructor uses CI load intrinsic call
4032
  //  to get the retval and various location indicators.
4033
  ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC,
4034
                Value *h, IntrinsicOp IOP, LoadInst *TyBufSubLoad = nullptr);
4035
  // Alternative constructor explicitly sets the index.
4036
  // Used for some subscript operators that feed the generic HL call inst
4037
  // into a load op and by the matrixload call instruction.
4038
  ResLoadHelper(Instruction *Inst, DxilResource::Kind RK, Value *h, Value *idx,
4039
                Value *Offset, Value *status = nullptr, Value *mip = nullptr)
4040
10.1k
      : intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(Inst),
4041
10.1k
        addr(idx), offset(Offset), status(status), mipLevel(mip) {
4042
10.1k
    opcode = LoadOpFromResKind(RK);
4043
10.1k
    Type *Ty = Inst->getType();
4044
10.1k
    if (opcode == OP::OpCode::RawBufferLoad && 
Ty->isVectorTy()9.98k
&&
4045
10.1k
        
Ty->getVectorNumElements() > 15.96k
&&
4046
10.1k
        
Inst->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()4.26k
)
4047
1.39k
      opcode = OP::OpCode::RawBufferVectorLoad;
4048
10.1k
  }
4049
  OP::OpCode opcode;
4050
  IntrinsicOp intrinsicOpCode;
4051
  unsigned dxilMajor;
4052
  unsigned dxilMinor;
4053
  Value *handle;
4054
  Value *retVal;
4055
  Value *addr;
4056
  Value *offset;
4057
  Value *status;
4058
  Value *mipLevel;
4059
};
4060
4061
// Uses CI arguments to determine the index, offset, and mipLevel also depending
4062
// on the RK/RC resource kind and class, which determine the opcode.
4063
// Handle and IOP are set explicitly.
4064
// For typed buffer loads, the call instruction feeds into a load
4065
// represented by TyBufSubLoad which determines the instruction to replace.
4066
// Otherwise, CI is replaced.
4067
ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
4068
                             DxilResourceBase::Class RC, Value *hdl,
4069
                             IntrinsicOp IOP, LoadInst *TyBufSubLoad)
4070
11.6k
    : intrinsicOpCode(IOP), handle(hdl), offset(nullptr), status(nullptr) {
4071
11.6k
  opcode = LoadOpFromResKind(RK);
4072
11.6k
  bool bForSubscript = false;
4073
11.6k
  if (TyBufSubLoad) {
4074
2.75k
    bForSubscript = true;
4075
2.75k
    retVal = TyBufSubLoad;
4076
2.75k
  } else
4077
8.86k
    retVal = CI;
4078
11.6k
  const unsigned kAddrIdx = HLOperandIndex::kBufLoadAddrOpIdx;
4079
11.6k
  addr = CI->getArgOperand(kAddrIdx);
4080
11.6k
  unsigned argc = CI->getNumArgOperands();
4081
11.6k
  Type *i32Ty = Type::getInt32Ty(CI->getContext());
4082
11.6k
  unsigned StatusIdx = HLOperandIndex::kBufLoadStatusOpIdx;
4083
11.6k
  unsigned OffsetIdx = HLOperandIndex::kInvalidIdx;
4084
4085
11.6k
  if (opcode == OP::OpCode::TextureLoad) {
4086
4.86k
    bool IsMS = (RK == DxilResource::Kind::Texture2DMS ||
4087
4.86k
                 
RK == DxilResource::Kind::Texture2DMSArray4.61k
);
4088
    // Set mip and status index.
4089
4.86k
    offset = UndefValue::get(i32Ty);
4090
4.86k
    if (IsMS) {
4091
      // Retrieve appropriate MS parameters.
4092
408
      StatusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx;
4093
      // MS textures keep the sample param (mipLevel) regardless of writability.
4094
408
      if (bForSubscript)
4095
50
        mipLevel = ConstantInt::get(i32Ty, 0);
4096
358
      else
4097
358
        mipLevel =
4098
358
            CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx);
4099
4.45k
    } else if (RC == DxilResourceBase::Class::UAV) {
4100
      // DXIL requires that non-MS UAV accesses set miplevel to undef.
4101
2.07k
      mipLevel = UndefValue::get(i32Ty);
4102
2.07k
      StatusIdx = HLOperandIndex::kRWTexLoadStatusOpIdx;
4103
2.38k
    } else {
4104
      // Non-MS SRV case.
4105
2.38k
      StatusIdx = HLOperandIndex::kTexLoadStatusOpIdx;
4106
2.38k
      if (bForSubscript)
4107
        // Having no miplevel param, single subscripted SRVs default to 0.
4108
1.37k
        mipLevel = ConstantInt::get(i32Ty, 0);
4109
1.00k
      else
4110
        // Mip is stored at the last channel of the coordinate vector.
4111
1.00k
        mipLevel = IRBuilder<>(CI).CreateExtractElement(
4112
1.00k
            addr, DxilResource::GetNumCoords(RK));
4113
2.38k
    }
4114
4.86k
    if (RC == DxilResourceBase::Class::SRV)
4115
2.68k
      OffsetIdx = IsMS ? 
HLOperandIndex::kTex2DMSLoadOffsetOpIdx304
4116
2.68k
                       : 
HLOperandIndex::kTexLoadOffsetOpIdx2.38k
;
4117
6.75k
  } else if (opcode == OP::OpCode::RawBufferLoad) {
4118
    // If native vectors are available and this load had a vector
4119
    // with more than one elements, convert the RawBufferLod to the
4120
    // native vector variant RawBufferVectorLoad.
4121
4.22k
    Type *Ty = CI->getType();
4122
4.22k
    if (Ty->isVectorTy() && 
Ty->getVectorNumElements() > 12.24k
&&
4123
4.22k
        
CI->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()2.11k
)
4124
1.06k
      opcode = OP::OpCode::RawBufferVectorLoad;
4125
4.22k
  }
4126
4127
  // Set offset.
4128
11.6k
  if (DXIL::IsStructuredBuffer(RK))
4129
    // Structured buffers receive no exterior offset in this constructor,
4130
    // but may need to increment it later.
4131
1.28k
    offset = ConstantInt::get(i32Ty, 0U);
4132
10.3k
  else if (argc > OffsetIdx)
4133
    // Textures may set the offset from an explicit argument.
4134
102
    offset = CI->getArgOperand(OffsetIdx);
4135
10.2k
  else
4136
    // All other cases use undef.
4137
10.2k
    offset = UndefValue::get(i32Ty);
4138
4139
  // Retrieve status value if provided.
4140
11.6k
  if (argc > StatusIdx)
4141
1.12k
    status = CI->getArgOperand(StatusIdx);
4142
11.6k
}
4143
4144
void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status,
4145
                                 hlsl::OP *OP, HLResource::Kind RK,
4146
                                 const DataLayout &DL);
4147
4148
// Returns the i8 constant component mask for a raw buffer load chunk that
// covers the first NumComponents channels (X, then Y, Z, W).
//   Ty            - element type of the load; not consulted by this function.
//   NumComponents - number of channels in the chunk, expected to be 0-4.
//   OP            - DXIL op helper used to materialize the i8 constant.
// A count above 4 asserts in debug builds and yields an empty (0) mask.
static Constant *GetRawBufferMaskForETy(Type *Ty, unsigned NumComponents,
                                        hlsl::OP *OP) {
  unsigned mask = 0;

  switch (NumComponents) {
  case 0:
    break;
  case 1:
    mask = DXIL::kCompMask_X;
    break;
  case 2:
    mask = DXIL::kCompMask_X | DXIL::kCompMask_Y;
    break;
  case 3:
    mask = DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z;
    break;
  case 4:
    mask = DXIL::kCompMask_All;
    break;
  default:
    // The previous message talked about a 2-component limit for 64-bit
    // types, which this switch never enforced; the real limit is 4 channels.
    DXASSERT(false,
             "Cannot load more than 4 components in one raw buffer load.");
  }
  return OP->GetI8Const(mask);
}
4172
4173
Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset,
4174
                        Value *status, Type *EltTy,
4175
                        MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
4176
                        IRBuilder<> &Builder, unsigned NumComponents,
4177
                        Constant *alignment);
4178
4179
// Sets up arguments for buffer load call.
4180
static SmallVector<Value *, 10> GetBufLoadArgs(ResLoadHelper helper,
4181
                                               HLResource::Kind RK,
4182
                                               IRBuilder<> Builder,
4183
18.0k
                                               unsigned LdSize) {
4184
18.0k
  OP::OpCode opcode = helper.opcode;
4185
18.0k
  llvm::Constant *opArg = Builder.getInt32((uint32_t)opcode);
4186
4187
18.0k
  unsigned alignment = RK == DxilResource::Kind::RawBuffer ? 
4U3.22k
:
8U14.8k
;
4188
18.0k
  alignment = std::min(alignment, LdSize);
4189
18.0k
  Constant *alignmentVal = Builder.getInt32(alignment);
4190
4191
  // Assemble args specific to the type bab/struct/typed:
4192
  // - Typed needs to handle the possibility of vector coords
4193
  // - Raws need to calculate alignment and mask values.
4194
18.0k
  SmallVector<Value *, 10> Args;
4195
18.0k
  Args.emplace_back(opArg);         // opcode @0.
4196
18.0k
  Args.emplace_back(helper.handle); // Resource handle @1
4197
4198
  // Set offsets appropriate for the load operation.
4199
18.0k
  bool isVectorAddr = helper.addr->getType()->isVectorTy();
4200
18.0k
  if (opcode == OP::OpCode::TextureLoad) {
4201
3.68k
    llvm::Value *undefI = llvm::UndefValue::get(Builder.getInt32Ty());
4202
4203
    // Set mip level or sample for MS texutures @2.
4204
3.68k
    Args.emplace_back(helper.mipLevel);
4205
    // Set texture coords according to resource kind @3-5
4206
    // Coords unused by the resource kind are undefs.
4207
3.68k
    unsigned coordSize = DxilResource::GetNumCoords(RK);
4208
14.7k
    for (unsigned i = 0; i < 3; 
i++11.0k
)
4209
11.0k
      if (i < coordSize)
4210
7.22k
        Args.emplace_back(isVectorAddr
4211
7.22k
                              ? 
Builder.CreateExtractElement(helper.addr, i)6.80k
4212
7.22k
                              : 
helper.addr420
);
4213
3.81k
      else
4214
3.81k
        Args.emplace_back(undefI);
4215
4216
    // Set texture offsets according to resource kind @7-9
4217
    // Coords unused by the resource kind are undefs.
4218
3.68k
    unsigned offsetSize = DxilResource::GetNumOffsets(RK);
4219
3.68k
    if (!helper.offset || isa<llvm::UndefValue>(helper.offset))
4220
3.57k
      offsetSize = 0;
4221
14.7k
    for (unsigned i = 0; i < 3; 
i++11.0k
)
4222
11.0k
      if (i < offsetSize)
4223
204
        Args.emplace_back(Builder.CreateExtractElement(helper.offset, i));
4224
10.8k
      else
4225
10.8k
        Args.emplace_back(undefI);
4226
14.3k
  } else {
4227
    // If not TextureLoad, it could be a typed or raw buffer load.
4228
    // They have mostly similar arguments.
4229
14.3k
    DXASSERT(opcode == OP::OpCode::RawBufferLoad ||
4230
14.3k
                 opcode == OP::OpCode::RawBufferVectorLoad ||
4231
14.3k
                 opcode == OP::OpCode::BufferLoad,
4232
14.3k
             "Wrong opcode in get load args");
4233
14.3k
    Args.emplace_back(
4234
14.3k
        isVectorAddr ? 
Builder.CreateExtractElement(helper.addr, (uint64_t)0)230
4235
14.3k
                     : 
helper.addr14.1k
);
4236
14.3k
    Args.emplace_back(helper.offset);
4237
14.3k
    if (opcode == OP::OpCode::RawBufferLoad) {
4238
      // Unlike typed buffer load, raw buffer load has mask and alignment.
4239
10.7k
      Args.emplace_back(nullptr);      // Mask will be added later %4.
4240
10.7k
      Args.emplace_back(alignmentVal); // alignment @5.
4241
10.7k
    } else 
if (3.66k
opcode == OP::OpCode::RawBufferVectorLoad3.66k
) {
4242
      // RawBufferVectorLoad takes just alignment, no mask.
4243
2.46k
      Args.emplace_back(alignmentVal); // alignment @4
4244
2.46k
    }
4245
14.3k
  }
4246
18.0k
  return Args;
4247
18.0k
}
4248
4249
// Emits as many calls as needed to load the full vector
4250
// Performs any needed extractions and conversions of the results.
4251
Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
4252
                        IRBuilder<> &Builder, hlsl::OP *OP,
4253
18.0k
                        const DataLayout &DL) {
4254
18.0k
  OP::OpCode opcode = helper.opcode;
4255
18.0k
  Type *Ty = helper.retVal->getType();
4256
4257
18.0k
  unsigned NumComponents = 1;
4258
18.0k
  if (Ty->isVectorTy())
4259
11.6k
    NumComponents = Ty->getVectorNumElements();
4260
4261
18.0k
  const bool isTyped = DXIL::IsTyped(RK);
4262
18.0k
  Type *EltTy = Ty->getScalarType();
4263
18.0k
  const bool is64 = (EltTy->isIntegerTy(64) || 
EltTy->isDoubleTy()16.8k
);
4264
18.0k
  const bool isBool = EltTy->isIntegerTy(1);
4265
  // Values will be loaded in memory representations.
4266
18.0k
  if (isBool || 
(17.7k
is6417.7k
&&
isTyped2.58k
))
4267
468
    EltTy = Builder.getInt32Ty();
4268
4269
  // Calculate load size with the scalar memory element type.
4270
18.0k
  unsigned LdSize = DL.getTypeAllocSize(EltTy);
4271
4272
  // Adjust number of components as needed.
4273
18.0k
  if (is64 && 
isTyped2.58k
) {
4274
    // 64-bit types are stored as int32 pairs in typed buffers.
4275
146
    DXASSERT(NumComponents <= 2, "Typed buffers only allow 4 dwords.");
4276
146
    NumComponents *= 2;
4277
17.9k
  } else if (opcode == OP::OpCode::RawBufferVectorLoad) {
4278
    // Native vector loads only have a single vector element in ResRet.
4279
2.46k
    EltTy = VectorType::get(EltTy, NumComponents);
4280
2.46k
    NumComponents = 1;
4281
2.46k
  }
4282
4283
18.0k
  SmallVector<Value *, 10> Args = GetBufLoadArgs(helper, RK, Builder, LdSize);
4284
4285
  // Keep track of the first load for debug info migration.
4286
18.0k
  Value *FirstLd = nullptr;
4287
4288
18.0k
  unsigned OffsetIdx = 0;
4289
18.0k
  if (RK == DxilResource::Kind::RawBuffer)
4290
    // Raw buffers can't use offset param. Add to coord index.
4291
3.22k
    OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx;
4292
14.8k
  else if (RK == DxilResource::Kind::StructuredBuffer)
4293
9.94k
    OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx;
4294
4295
  // Create call(s) to function object and collect results in Elts.
4296
  // Typed buffer loads are limited to one load of up to 4 32-bit values.
4297
  // Raw buffer loads might need multiple loads in chunks of 4.
4298
18.0k
  SmallVector<Value *, 4> Elts(NumComponents);
4299
37.0k
  for (unsigned i = 0; i < NumComponents;) {
4300
    // Load 4 elements or however many less than 4 are left to load.
4301
18.9k
    unsigned chunkSize = std::min(NumComponents - i, 4U);
4302
4303
    // Assign mask for raw buffer loads.
4304
18.9k
    if (opcode == OP::OpCode::RawBufferLoad) {
4305
11.6k
      Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] =
4306
11.6k
          GetRawBufferMaskForETy(EltTy, chunkSize, OP);
4307
      // If we've loaded a chunk already, update offset to next chunk.
4308
11.6k
      if (FirstLd != nullptr)
4309
916
        Args[OffsetIdx] =
4310
916
            Builder.CreateAdd(Args[OffsetIdx], OP->GetU32Const(4 * LdSize));
4311
11.6k
    }
4312
4313
18.9k
    Function *F = OP->GetOpFunc(opcode, EltTy);
4314
18.9k
    Value *Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(opcode));
4315
18.9k
    unsigned StatusIndex;
4316
4317
    // Extract elements from returned ResRet.
4318
    // Native vector loads just have one vector element in the ResRet.
4319
    // Others have up to four scalars that need to be individually extracted.
4320
18.9k
    if (opcode == OP::OpCode::RawBufferVectorLoad) {
4321
2.46k
      Elts[i++] = Builder.CreateExtractValue(Ld, 0);
4322
2.46k
      StatusIndex = DXIL::kVecResRetStatusIndex;
4323
16.5k
    } else {
4324
53.0k
      for (unsigned j = 0; j < chunkSize; 
j++, i++36.5k
)
4325
36.5k
        Elts[i] = Builder.CreateExtractValue(Ld, j);
4326
16.5k
      StatusIndex = DXIL::kResRetStatusIndex;
4327
16.5k
    }
4328
4329
    // Update status.
4330
18.9k
    UpdateStatus(Ld, helper.status, Builder, OP, StatusIndex);
4331
4332
18.9k
    if (!FirstLd)
4333
18.0k
      FirstLd = Ld;
4334
18.9k
  }
4335
18.0k
  DXASSERT(FirstLd, "No loads created by TranslateBufLoad");
4336
4337
  // Convert loaded 32-bit integers to intended 64-bit type representation.
4338
18.0k
  if (isTyped) {
4339
4.88k
    Type *RegEltTy = Ty->getScalarType();
4340
4.88k
    if (RegEltTy->isDoubleTy()) {
4341
68
      Function *makeDouble = OP->GetOpFunc(DXIL::OpCode::MakeDouble, RegEltTy);
4342
68
      Value *makeDoubleOpArg =
4343
68
          Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble);
4344
68
      NumComponents /= 2; // Convert back to number of doubles.
4345
160
      for (unsigned i = 0; i < NumComponents; 
i++92
) {
4346
92
        Value *lo = Elts[2 * i];
4347
92
        Value *hi = Elts[2 * i + 1];
4348
92
        Elts[i] = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi});
4349
92
      }
4350
68
      EltTy = RegEltTy;
4351
4.81k
    } else if (RegEltTy->isIntegerTy(64)) {
4352
78
      NumComponents /= 2; // Convert back to number of int64s.
4353
192
      for (unsigned i = 0; i < NumComponents; 
i++114
) {
4354
114
        Value *lo = Elts[2 * i];
4355
114
        Value *hi = Elts[2 * i + 1];
4356
114
        lo = Builder.CreateZExt(lo, RegEltTy);
4357
114
        hi = Builder.CreateZExt(hi, RegEltTy);
4358
114
        hi = Builder.CreateShl(hi, 32);
4359
114
        Elts[i] = Builder.CreateOr(lo, hi);
4360
114
      }
4361
78
      EltTy = RegEltTy;
4362
78
    }
4363
4.88k
  }
4364
4365
  // Package elements into a vector as needed.
4366
18.0k
  Value *retValNew = nullptr;
4367
  // Scalar or native vector loads need not construct vectors from elements.
4368
18.0k
  if (!Ty->isVectorTy() || 
opcode == OP::OpCode::RawBufferVectorLoad11.6k
) {
4369
8.84k
    retValNew = Elts[0];
4370
9.20k
  } else {
4371
9.20k
    retValNew = UndefValue::get(VectorType::get(EltTy, NumComponents));
4372
39.1k
    for (unsigned i = 0; i < NumComponents; 
i++29.9k
)
4373
29.9k
      retValNew = Builder.CreateInsertElement(retValNew, Elts[i], i);
4374
9.20k
  }
4375
4376
  // Convert loaded int32 bool results to i1 register representation.
4377
18.0k
  if (isBool)
4378
322
    retValNew = Builder.CreateICmpNE(
4379
322
        retValNew, Constant::getNullValue(retValNew->getType()));
4380
4381
18.0k
  helper.retVal->replaceAllUsesWith(retValNew);
4382
18.0k
  helper.retVal = retValNew;
4383
4384
18.0k
  return FirstLd;
4385
18.0k
}
4386
4387
// Lowers an HL resource load/subscript call.
// Calls that return a pointer are routed to the structured buffer subscript
// lowering; all others go through TranslateBufLoad, after which debug values
// attached to CI are migrated to the first emitted load.
// Always returns nullptr because the callees replace CI's uses themselves.
Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                             HLOperationLowerHelper &helper,
                             HLObjectOperationLowerHelper *pObjHelper,
                             bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  DataLayout &DL = helper.dataLayout;

  Value *hdl = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  const DXIL::ResourceClass resClass = pObjHelper->GetRC(hdl);
  const DXIL::ResourceKind resKind = pObjHelper->GetRK(hdl);

  ResLoadHelper ldHelper(CI, resKind, resClass, hdl, IOP);

  if (CI->getType()->isPointerTy()) {
    DXASSERT(!DxilResource::IsAnyTexture(resKind),
             "Textures should not be treated as structured buffers.");
    TranslateStructBufSubscript(cast<CallInst>(ldHelper.retVal), hdl,
                                ldHelper.status, hlslOP, resKind, DL);
    return nullptr;
  }

  IRBuilder<> Builder(CI);
  Value *FirstLd = TranslateBufLoad(ldHelper, resKind, Builder, hlslOP, DL);
  dxilutil::MigrateDebugValue(CI, FirstLd);

  // CI has been replaced by the translation above.
  return nullptr;
}
4415
4416
// Split { v0, v1 } to { v0.lo, v0.hi, v1.lo, v1.hi }
4417
void Split64bitValForStore(Type *EltTy, ArrayRef<Value *> vals, unsigned size,
4418
                           MutableArrayRef<Value *> vals32, hlsl::OP *hlslOP,
4419
218
                           IRBuilder<> &Builder) {
4420
218
  Type *i32Ty = Builder.getInt32Ty();
4421
218
  Type *doubleTy = Builder.getDoubleTy();
4422
218
  Value *undefI32 = UndefValue::get(i32Ty);
4423
4424
218
  if (EltTy == doubleTy) {
4425
40
    Function *dToU = hlslOP->GetOpFunc(DXIL::OpCode::SplitDouble, doubleTy);
4426
40
    Value *dToUOpArg = Builder.getInt32((unsigned)DXIL::OpCode::SplitDouble);
4427
92
    for (unsigned i = 0; i < size; 
i++52
) {
4428
52
      if (isa<UndefValue>(vals[i])) {
4429
0
        vals32[2 * i] = undefI32;
4430
0
        vals32[2 * i + 1] = undefI32;
4431
52
      } else {
4432
52
        Value *retVal = Builder.CreateCall(dToU, {dToUOpArg, vals[i]});
4433
52
        Value *lo = Builder.CreateExtractValue(retVal, 0);
4434
52
        Value *hi = Builder.CreateExtractValue(retVal, 1);
4435
52
        vals32[2 * i] = lo;
4436
52
        vals32[2 * i + 1] = hi;
4437
52
      }
4438
52
    }
4439
178
  } else {
4440
372
    for (unsigned i = 0; i < size; 
i++194
) {
4441
194
      if (isa<UndefValue>(vals[i])) {
4442
0
        vals32[2 * i] = undefI32;
4443
0
        vals32[2 * i + 1] = undefI32;
4444
194
      } else {
4445
194
        Value *lo = Builder.CreateTrunc(vals[i], i32Ty);
4446
194
        Value *hi = Builder.CreateLShr(vals[i], 32);
4447
194
        hi = Builder.CreateTrunc(hi, i32Ty);
4448
194
        vals32[2 * i] = lo;
4449
194
        vals32[2 * i + 1] = hi;
4450
194
      }
4451
194
    }
4452
178
  }
4453
218
}
4454
4455
void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
4456
                    Value *Idx, Value *offset, IRBuilder<> &Builder,
4457
16.6k
                    hlsl::OP *OP, Value *sampIdx = nullptr) {
4458
16.6k
  Type *Ty = val->getType();
4459
16.6k
  OP::OpCode opcode = OP::OpCode::NumOpCodes;
4460
16.6k
  bool IsTyped = true;
4461
16.6k
  switch (RK) {
4462
3.06k
  case DxilResource::Kind::RawBuffer:
4463
13.3k
  case DxilResource::Kind::StructuredBuffer:
4464
13.3k
    IsTyped = false;
4465
13.3k
    opcode = OP::OpCode::RawBufferStore;
4466
    // Where shader model and type allows, use vector store intrinsic.
4467
13.3k
    if (OP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus() &&
4468
13.3k
        
Ty->isVectorTy()4.84k
&&
Ty->getVectorNumElements() > 13.76k
)
4469
2.35k
      opcode = OP::OpCode::RawBufferVectorStore;
4470
13.3k
    break;
4471
1.19k
  case DxilResource::Kind::TypedBuffer:
4472
1.19k
    opcode = OP::OpCode::BufferStore;
4473
1.19k
    break;
4474
0
  case DxilResource::Kind::Invalid:
4475
0
    DXASSERT(0, "invalid resource kind");
4476
0
    break;
4477
48
  case DxilResource::Kind::Texture2DMS:
4478
80
  case DxilResource::Kind::Texture2DMSArray:
4479
80
    opcode = OP::OpCode::TextureStoreSample;
4480
80
    break;
4481
1.97k
  default:
4482
1.97k
    opcode = OP::OpCode::TextureStore;
4483
1.97k
    break;
4484
16.6k
  }
4485
4486
16.6k
  Type *i32Ty = Builder.getInt32Ty();
4487
16.6k
  Type *i64Ty = Builder.getInt64Ty();
4488
16.6k
  Type *doubleTy = Builder.getDoubleTy();
4489
16.6k
  Type *EltTy = Ty->getScalarType();
4490
16.6k
  if (EltTy->isIntegerTy(1)) {
4491
    // Since we're going to memory, convert bools to their memory
4492
    // representation.
4493
344
    EltTy = i32Ty;
4494
344
    if (Ty->isVectorTy())
4495
316
      Ty = VectorType::get(EltTy, Ty->getVectorNumElements());
4496
28
    else
4497
28
      Ty = EltTy;
4498
344
    val = Builder.CreateZExt(val, Ty);
4499
344
  }
4500
4501
  // If RawBuffer store of 64-bit value, don't set alignment to 8,
4502
  // since buffer alignment isn't known to be anything over 4.
4503
16.6k
  unsigned alignValue = OP->GetAllocSizeForType(EltTy);
4504
16.6k
  if (RK == HLResource::Kind::RawBuffer && 
alignValue > 43.06k
)
4505
232
    alignValue = 4;
4506
16.6k
  Constant *Alignment = OP->GetI32Const(alignValue);
4507
16.6k
  bool is64 = EltTy == i64Ty || 
EltTy == doubleTy15.6k
;
4508
16.6k
  if (is64 && 
IsTyped1.82k
) {
4509
218
    EltTy = i32Ty;
4510
218
  }
4511
4512
16.6k
  llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode);
4513
4514
16.6k
  llvm::Value *undefI =
4515
16.6k
      llvm::UndefValue::get(llvm::Type::getInt32Ty(Ty->getContext()));
4516
4517
16.6k
  llvm::Value *undefVal = llvm::UndefValue::get(Ty->getScalarType());
4518
4519
16.6k
  SmallVector<Value *, 13> storeArgs;
4520
16.6k
  storeArgs.emplace_back(opArg);  // opcode
4521
16.6k
  storeArgs.emplace_back(handle); // resource handle
4522
4523
16.6k
  unsigned OffsetIdx = 0;
4524
16.6k
  if (opcode == OP::OpCode::RawBufferStore ||
4525
16.6k
      
opcode == OP::OpCode::RawBufferVectorStore5.60k
||
4526
16.6k
      
opcode == OP::OpCode::BufferStore3.24k
) {
4527
    // Append Coord0 (Index) value.
4528
14.5k
    if (Idx->getType()->isVectorTy()) {
4529
0
      Value *ScalarIdx = Builder.CreateExtractElement(Idx, (uint64_t)0);
4530
0
      storeArgs.emplace_back(ScalarIdx); // Coord0 (Index).
4531
14.5k
    } else {
4532
14.5k
      storeArgs.emplace_back(Idx); // Coord0 (Index).
4533
14.5k
    }
4534
4535
    // Store OffsetIdx representing the argument that may need to be incremented
4536
    // later to load additional chunks of data.
4537
    // Only structured buffers can use the offset parameter.
4538
    // Others must increment the index.
4539
14.5k
    if (RK == DxilResource::Kind::StructuredBuffer)
4540
10.3k
      OffsetIdx = storeArgs.size();
4541
4.25k
    else
4542
4.25k
      OffsetIdx = storeArgs.size() - 1;
4543
4544
    // Coord1 (Offset).
4545
14.5k
    storeArgs.emplace_back(offset);
4546
14.5k
  } else {
4547
    // texture store
4548
2.05k
    unsigned coordSize = DxilResource::GetNumCoords(RK);
4549
4550
    // Set x first.
4551
2.05k
    if (Idx->getType()->isVectorTy())
4552
1.59k
      storeArgs.emplace_back(Builder.CreateExtractElement(Idx, (uint64_t)0));
4553
466
    else
4554
466
      storeArgs.emplace_back(Idx);
4555
4556
6.16k
    for (unsigned i = 1; i < 3; 
i++4.11k
) {
4557
4.11k
      if (i < coordSize)
4558
1.70k
        storeArgs.emplace_back(Builder.CreateExtractElement(Idx, i));
4559
2.41k
      else
4560
2.41k
        storeArgs.emplace_back(undefI);
4561
4.11k
    }
4562
    // TODO: support mip for texture ST
4563
2.05k
  }
4564
4565
  // RawBufferVectorStore only takes a single value and alignment arguments.
4566
16.6k
  if (opcode == DXIL::OpCode::RawBufferVectorStore) {
4567
2.35k
    storeArgs.emplace_back(val);
4568
2.35k
    storeArgs.emplace_back(Alignment);
4569
2.35k
    Function *F = OP->GetOpFunc(DXIL::OpCode::RawBufferVectorStore, Ty);
4570
2.35k
    Builder.CreateCall(F, storeArgs);
4571
2.35k
    return;
4572
2.35k
  }
4573
14.2k
  Function *F = OP->GetOpFunc(opcode, EltTy);
4574
4575
14.2k
  constexpr unsigned MaxStoreElemCount = 4;
4576
14.2k
  const unsigned CompCount = Ty->isVectorTy() ? 
Ty->getVectorNumElements()8.15k
:
16.12k
;
4577
14.2k
  const unsigned StoreInstCount =
4578
14.2k
      (CompCount / MaxStoreElemCount) + (CompCount % MaxStoreElemCount != 0);
4579
14.2k
  SmallVector<decltype(storeArgs), 4> storeArgsList;
4580
4581
  // Max number of element to store should be 16 (for a 4x4 matrix)
4582
14.2k
  DXASSERT_NOMSG(StoreInstCount >= 1 && StoreInstCount <= 4);
4583
4584
  // If number of elements to store exceeds the maximum number of elements
4585
  // that can be stored in a single store call,  make sure to generate enough
4586
  // store calls to store all elements
4587
29.0k
  for (unsigned j = 0; j < StoreInstCount; 
j++14.7k
) {
4588
14.7k
    decltype(storeArgs) newStoreArgs;
4589
14.7k
    for (Value *storeArg : storeArgs)
4590
60.9k
      newStoreArgs.emplace_back(storeArg);
4591
14.7k
    storeArgsList.emplace_back(newStoreArgs);
4592
14.7k
  }
4593
4594
29.0k
  for (unsigned j = 0; j < storeArgsList.size(); 
j++14.7k
) {
4595
    // For second and subsequent store calls, increment the resource-appropriate
4596
    // index or offset parameter.
4597
14.7k
    if (j > 0) {
4598
436
      unsigned EltSize = OP->GetAllocSizeForType(EltTy);
4599
436
      unsigned NewCoord = EltSize * MaxStoreElemCount * j;
4600
436
      Value *NewCoordVal = ConstantInt::get(Builder.getInt32Ty(), NewCoord);
4601
436
      NewCoordVal = Builder.CreateAdd(storeArgsList[0][OffsetIdx], NewCoordVal);
4602
436
      storeArgsList[j][OffsetIdx] = NewCoordVal;
4603
436
    }
4604
4605
    // Set value parameters.
4606
14.7k
    uint8_t mask = 0;
4607
14.7k
    if (Ty->isVectorTy()) {
4608
8.59k
      unsigned vecSize =
4609
8.59k
          std::min((j + 1) * MaxStoreElemCount, Ty->getVectorNumElements()) -
4610
8.59k
          (j * MaxStoreElemCount);
4611
8.59k
      Value *emptyVal = undefVal;
4612
8.59k
      if (IsTyped) {
4613
1.69k
        mask = DXIL::kCompMask_All;
4614
1.69k
        emptyVal = Builder.CreateExtractElement(val, (uint64_t)0);
4615
1.69k
      }
4616
4617
42.9k
      for (unsigned i = 0; i < MaxStoreElemCount; 
i++34.3k
) {
4618
34.3k
        if (i < vecSize) {
4619
23.5k
          storeArgsList[j].emplace_back(
4620
23.5k
              Builder.CreateExtractElement(val, (j * MaxStoreElemCount) + i));
4621
23.5k
          mask |= (1 << i);
4622
23.5k
        } else {
4623
10.8k
          storeArgsList[j].emplace_back(emptyVal);
4624
10.8k
        }
4625
34.3k
      }
4626
4627
8.59k
    } else {
4628
6.12k
      if (IsTyped) {
4629
1.55k
        mask = DXIL::kCompMask_All;
4630
1.55k
        storeArgsList[j].emplace_back(val);
4631
1.55k
        storeArgsList[j].emplace_back(val);
4632
1.55k
        storeArgsList[j].emplace_back(val);
4633
1.55k
        storeArgsList[j].emplace_back(val);
4634
4.57k
      } else {
4635
4.57k
        storeArgsList[j].emplace_back(val);
4636
4.57k
        storeArgsList[j].emplace_back(undefVal);
4637
4.57k
        storeArgsList[j].emplace_back(undefVal);
4638
4.57k
        storeArgsList[j].emplace_back(undefVal);
4639
4.57k
        mask = DXIL::kCompMask_X;
4640
4.57k
      }
4641
6.12k
    }
4642
4643
14.7k
    if (is64 && 
IsTyped1.49k
) {
4644
218
      unsigned size = 1;
4645
218
      if (Ty->isVectorTy()) {
4646
36
        size =
4647
36
            std::min((j + 1) * MaxStoreElemCount, Ty->getVectorNumElements()) -
4648
36
            (j * MaxStoreElemCount);
4649
36
      }
4650
218
      DXASSERT(size <= 2, "raw/typed buffer only allow 4 dwords");
4651
218
      unsigned val0OpIdx = opcode == DXIL::OpCode::TextureStore ||
4652
218
                                   
opcode == DXIL::OpCode::TextureStoreSample114
4653
218
                               ? 
DXIL::OperandIndex::kTextureStoreVal0OpIdx112
4654
218
                               : 
DXIL::OperandIndex::kBufferStoreVal0OpIdx106
;
4655
218
      Value *V0 = storeArgsList[j][val0OpIdx];
4656
218
      Value *V1 = storeArgsList[j][val0OpIdx + 1];
4657
4658
218
      Value *vals32[4];
4659
218
      EltTy = Ty->getScalarType();
4660
218
      Split64bitValForStore(EltTy, {V0, V1}, size, vals32, OP, Builder);
4661
      // Fill the uninit vals.
4662
218
      if (size == 1) {
4663
190
        vals32[2] = vals32[0];
4664
190
        vals32[3] = vals32[1];
4665
190
      }
4666
      // Change valOp to 32 version.
4667
1.09k
      for (unsigned i = 0; i < 4; 
i++872
) {
4668
872
        storeArgsList[j][val0OpIdx + i] = vals32[i];
4669
872
      }
4670
      // change mask for double
4671
218
      if (opcode == DXIL::OpCode::RawBufferStore) {
4672
0
        mask = size == 1 ? DXIL::kCompMask_X | DXIL::kCompMask_Y
4673
0
                         : DXIL::kCompMask_All;
4674
0
      }
4675
218
    }
4676
4677
14.7k
    storeArgsList[j].emplace_back(OP->GetU8Const(mask)); // mask
4678
14.7k
    if (opcode == DXIL::OpCode::RawBufferStore)
4679
11.4k
      storeArgsList[j].emplace_back(Alignment); // alignment only for raw buffer
4680
3.24k
    else if (opcode == DXIL::OpCode::TextureStoreSample) {
4681
80
      storeArgsList[j].emplace_back(
4682
80
          sampIdx ? 
sampIdx40
4683
80
                  : 
Builder.getInt32(0)40
); // sample idx only for MS textures
4684
80
    }
4685
14.7k
    Builder.CreateCall(F, storeArgsList[j]);
4686
14.7k
  }
4687
14.2k
}
4688
4689
// Lowers an HL resource Store call by forwarding its operands to
// TranslateStore. The call's offset operand is passed as the store index and
// the separate offset operand is left undef.
// Always returns nullptr.
Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                              HLOperationLowerHelper &helper,
                              HLObjectOperationLowerHelper *pObjHelper,
                              bool &Translated) {
  IRBuilder<> Builder(CI);

  Value *hdl = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  const DXIL::ResourceKind resKind = pObjHelper->GetRK(hdl);

  Value *storedVal = CI->getArgOperand(HLOperandIndex::kStoreValOpIdx);
  Value *storeIdx = CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx);
  Value *undefOffset = UndefValue::get(Builder.getInt32Ty());

  TranslateStore(resKind, hdl, storedVal, storeIdx, undefOffset, Builder,
                 &helper.hlslOP);

  return nullptr;
}
4706
} // namespace
4707
4708
// Atomic intrinsics.
4709
namespace {
4710
// Atomic intrinsics.
4711
struct AtomicHelper {
4712
  AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Type *opType = nullptr);
4713
  AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx,
4714
               Value *baseOffset, Type *opType = nullptr);
4715
  OP::OpCode opcode;
4716
  Value *handle;
4717
  Value *addr;
4718
  Value *offset; // Offset for structrued buffer.
4719
  Value *value;
4720
  Value *originalValue;
4721
  Value *compareValue;
4722
  Type *operationType;
4723
};
4724
4725
// For MOP version of Interlocked*.
4726
AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Type *opType)
4727
2.48k
    : opcode(op), handle(h), offset(nullptr), originalValue(nullptr),
4728
2.48k
      operationType(opType) {
4729
2.48k
  addr = CI->getArgOperand(HLOperandIndex::kObjectInterlockedDestOpIndex);
4730
2.48k
  if (op == OP::OpCode::AtomicCompareExchange) {
4731
962
    compareValue = CI->getArgOperand(
4732
962
        HLOperandIndex::kObjectInterlockedCmpCompareValueOpIndex);
4733
962
    value =
4734
962
        CI->getArgOperand(HLOperandIndex::kObjectInterlockedCmpValueOpIndex);
4735
962
    if (CI->getNumArgOperands() ==
4736
962
        (HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex + 1))
4737
526
      originalValue = CI->getArgOperand(
4738
526
          HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex);
4739
1.52k
  } else {
4740
1.52k
    value = CI->getArgOperand(HLOperandIndex::kObjectInterlockedValueOpIndex);
4741
1.52k
    if (CI->getNumArgOperands() ==
4742
1.52k
        (HLOperandIndex::kObjectInterlockedOriginalValueOpIndex + 1))
4743
1.34k
      originalValue = CI->getArgOperand(
4744
1.34k
          HLOperandIndex::kObjectInterlockedOriginalValueOpIndex);
4745
1.52k
  }
4746
2.48k
  if (nullptr == operationType)
4747
2.32k
    operationType = value->getType();
4748
2.48k
}
4749
// For IOP version of Interlocked*.
4750
AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx,
4751
                           Value *baseOffset, Type *opType)
4752
4.18k
    : opcode(op), handle(h), addr(bufIdx), offset(baseOffset),
4753
4.18k
      originalValue(nullptr), operationType(opType) {
4754
4.18k
  if (op == OP::OpCode::AtomicCompareExchange) {
4755
1.42k
    compareValue =
4756
1.42k
        CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex);
4757
1.42k
    value = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex);
4758
1.42k
    if (CI->getNumArgOperands() ==
4759
1.42k
        (HLOperandIndex::kInterlockedCmpOriginalValueOpIndex + 1))
4760
692
      originalValue = CI->getArgOperand(
4761
692
          HLOperandIndex::kInterlockedCmpOriginalValueOpIndex);
4762
2.75k
  } else {
4763
2.75k
    value = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex);
4764
2.75k
    if (CI->getNumArgOperands() ==
4765
2.75k
        (HLOperandIndex::kInterlockedOriginalValueOpIndex + 1))
4766
720
      originalValue =
4767
720
          CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex);
4768
2.75k
  }
4769
4.18k
  if (nullptr == operationType)
4770
4.10k
    operationType = value->getType();
4771
4.18k
}
4772
4773
void TranslateAtomicBinaryOperation(AtomicHelper &helper,
4774
                                    DXIL::AtomicBinOpCode atomicOp,
4775
4.28k
                                    IRBuilder<> &Builder, hlsl::OP *hlslOP) {
4776
4.28k
  Value *handle = helper.handle;
4777
4.28k
  Value *addr = helper.addr;
4778
4.28k
  Value *val = helper.value;
4779
4.28k
  Type *Ty = helper.operationType;
4780
4.28k
  Type *valTy = val->getType();
4781
4782
4.28k
  Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext()));
4783
4784
4.28k
  Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType());
4785
4.28k
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode));
4786
4.28k
  Value *atomicOpArg = hlslOP->GetU32Const(static_cast<unsigned>(atomicOp));
4787
4788
4.28k
  if (Ty != valTy)
4789
72
    val = Builder.CreateBitCast(val, Ty);
4790
4791
4.28k
  Value *args[] = {opArg,  handle, atomicOpArg,
4792
4.28k
                   undefI, undefI, undefI, // coordinates
4793
4.28k
                   val};
4794
4795
  // Setup coordinates.
4796
4.28k
  if (addr->getType()->isVectorTy()) {
4797
250
    unsigned vectorNumElements = addr->getType()->getVectorNumElements();
4798
250
    DXASSERT(vectorNumElements <= 3, "up to 3 elements for atomic binary op");
4799
250
    assert(vectorNumElements <= 3);
4800
846
    for (unsigned i = 0; i < vectorNumElements; 
i++596
) {
4801
596
      Value *Elt = Builder.CreateExtractElement(addr, i);
4802
596
      args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx + i] = Elt;
4803
596
    }
4804
250
  } else
4805
4.03k
    args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx] = addr;
4806
4807
  // Set offset for structured buffer.
4808
4.28k
  if (helper.offset)
4809
1.00k
    args[DXIL::OperandIndex::kAtomicBinOpCoord1OpIdx] = helper.offset;
4810
4811
4.28k
  Value *origVal =
4812
4.28k
      Builder.CreateCall(dxilAtomic, args, hlslOP->GetAtomicOpName(atomicOp));
4813
4.28k
  if (helper.originalValue) {
4814
2.06k
    if (Ty != valTy)
4815
72
      origVal = Builder.CreateBitCast(origVal, valTy);
4816
2.06k
    Builder.CreateStore(origVal, helper.originalValue);
4817
2.06k
  }
4818
4.28k
}
4819
4820
// Lowers the method (MOP) form of the Interlocked* binary intrinsics.
// The intrinsic selects the DXIL atomic bin-op code; the float exchange
// variant additionally forces the operation to be done as i32.
// `opcode`, `pObjHelper` and `Translated` are not used here. Always returns
// nullptr.
Value *TranslateMopAtomicBinaryOperation(
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
    bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;

  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  IRBuilder<> Builder(CI);

  // Null means "use the type of the value operand".
  Type *opType = nullptr;
  DXIL::AtomicBinOpCode binOp;
  switch (IOP) {
  case IntrinsicOp::MOP_InterlockedAdd:
  case IntrinsicOp::MOP_InterlockedAdd64:
    binOp = DXIL::AtomicBinOpCode::Add;
    break;
  case IntrinsicOp::MOP_InterlockedAnd:
  case IntrinsicOp::MOP_InterlockedAnd64:
    binOp = DXIL::AtomicBinOpCode::And;
    break;
  case IntrinsicOp::MOP_InterlockedExchange:
  case IntrinsicOp::MOP_InterlockedExchange64:
    binOp = DXIL::AtomicBinOpCode::Exchange;
    break;
  case IntrinsicOp::MOP_InterlockedExchangeFloat:
    binOp = DXIL::AtomicBinOpCode::Exchange;
    opType = Type::getInt32Ty(CI->getContext());
    break;
  case IntrinsicOp::MOP_InterlockedMax:
  case IntrinsicOp::MOP_InterlockedMax64:
    binOp = DXIL::AtomicBinOpCode::IMax;
    break;
  case IntrinsicOp::MOP_InterlockedMin:
  case IntrinsicOp::MOP_InterlockedMin64:
    binOp = DXIL::AtomicBinOpCode::IMin;
    break;
  case IntrinsicOp::MOP_InterlockedUMax:
    binOp = DXIL::AtomicBinOpCode::UMax;
    break;
  case IntrinsicOp::MOP_InterlockedUMin:
    binOp = DXIL::AtomicBinOpCode::UMin;
    break;
  case IntrinsicOp::MOP_InterlockedOr:
  case IntrinsicOp::MOP_InterlockedOr64:
    binOp = DXIL::AtomicBinOpCode::Or;
    break;
  case IntrinsicOp::MOP_InterlockedXor:
  case IntrinsicOp::MOP_InterlockedXor64:
  default:
    DXASSERT(IOP == IntrinsicOp::MOP_InterlockedXor ||
                 IOP == IntrinsicOp::MOP_InterlockedXor64,
             "invalid MOP atomic intrinsic");
    binOp = DXIL::AtomicBinOpCode::Xor;
    break;
  }

  AtomicHelper atomicHelper(CI, DXIL::OpCode::AtomicBinOp, handle, opType);
  TranslateAtomicBinaryOperation(atomicHelper, binOp, Builder, hlslOP);

  return nullptr;
}
4896
void TranslateAtomicCmpXChg(AtomicHelper &helper, IRBuilder<> &Builder,
4897
2.38k
                            hlsl::OP *hlslOP) {
4898
2.38k
  Value *handle = helper.handle;
4899
2.38k
  Value *addr = helper.addr;
4900
2.38k
  Value *val = helper.value;
4901
2.38k
  Value *cmpVal = helper.compareValue;
4902
4903
2.38k
  Type *Ty = helper.operationType;
4904
2.38k
  Type *valTy = val->getType();
4905
4906
2.38k
  Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext()));
4907
4908
2.38k
  Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType());
4909
2.38k
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode));
4910
4911
2.38k
  if (Ty != valTy) {
4912
168
    val = Builder.CreateBitCast(val, Ty);
4913
168
    if (cmpVal)
4914
168
      cmpVal = Builder.CreateBitCast(cmpVal, Ty);
4915
168
  }
4916
4917
2.38k
  Value *args[] = {opArg,  handle, undefI, undefI, undefI, // coordinates
4918
2.38k
                   cmpVal, val};
4919
4920
  // Setup coordinates.
4921
2.38k
  if (addr->getType()->isVectorTy()) {
4922
60
    unsigned vectorNumElements = addr->getType()->getVectorNumElements();
4923
60
    DXASSERT(vectorNumElements <= 3, "up to 3 elements in atomic op");
4924
60
    assert(vectorNumElements <= 3);
4925
196
    for (unsigned i = 0; i < vectorNumElements; 
i++136
) {
4926
136
      Value *Elt = Builder.CreateExtractElement(addr, i);
4927
136
      args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx + i] = Elt;
4928
136
    }
4929
60
  } else
4930
2.32k
    args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx] = addr;
4931
4932
  // Set offset for structured buffer.
4933
2.38k
  if (helper.offset)
4934
536
    args[DXIL::OperandIndex::kAtomicCmpExchangeCoord1OpIdx] = helper.offset;
4935
4936
2.38k
  Value *origVal = Builder.CreateCall(dxilAtomic, args);
4937
2.38k
  if (helper.originalValue) {
4938
1.21k
    if (Ty != valTy)
4939
84
      origVal = Builder.CreateBitCast(origVal, valTy);
4940
1.21k
    Builder.CreateStore(origVal, helper.originalValue);
4941
1.21k
  }
4942
2.38k
}
4943
4944
// Lowers the method (MOP) form of InterlockedCompareExchange/CompareStore.
// The *FloatBitwise variants are performed as i32; every other variant uses
// the type of the value operand. `opcode`, `pObjHelper` and `Translated` are
// not used here. Always returns nullptr.
Value *TranslateMopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP,
                                 OP::OpCode opcode,
                                 HLOperationLowerHelper &helper,
                                 HLObjectOperationLowerHelper *pObjHelper,
                                 bool &Translated) {
  const bool isFloatBitwise =
      IOP == IntrinsicOp::MOP_InterlockedCompareStoreFloatBitwise ||
      IOP == IntrinsicOp::MOP_InterlockedCompareExchangeFloatBitwise;

  IRBuilder<> Builder(CI);
  Type *opType = nullptr;
  if (isFloatBitwise)
    opType = Type::getInt32Ty(CI->getContext());

  AtomicHelper atomicHelper(CI, OP::OpCode::AtomicCompareExchange,
                            CI->getArgOperand(HLOperandIndex::kHandleOpIdx),
                            opType);
  TranslateAtomicCmpXChg(atomicHelper, Builder, &helper.hlslOP);
  return nullptr;
}
4962
4963
void TranslateSharedMemOrNodeAtomicBinOp(CallInst *CI, IntrinsicOp IOP,
4964
1.49k
                                         Value *addr) {
4965
1.49k
  AtomicRMWInst::BinOp Op;
4966
1.49k
  IRBuilder<> Builder(CI);
4967
1.49k
  Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex);
4968
1.49k
  PointerType *ptrType = dyn_cast<PointerType>(
4969
1.49k
      CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex)->getType());
4970
1.49k
  bool needCast = ptrType && ptrType->getElementType()->isFloatTy();
4971
1.49k
  switch (IOP) {
4972
376
  case IntrinsicOp::IOP_InterlockedAdd:
4973
376
    Op = AtomicRMWInst::BinOp::Add;
4974
376
    break;
4975
104
  case IntrinsicOp::IOP_InterlockedAnd:
4976
104
    Op = AtomicRMWInst::BinOp::And;
4977
104
    break;
4978
472
  case IntrinsicOp::IOP_InterlockedExchange:
4979
472
    if (needCast) {
4980
48
      val = Builder.CreateBitCast(val, Type::getInt32Ty(CI->getContext()));
4981
48
      addr = Builder.CreateBitCast(
4982
48
          addr, Type::getInt32PtrTy(CI->getContext(),
4983
48
                                    addr->getType()->getPointerAddressSpace()));
4984
48
    }
4985
472
    Op = AtomicRMWInst::BinOp::Xchg;
4986
472
    break;
4987
68
  case IntrinsicOp::IOP_InterlockedMax:
4988
68
    Op = AtomicRMWInst::BinOp::Max;
4989
68
    break;
4990
84
  case IntrinsicOp::IOP_InterlockedUMax:
4991
84
    Op = AtomicRMWInst::BinOp::UMax;
4992
84
    break;
4993
60
  case IntrinsicOp::IOP_InterlockedMin:
4994
60
    Op = AtomicRMWInst::BinOp::Min;
4995
60
    break;
4996
68
  case IntrinsicOp::IOP_InterlockedUMin:
4997
68
    Op = AtomicRMWInst::BinOp::UMin;
4998
68
    break;
4999
156
  case IntrinsicOp::IOP_InterlockedOr:
5000
156
    Op = AtomicRMWInst::BinOp::Or;
5001
156
    break;
5002
104
  case IntrinsicOp::IOP_InterlockedXor:
5003
104
  default:
5004
104
    DXASSERT(IOP == IntrinsicOp::IOP_InterlockedXor, "Invalid Intrinsic");
5005
104
    Op = AtomicRMWInst::BinOp::Xor;
5006
104
    break;
5007
1.49k
  }
5008
5009
1.49k
  Value *Result = Builder.CreateAtomicRMW(
5010
1.49k
      Op, addr, val, AtomicOrdering::SequentiallyConsistent);
5011
1.49k
  if (CI->getNumArgOperands() >
5012
1.49k
      HLOperandIndex::kInterlockedOriginalValueOpIndex) {
5013
574
    if (needCast)
5014
48
      Result =
5015
48
          Builder.CreateBitCast(Result, Type::getFloatTy(CI->getContext()));
5016
574
    Builder.CreateStore(
5017
574
        Result,
5018
574
        CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex));
5019
574
  }
5020
1.49k
}
5021
5022
3.65k
// Returns the operand of `Ptr` when `Ptr` is an address-space cast (either an
// instruction or a constant expression); otherwise returns `Ptr` unchanged.
// Only a single cast level is removed.
static Value *SkipAddrSpaceCast(Value *Ptr) {
  if (AddrSpaceCastInst *castInst = dyn_cast<AddrSpaceCastInst>(Ptr))
    return castInst->getOperand(0);

  ConstantExpr *constExpr = dyn_cast<ConstantExpr>(Ptr);
  const bool isConstCast =
      constExpr && constExpr->getOpcode() == Instruction::AddrSpaceCast;
  if (isConstCast)
    return constExpr->getOperand(0);

  return Ptr;
}
5032
5033
// Emits the DXIL call that increments a node output's count.
//   CI          - HL call; the node handle and the count are read from it.
//   op          - DXIL opcode to emit.
//   helper      - supplies the hlsl::OP used for declarations and constants.
//   isPerThread - passed to the call as an i1 constant.
// The call operands are (opcode, handle, count, perThread). `IOP`,
// `pObjHelper` and `Translated` are not used here. Always returns nullptr.
Value *
TranslateNodeIncrementOutputCount(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                                  HLOperationLowerHelper &helper,
                                  HLObjectOperationLowerHelper *pObjHelper,
                                  bool isPerThread, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;

  Value *nodeHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  Value *increment =
      CI->getArgOperand(HLOperandIndex::kIncrementOutputCountCountIdx);

  Function *incrementFn = hlslOP->GetOpFunc(op, CI->getType());
  Value *opcodeArg = hlslOP->GetU32Const(static_cast<unsigned>(op));
  Value *perThreadArg = hlslOP->GetI1Const(isPerThread);

  IRBuilder<> Builder(CI);
  Builder.CreateCall(incrementFn,
                     {opcodeArg, nodeHandle, increment, perThreadArg});
  return nullptr;
}
5053
5054
/*
5055
HLSL:
5056
void EmptyNodeOutput::GroupIncrementOutputCount(uint count)
5057
DXIL:
5058
void @dx.op.groupIncrementOutputCount(i32 %Opcode, %dx.types.NodeHandle
5059
%NodeOutput, i32 count)
5060
*/
5061
Value *TranslateNodeGroupIncrementOutputCount(
5062
    CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
5063
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
5064
76
    bool &Translated) {
5065
76
  return TranslateNodeIncrementOutputCount(CI, IOP, op, helper, pObjHelper,
5066
76
                                           /*isPerThread*/ false, Translated);
5067
76
}
5068
5069
/*
5070
HLSL:
5071
void EmptyNodeOutput::ThreadIncrementOutputCount(uint count)
5072
DXIL:
5073
void @dx.op.threadIncrementOutputCount(i32 %Opcode, %dx.types.NodeHandle
5074
%NodeOutput, i32 count)
5075
*/
5076
Value *TranslateNodeThreadIncrementOutputCount(
5077
    CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
5078
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
5079
8
    bool &Translated) {
5080
8
  return TranslateNodeIncrementOutputCount(CI, IOP, op, helper, pObjHelper,
5081
8
                                           /*isPerThread*/ true, Translated);
5082
8
}
5083
5084
// For known non-groupshared, verify that the destination param is valid
5085
void ValidateAtomicDestination(CallInst *CI,
5086
1.00k
                               HLObjectOperationLowerHelper *pObjHelper) {
5087
1.00k
  Value *dest = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex);
5088
  // If we encounter a gep, we may provide a more specific error message
5089
1.00k
  bool hasGep = isa<GetElementPtrInst>(dest);
5090
5091
  // Confirm that dest is a properly-used UAV
5092
5093
  // Drill through subscripts and geps, anything else indicates a misuse
5094
2.23k
  while (true) {
5095
2.23k
    if (GetElementPtrInst *gep = dyn_cast<GetElementPtrInst>(dest)) {
5096
284
      dest = gep->getPointerOperand();
5097
284
      continue;
5098
284
    }
5099
1.95k
    if (CallInst *handle = dyn_cast<CallInst>(dest)) {
5100
1.86k
      hlsl::HLOpcodeGroup group =
5101
1.86k
          hlsl::GetHLOpcodeGroup(handle->getCalledFunction());
5102
1.86k
      if (group != HLOpcodeGroup::HLSubscript)
5103
914
        break;
5104
946
      dest = handle->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
5105
946
      continue;
5106
1.86k
    }
5107
90
    break;
5108
1.95k
  }
5109
5110
1.00k
  if (pObjHelper->GetRC(dest) == DXIL::ResourceClass::UAV) {
5111
914
    DXIL::ResourceKind RK = pObjHelper->GetRK(dest);
5112
914
    if (DXIL::IsStructuredBuffer(RK))
5113
404
      return; // no errors
5114
510
    if (DXIL::IsTyped(RK)) {
5115
510
      if (hasGep)
5116
16
        dxilutil::EmitErrorOnInstruction(
5117
16
            CI, "Typed resources used in atomic operations must have a scalar "
5118
16
                "element type.");
5119
510
      return; // error emitted or else no errors
5120
510
    }
5121
510
  }
5122
5123
90
  dxilutil::EmitErrorOnInstruction(
5124
90
      CI, "Atomic operation targets must be groupshared, Node Record or UAV.");
5125
90
}
5126
5127
// Lowers the free-function (IOP) form of the Interlocked* binary intrinsics.
// Destinations in the groupshared or node-record address space are lowered
// immediately. For any other destination, `Translated` is set to false and
// the destination is validated. `opcode` and `helper` are not used here.
// Always returns nullptr.
Value *TranslateIopAtomicBinaryOperation(
    CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
    bool &Translated) {
  Value *dest = SkipAddrSpaceCast(
      CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex));

  const unsigned destAddrSpace = dest->getType()->getPointerAddressSpace();
  const bool isSharedOrNodeRecord =
      destAddrSpace == DXIL::kTGSMAddrSpace ||
      destAddrSpace == DXIL::kNodeRecordAddrSpace;

  if (isSharedOrNodeRecord) {
    TranslateSharedMemOrNodeAtomicBinOp(CI, IOP, dest);
    return nullptr;
  }

  // If not groupshared or node record, we either have an error case or will
  // translate the atomic op in the process of translating users of the
  // subscript operator. Mark not translated and validate the dest param.
  Translated = false;
  ValidateAtomicDestination(CI, pObjHelper);
  return nullptr;
}
5148
5149
1.16k
// Lowers an IOP InterlockedCompareExchange/CompareStore on a pointer
// destination to an LLVM cmpxchg instruction, sequentially consistent for
// both success and failure.
//   CI   - the HL intrinsic call; the compare value, new value and optional
//          original-value out pointer are read from it.
//   addr - pointer to operate on.
// For a float destination the operands and pointer are bitcast to i32 first,
// and the loaded value is bitcast back to float before being stored.
void TranslateSharedMemOrNodeAtomicCmpXChg(CallInst *CI, Value *addr) {
  Value *newVal =
      CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex);
  Value *expected =
      CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex);
  IRBuilder<> Builder(CI);

  // Float-ness is decided from the destination operand as written in the call.
  PointerType *destPtrTy = dyn_cast<PointerType>(
      CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex)->getType());
  const bool needCast =
      destPtrTy && destPtrTy->getElementType()->isFloatTy();
  if (needCast) {
    newVal = Builder.CreateBitCast(newVal, Type::getInt32Ty(CI->getContext()));
    expected =
        Builder.CreateBitCast(expected, Type::getInt32Ty(CI->getContext()));
    const unsigned addrSpace =
        cast<PointerType>(addr->getType())->getAddressSpace();
    addr = Builder.CreateBitCast(
        addr, Type::getInt32PtrTy(CI->getContext(), addrSpace));
  }

  Value *xchgResult = Builder.CreateAtomicCmpXchg(
      addr, expected, newVal, AtomicOrdering::SequentiallyConsistent,
      AtomicOrdering::SequentiallyConsistent);

  // The original-value out parameter is optional.
  const bool wantsOriginal =
      CI->getNumArgOperands() >
      HLOperandIndex::kInterlockedCmpOriginalValueOpIndex;
  if (!wantsOriginal)
    return;

  // Element 0 of the cmpxchg result is the value that was in memory.
  Value *previous = Builder.CreateExtractValue(xchgResult, 0);
  if (needCast)
    previous =
        Builder.CreateBitCast(previous, Type::getFloatTy(CI->getContext()));
  Builder.CreateStore(
      previous,
      CI->getArgOperand(HLOperandIndex::kInterlockedCmpOriginalValueOpIndex));
}
5182
5183
// Lowers the free-function (IOP) form of InterlockedCompareExchange and
// InterlockedCompareStore. Destinations in the groupshared or node-record
// address space are lowered immediately. For any other destination,
// `Translated` is set to false and the destination is validated. `IOP`,
// `opcode` and `helper` are not used here. Always returns nullptr.
Value *TranslateIopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP,
                                 DXIL::OpCode opcode,
                                 HLOperationLowerHelper &helper,
                                 HLObjectOperationLowerHelper *pObjHelper,
                                 bool &Translated) {
  Value *dest = SkipAddrSpaceCast(
      CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex));

  const unsigned destAddrSpace = dest->getType()->getPointerAddressSpace();
  const bool isSharedOrNodeRecord =
      destAddrSpace == DXIL::kTGSMAddrSpace ||
      destAddrSpace == DXIL::kNodeRecordAddrSpace;

  if (isSharedOrNodeRecord) {
    TranslateSharedMemOrNodeAtomicCmpXChg(CI, dest);
    return nullptr;
  }

  // If not groupshared, we either have an error case or will translate
  // the atomic op in the process of translating users of the subscript
  // operator. Mark not translated and validate the dest param.
  Translated = false;
  ValidateAtomicDestination(CI, pObjHelper);
  return nullptr;
}
5205
} // namespace
5206
5207
// Process Tess Factor.
5208
namespace {
5209
5210
// Clamp to [0.0f..1.0f], NaN->0.0f.
5211
Value *CleanupTessFactorScale(Value *input, hlsl::OP *hlslOP,
5212
288
                              IRBuilder<> &Builder) {
5213
288
  float fMin = 0;
5214
288
  float fMax = 1;
5215
288
  Type *f32Ty = input->getType()->getScalarType();
5216
288
  Value *minFactor = ConstantFP::get(f32Ty, fMin);
5217
288
  Value *maxFactor = ConstantFP::get(f32Ty, fMax);
5218
288
  Type *Ty = input->getType();
5219
288
  if (Ty->isVectorTy())
5220
288
    minFactor = SplatToVector(minFactor, input->getType(), Builder);
5221
288
  Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor,
5222
288
                                           hlslOP, Builder);
5223
288
  if (Ty->isVectorTy())
5224
288
    maxFactor = SplatToVector(maxFactor, input->getType(), Builder);
5225
288
  return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP,
5226
288
                                    Builder);
5227
288
}
5228
5229
// Clamp to [1.0f..Inf], NaN->1.0f.
5230
288
Value *CleanupTessFactor(Value *input, hlsl::OP *hlslOP, IRBuilder<> &Builder) {
5231
288
  float fMin = 1.0;
5232
288
  Type *f32Ty = input->getType()->getScalarType();
5233
288
  Value *minFactor = ConstantFP::get(f32Ty, fMin);
5234
288
  minFactor = SplatToVector(minFactor, input->getType(), Builder);
5235
288
  return TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor,
5236
288
                                    hlslOP, Builder);
5237
288
}
5238
5239
// Do partitioning-specific clamping.
5240
Value *ClampTessFactor(Value *input,
5241
                       DXIL::TessellatorPartitioning partitionMode,
5242
680
                       hlsl::OP *hlslOP, IRBuilder<> &Builder) {
5243
680
  const unsigned kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR = 64;
5244
680
  const unsigned kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR = 63;
5245
5246
680
  const unsigned kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR = 2;
5247
680
  const unsigned kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR = 1;
5248
5249
680
  const unsigned kTESSELLATOR_MAX_TESSELLATION_FACTOR = 64;
5250
5251
680
  float fMin;
5252
680
  float fMax;
5253
680
  switch (partitionMode) {
5254
152
  case DXIL::TessellatorPartitioning::Integer:
5255
152
    fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
5256
152
    fMax = kTESSELLATOR_MAX_TESSELLATION_FACTOR;
5257
152
    break;
5258
152
  case DXIL::TessellatorPartitioning::Pow2:
5259
152
    fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
5260
152
    fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
5261
152
    break;
5262
224
  case DXIL::TessellatorPartitioning::FractionalOdd:
5263
224
    fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
5264
224
    fMax = kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR;
5265
224
    break;
5266
152
  case DXIL::TessellatorPartitioning::FractionalEven:
5267
152
  default:
5268
152
    DXASSERT(partitionMode == DXIL::TessellatorPartitioning::FractionalEven,
5269
152
             "invalid partition mode");
5270
152
    fMin = kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR;
5271
152
    fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
5272
152
    break;
5273
680
  }
5274
680
  Type *f32Ty = input->getType()->getScalarType();
5275
680
  Value *minFactor = ConstantFP::get(f32Ty, fMin);
5276
680
  Value *maxFactor = ConstantFP::get(f32Ty, fMax);
5277
680
  Type *Ty = input->getType();
5278
680
  if (Ty->isVectorTy())
5279
632
    minFactor = SplatToVector(minFactor, input->getType(), Builder);
5280
680
  Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor,
5281
680
                                           hlslOP, Builder);
5282
680
  if (Ty->isVectorTy())
5283
632
    maxFactor = SplatToVector(maxFactor, input->getType(), Builder);
5284
680
  return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP,
5285
680
                                    Builder);
5286
680
}
5287
5288
// round up for integer/pow2 partitioning
5289
// note that this code assumes the inputs should be in the range [1, inf),
5290
// which should be enforced by the clamp above.
5291
Value *RoundUpTessFactor(Value *input,
5292
                         DXIL::TessellatorPartitioning partitionMode,
5293
704
                         hlsl::OP *hlslOP, IRBuilder<> &Builder) {
5294
704
  switch (partitionMode) {
5295
152
  case DXIL::TessellatorPartitioning::Integer:
5296
152
    return TrivialDxilUnaryOperation(DXIL::OpCode::Round_pi, input, hlslOP,
5297
152
                                     Builder);
5298
152
  case DXIL::TessellatorPartitioning::Pow2: {
5299
152
    const unsigned kExponentMask = 0x7f800000;
5300
152
    const unsigned kExponentLSB = 0x00800000;
5301
152
    const unsigned kMantissaMask = 0x007fffff;
5302
152
    Type *Ty = input->getType();
5303
    // (val = (asuint(val) & mantissamask) ?
5304
    //      (asuint(val) & exponentmask) + exponentbump :
5305
    //      asuint(val) & exponentmask;
5306
152
    Type *uintTy = Type::getInt32Ty(Ty->getContext());
5307
152
    if (Ty->isVectorTy())
5308
152
      uintTy = VectorType::get(uintTy, Ty->getVectorNumElements());
5309
152
    Value *uintVal =
5310
152
        Builder.CreateCast(Instruction::CastOps::FPToUI, input, uintTy);
5311
5312
152
    Value *mantMask = ConstantInt::get(uintTy->getScalarType(), kMantissaMask);
5313
152
    mantMask = SplatToVector(mantMask, uintTy, Builder);
5314
152
    Value *manVal = Builder.CreateAnd(uintVal, mantMask);
5315
5316
152
    Value *expMask = ConstantInt::get(uintTy->getScalarType(), kExponentMask);
5317
152
    expMask = SplatToVector(expMask, uintTy, Builder);
5318
152
    Value *expVal = Builder.CreateAnd(uintVal, expMask);
5319
5320
152
    Value *expLSB = ConstantInt::get(uintTy->getScalarType(), kExponentLSB);
5321
152
    expLSB = SplatToVector(expLSB, uintTy, Builder);
5322
152
    Value *newExpVal = Builder.CreateAdd(expVal, expLSB);
5323
5324
152
    Value *manValNotZero =
5325
152
        Builder.CreateICmpEQ(manVal, ConstantAggregateZero::get(uintTy));
5326
152
    Value *factors = Builder.CreateSelect(manValNotZero, newExpVal, expVal);
5327
152
    return Builder.CreateUIToFP(factors, Ty);
5328
0
  } break;
5329
152
  case DXIL::TessellatorPartitioning::FractionalEven:
5330
400
  case DXIL::TessellatorPartitioning::FractionalOdd:
5331
400
    return input;
5332
0
  default:
5333
0
    DXASSERT(0, "invalid partition mode");
5334
0
    return nullptr;
5335
704
  }
5336
704
}
5337
5338
// Lowers ProcessIsolineTessFactors.
// Reads the raw detail and raw density factors from `CI`, clamps and rounds
// them together according to the hull shader's partitioning mode, and stores
// the results to the rounded detail and rounded density out pointers.
// `IOP`, `opcode`, `pObjHelper` and `Translated` are not used here. Always
// returns nullptr.
Value *TranslateProcessIsolineTessFactors(
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
    bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  // Get partition mode
  DXASSERT_NOMSG(helper.functionProps);
  DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull,
           "must be hull shader");
  DXIL::TessellatorPartitioning partition =
      helper.functionProps->ShaderProps.HS.partition;

  IRBuilder<> Builder(CI);

  // Both raw factors arrive as vectors; take lane 0 of each.
  Value *rawDetailFactor =
      CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDetailFactor);
  rawDetailFactor = Builder.CreateExtractElement(rawDetailFactor, (uint64_t)0);

  Value *rawDensityFactor =
      CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDensityFactor);
  rawDensityFactor =
      Builder.CreateExtractElement(rawDensityFactor, (uint64_t)0);

  // Pack {detail, density} so both are clamped and rounded in one pass.
  // Lane 1 must carry the density factor; filling it with the detail factor
  // would make the rounded density output a copy of the rounded detail.
  Value *init = UndefValue::get(VectorType::get(helper.f32Ty, 2));
  init = Builder.CreateInsertElement(init, rawDetailFactor, (uint64_t)0);
  init = Builder.CreateInsertElement(init, rawDensityFactor, (uint64_t)1);

  Value *clamped = ClampTessFactor(init, partition, hlslOP, Builder);
  Value *rounded = RoundUpTessFactor(clamped, partition, hlslOP, Builder);

  // Each output is written as a 1-element vector.
  Value *roundedDetailFactor =
      CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDetailFactor);
  Value *temp = UndefValue::get(VectorType::get(helper.f32Ty, 1));
  Value *roundedX = Builder.CreateExtractElement(rounded, (uint64_t)0);
  temp = Builder.CreateInsertElement(temp, roundedX, (uint64_t)0);
  Builder.CreateStore(temp, roundedDetailFactor);

  Value *roundedDensityFactor =
      CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDensityFactor);
  Value *roundedY = Builder.CreateExtractElement(rounded, 1);
  temp = Builder.CreateInsertElement(temp, roundedY, (uint64_t)0);
  Builder.CreateStore(temp, roundedDensityFactor);
  return nullptr;
}
5382
5383
// 3 inputs, 1 result
5384
Value *ApplyTriTessFactorOp(Value *input, DXIL::OpCode opcode, hlsl::OP *hlslOP,
5385
120
                            IRBuilder<> &Builder) {
5386
120
  Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
5387
120
  Value *input1 = Builder.CreateExtractElement(input, 1);
5388
120
  Value *input2 = Builder.CreateExtractElement(input, 2);
5389
5390
120
  if (opcode == DXIL::OpCode::FMax || 
opcode == DXIL::OpCode::FMin80
) {
5391
72
    Value *temp =
5392
72
        TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
5393
72
    Value *combined =
5394
72
        TrivialDxilBinaryOperation(opcode, temp, input2, hlslOP, Builder);
5395
72
    return combined;
5396
72
  }
5397
5398
  // Avg.
5399
48
  Value *temp = Builder.CreateFAdd(input0, input1);
5400
48
  Value *combined = Builder.CreateFAdd(temp, input2);
5401
48
  Value *rcp = ConstantFP::get(input0->getType(), 1.0 / 3.0);
5402
48
  combined = Builder.CreateFMul(combined, rcp);
5403
48
  return combined;
5404
120
}
5405
5406
// 4 inputs, 1 result
5407
Value *ApplyQuadTessFactorOp(Value *input, DXIL::OpCode opcode,
5408
120
                             hlsl::OP *hlslOP, IRBuilder<> &Builder) {
5409
120
  Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
5410
120
  Value *input1 = Builder.CreateExtractElement(input, 1);
5411
120
  Value *input2 = Builder.CreateExtractElement(input, 2);
5412
120
  Value *input3 = Builder.CreateExtractElement(input, 3);
5413
5414
120
  if (opcode == DXIL::OpCode::FMax || 
opcode == DXIL::OpCode::FMin80
) {
5415
72
    Value *temp0 =
5416
72
        TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
5417
72
    Value *temp1 =
5418
72
        TrivialDxilBinaryOperation(opcode, input2, input3, hlslOP, Builder);
5419
72
    Value *combined =
5420
72
        TrivialDxilBinaryOperation(opcode, temp0, temp1, hlslOP, Builder);
5421
72
    return combined;
5422
72
  }
5423
5424
  // Avg.
5425
48
  Value *temp0 = Builder.CreateFAdd(input0, input1);
5426
48
  Value *temp1 = Builder.CreateFAdd(input2, input3);
5427
48
  Value *combined = Builder.CreateFAdd(temp0, temp1);
5428
48
  Value *rcp = ConstantFP::get(input0->getType(), 0.25);
5429
48
  combined = Builder.CreateFMul(combined, rcp);
5430
48
  return combined;
5431
120
}
5432
5433
// 4 inputs, 2 result
5434
// Reduces the four components of `input` pairwise into a 2-element vector:
// element 0 combines components 0 and 1, element 1 combines components 2
// and 3. FMax/FMin use the matching DXIL binary op; any other opcode
// averages each pair.
Value *Apply2DQuadTessFactorOp(Value *input, DXIL::OpCode opcode,
                               hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  Value *edge0 = Builder.CreateExtractElement(input, static_cast<uint64_t>(0));
  Value *edge1 = Builder.CreateExtractElement(input, 1);
  Value *edge2 = Builder.CreateExtractElement(input, 2);
  Value *edge3 = Builder.CreateExtractElement(input, 3);

  const bool isMinOrMax =
      opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin;

  // Combine each pair, either with the min/max op or with a plain sum.
  Value *pair01 = nullptr;
  Value *pair23 = nullptr;
  if (isMinOrMax) {
    pair01 = TrivialDxilBinaryOperation(opcode, edge0, edge1, hlslOP, Builder);
    pair23 = TrivialDxilBinaryOperation(opcode, edge2, edge3, hlslOP, Builder);
  } else {
    pair01 = Builder.CreateFAdd(edge0, edge1);
    pair23 = Builder.CreateFAdd(edge2, edge3);
  }

  // Pack the two per-axis values into a 2-element vector.
  Type *elemTy = edge0->getType();
  Value *packed = UndefValue::get(VectorType::get(elemTy, 2));
  packed = Builder.CreateInsertElement(packed, pair01, static_cast<uint64_t>(0));
  packed = Builder.CreateInsertElement(packed, pair23, 1);
  if (isMinOrMax)
    return packed;

  // Avg: turn each sum into a mean by scaling with 0.5.
  Constant *half = ConstantFP::get(elemTy, 0.5);
  half = ConstantVector::getSplat(2, half);
  return Builder.CreateFMul(packed, half);
}
5463
5464
// Where `rounded` is (ordered) less than `cutoffVal`, substitutes values
// derived from `averageUnscaled` (clamped for the partition mode and limited
// to the cutoff) for the incoming ones.
//   pClampedResult - in/out: the clamped value, updated with the selection.
//   rounded        - the rounded value that is compared against the cutoff.
// Returns the selected rounded value.
Value *ResolveSmallValue(Value **pClampedResult, Value *rounded,
                         Value *averageUnscaled, float cutoffVal,
                         DXIL::TessellatorPartitioning partitionMode,
                         hlsl::OP *hlslOP, IRBuilder<> &Builder) {
  Value *incomingClamped = *pClampedResult;

  // Do partitioning-specific clamping on the unscaled average.
  Value *avgClamped =
      ClampTessFactor(averageUnscaled, partitionMode, hlslOP, Builder);

  Constant *cutoff =
      ConstantFP::get(Type::getFloatTy(rounded->getContext()), cutoffVal);
  Type *avgTy = avgClamped->getType();
  if (avgTy->isVectorTy())
    cutoff = ConstantVector::getSplat(avgTy->getVectorNumElements(), cutoff);

  // Limit the average to the cutoff.
  avgClamped = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, avgClamped,
                                          cutoff, hlslOP, Builder);
  // Round up for integer/pow2 partitioning.
  Value *avgRounded =
      RoundUpTessFactor(avgClamped, partitionMode, hlslOP, Builder);

  // The comparison below needs a cutoff of the same type as `rounded`.
  Type *roundedTy = rounded->getType();
  if (roundedTy != cutoff->getType())
    cutoff = ConstantVector::getSplat(roundedTy->getVectorNumElements(), cutoff);

  // If the scaled value is below the cutoff, take the unscaled average.
  Value *belowCutoff = Builder.CreateFCmpOLT(rounded, cutoff);

  if (avgClamped->getType() != incomingClamped->getType())
    avgClamped =
        SplatToVector(avgClamped, incomingClamped->getType(), Builder);
  *pClampedResult =
      Builder.CreateSelect(belowCutoff, avgClamped, incomingClamped);

  if (avgRounded->getType() != roundedTy)
    avgRounded = SplatToVector(avgRounded, roundedTy, Builder);
  return Builder.CreateSelect(belowCutoff, avgRounded, rounded);
}
5500
5501
void ResolveQuadAxes(Value **pFinalResult, Value **pClampedResult,
5502
                     float cutoffVal,
5503
                     DXIL::TessellatorPartitioning partitionMode,
5504
24
                     hlsl::OP *hlslOP, IRBuilder<> &Builder) {
5505
24
  Value *finalResult = *pFinalResult;
5506
24
  Value *clampedResult = *pClampedResult;
5507
5508
24
  Value *clampR = clampedResult;
5509
24
  Value *finalR = finalResult;
5510
24
  Type *f32Ty = Type::getFloatTy(finalR->getContext());
5511
24
  Constant *cutoffVals = ConstantFP::get(f32Ty, cutoffVal);
5512
5513
24
  Value *minValsX = cutoffVals;
5514
24
  Value *minValsY =
5515
24
      RoundUpTessFactor(cutoffVals, partitionMode, hlslOP, Builder);
5516
5517
24
  Value *clampRX = Builder.CreateExtractElement(clampR, (uint64_t)0);
5518
24
  Value *clampRY = Builder.CreateExtractElement(clampR, 1);
5519
24
  Value *maxValsX = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, clampRX,
5520
24
                                               clampRY, hlslOP, Builder);
5521
5522
24
  Value *finalRX = Builder.CreateExtractElement(finalR, (uint64_t)0);
5523
24
  Value *finalRY = Builder.CreateExtractElement(finalR, 1);
5524
24
  Value *maxValsY = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, finalRX,
5525
24
                                               finalRY, hlslOP, Builder);
5526
5527
  // Don't go over our threshold ("final" one is rounded).
5528
24
  Value *optionX = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsX,
5529
24
                                              minValsX, hlslOP, Builder);
5530
24
  Value *optionY = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsY,
5531
24
                                              minValsY, hlslOP, Builder);
5532
5533
24
  Value *clampL = SplatToVector(optionX, clampR->getType(), Builder);
5534
24
  Value *finalL = SplatToVector(optionY, finalR->getType(), Builder);
5535
5536
24
  cutoffVals = ConstantVector::getSplat(2, cutoffVals);
5537
24
  Value *lt = Builder.CreateFCmpOLT(clampedResult, cutoffVals);
5538
24
  *pClampedResult = Builder.CreateSelect(lt, clampL, clampR);
5539
24
  *pFinalResult = Builder.CreateSelect(lt, finalL, finalR);
5540
24
}
5541
5542
// Lowers the Process{Tri,Quad,2DQuad}TessFactors{Avg,Min,Max} intrinsics.
// Reads the raw edge factors and inside scale from the call, and stores the
// rounded edge factors, the unrounded inside factor(s) and the rounded
// inside factor(s) through the call's output pointer operands.
// Always returns nullptr.
Value *TranslateProcessTessFactors(CallInst *CI, IntrinsicOp IOP,
                                   OP::OpCode opcode,
                                   HLOperationLowerHelper &helper,
                                   HLObjectOperationLowerHelper *pObjHelper,
                                   bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;

  // Get partition mode.
  DXASSERT_NOMSG(helper.functionProps);
  DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull,
           "must be hull shader");
  const DXIL::TessellatorPartitioning partition =
      helper.functionProps->ShaderProps.HS.partition;

  IRBuilder<> Builder(CI);

  // Classify the intrinsic once: which reduction helper applies, and which
  // op combines the edges (NumOpCodes stands for Avg).
  enum class FactorShape { Invalid, Quad2D, Quad, Tri };
  FactorShape shape = FactorShape::Invalid;
  DXIL::OpCode tessFactorOp = DXIL::OpCode::NumOpCodes;
  switch (IOP) {
  case IntrinsicOp::IOP_Process2DQuadTessFactorsAvg:
    shape = FactorShape::Quad2D;
    break;
  case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
    shape = FactorShape::Quad2D;
    tessFactorOp = DXIL::OpCode::FMax;
    break;
  case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
    shape = FactorShape::Quad2D;
    tessFactorOp = DXIL::OpCode::FMin;
    break;
  case IntrinsicOp::IOP_ProcessQuadTessFactorsAvg:
    shape = FactorShape::Quad;
    break;
  case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
    shape = FactorShape::Quad;
    tessFactorOp = DXIL::OpCode::FMax;
    break;
  case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
    shape = FactorShape::Quad;
    tessFactorOp = DXIL::OpCode::FMin;
    break;
  case IntrinsicOp::IOP_ProcessTriTessFactorsAvg:
    shape = FactorShape::Tri;
    break;
  case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
    shape = FactorShape::Tri;
    tessFactorOp = DXIL::OpCode::FMax;
    break;
  case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
    shape = FactorShape::Tri;
    tessFactorOp = DXIL::OpCode::FMin;
    break;
  default:
    break;
  }
  const bool isQuad = shape == FactorShape::Quad;
  const bool is2DQuad = shape == FactorShape::Quad2D;

  Value *rawEdgeFactor =
      CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawEdgeFactor);
  Value *insideScale =
      CI->getArgOperand(HLOperandIndex::kProcessTessFactorInsideScale);

  // Clamp to [0.0f..1.0f], NaN->0.0f.
  Value *scales = CleanupTessFactorScale(insideScale, hlslOP, Builder);
  // Do partitioning-specific clamping.
  Value *clampedEdges =
      ClampTessFactor(rawEdgeFactor, partition, hlslOP, Builder);
  // Round up for integer/pow2 partitioning.
  Value *roundedEdges =
      RoundUpTessFactor(clampedEdges, partition, hlslOP, Builder);
  // Store the output.
  Value *roundedEdgeFactor =
      CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedEdgeFactor);
  Builder.CreateStore(roundedEdges, roundedEdgeFactor);

  // Clamp to [1.0f..Inf], NaN->1.0f.
  Value *clean = CleanupTessFactor(rawEdgeFactor, hlslOP, Builder);

  // Reduces the cleaned edge factors with the helper matching the shape.
  auto reduceEdges = [&](DXIL::OpCode op) -> Value * {
    switch (shape) {
    case FactorShape::Quad2D:
      return Apply2DQuadTessFactorOp(clean, op, hlslOP, Builder);
    case FactorShape::Quad:
      return ApplyQuadTessFactorOp(clean, op, hlslOP, Builder);
    case FactorShape::Tri:
      return ApplyTriTessFactorOp(clean, op, hlslOP, Builder);
    case FactorShape::Invalid:
      break;
    }
    DXASSERT(0, "invalid opcode for ProcessTessFactor");
    return nullptr;
  };

  Value *factors = reduceEdges(tessFactorOp);

  // Scale the reduced factors, widening them to the scale's type if needed.
  Value *scaleLhs = factors;
  if (scales->getType() != factors->getType())
    scaleLhs = SplatToVector(factors, scales->getType(), Builder);
  Value *scaledI = Builder.CreateFMul(scaleLhs, scales);

  // Do partitioning-specific clamping.
  Value *clampedI = ClampTessFactor(scaledI, partition, hlslOP, Builder);
  // Round up for integer/pow2 partitioning.
  Value *roundedI = RoundUpTessFactor(clampedI, partition, hlslOP, Builder);

  Value *finalI = roundedI;

  if (partition == DXIL::TessellatorPartitioning::FractionalOdd) {
    // If not max, set to AVG.
    if (tessFactorOp != DXIL::OpCode::FMax)
      tessFactorOp = DXIL::OpCode::NumOpCodes;

    Value *avgFactorsI = reduceEdges(tessFactorOp);

    finalI = ResolveSmallValue(/*inout*/ &clampedI, roundedI, avgFactorsI,
                               /*cutoff*/ 3.0f, partition, hlslOP, Builder);

    if (is2DQuad)
      ResolveQuadAxes(/*inout*/ &finalI, /*inout*/ &clampedI,
                      /*cutoff*/ 3.0f, partition, hlslOP, Builder);
  }

  Value *unroundedInsideFactor = CI->getArgOperand(
      HLOperandIndex::kProcessTessFactorUnRoundedInsideFactor);
  Type *outFactorTy = unroundedInsideFactor->getType()->getPointerElementType();

  // When the computed factor's type differs from the output type, keep only
  // channel 0 and splat it to the output type.
  auto fitToOutput = [&](Value *factor) -> Value * {
    if (outFactorTy == factor->getType())
      return factor;
    DXASSERT(isQuad, "quad only write one channel of out factor");
    (void)isQuad;
    Value *channel0 =
        Builder.CreateExtractElement(factor, static_cast<uint64_t>(0));
    return SplatToVector(channel0, outFactorTy, Builder);
  };

  clampedI = fitToOutput(clampedI);
  Builder.CreateStore(clampedI, unroundedInsideFactor);

  Value *roundedInsideFactor =
      CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedInsideFactor);
  finalI = fitToOutput(finalI);
  Builder.CreateStore(finalI, roundedInsideFactor);
  return nullptr;
}
5693
5694
} // namespace
5695
5696
// Ray Tracing.
5697
namespace {
5698
// Emits the DXIL call for `opcode` with arguments
// (opcode, THit, HitKind, Attr). The op function is overloaded on the type
// of the attribute operand.
Value *TranslateReportIntersection(CallInst *CI, IntrinsicOp IOP,
                                   OP::OpCode opcode,
                                   HLOperationLowerHelper &helper,
                                   HLObjectOperationLowerHelper *pObjHelper,
                                   bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;

  Value *hitT = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  Value *hitKind = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  Value *attributes = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  Value *opcodeArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));

  Function *dxilFunc = hlslOP->GetOpFunc(opcode, attributes->getType());

  Value *callArgs[] = {opcodeArg, hitT, hitKind, attributes};
  IRBuilder<> Builder(CI);
  return Builder.CreateCall(dxilFunc, callArgs);
}
5715
5716
// Emits the DXIL call for `opcode` with arguments
// (opcode, ShaderIndex, Parameter). The op function is overloaded on the
// type of the parameter operand.
Value *TranslateCallShader(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                           HLOperationLowerHelper &helper,
                           HLObjectOperationLowerHelper *pObjHelper,
                           bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;

  Value *shaderIdx = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *param = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  Value *opcodeArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));

  Function *dxilFunc = hlslOP->GetOpFunc(opcode, param->getType());

  Value *callArgs[] = {opcodeArg, shaderIdx, param};
  IRBuilder<> Builder(CI);
  return Builder.CreateCall(dxilFunc, callArgs);
}
5731
5732
static void TransferRayDescArgs(Value **Args, hlsl::OP *OP,
5733
                                IRBuilder<> &Builder, CallInst *CI,
5734
732
                                unsigned &Index, unsigned &HLIndex) {
5735
  // Extract elements from flattened ray desc arguments in HL op.
5736
  // float3 Origin;
5737
732
  Value *origin = CI->getArgOperand(HLIndex++);
5738
732
  Args[Index++] = Builder.CreateExtractElement(origin, (uint64_t)0);
5739
732
  Args[Index++] = Builder.CreateExtractElement(origin, 1);
5740
732
  Args[Index++] = Builder.CreateExtractElement(origin, 2);
5741
  // float  TMin;
5742
732
  Args[Index++] = CI->getArgOperand(HLIndex++);
5743
  // float3 Direction;
5744
732
  Value *direction = CI->getArgOperand(HLIndex++);
5745
732
  Args[Index++] = Builder.CreateExtractElement(direction, (uint64_t)0);
5746
732
  Args[Index++] = Builder.CreateExtractElement(direction, 1);
5747
732
  Args[Index++] = Builder.CreateExtractElement(direction, 2);
5748
  // float  TMax;
5749
732
  Args[Index++] = CI->getArgOperand(HLIndex++);
5750
732
}
5751
5752
// Builds the DXIL TraceRay-style call: opcode, the leading HL operands
// copied as-is, the scalarized ray description, then the payload. The op
// function is overloaded on the payload operand's type.
Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
                         HLOperationLowerHelper &Helper,
                         HLObjectOperationLowerHelper *pObjHelper,
                         bool &Translated) {
  hlsl::OP *OP = &Helper.hlslOP;

  Value *Args[DXIL::OperandIndex::kTraceRayNumOp];
  Args[0] = OP->GetU32Const(static_cast<unsigned>(OpCode));

  // Operands ahead of the ray description map one-to-one.
  unsigned Index = 1;
  unsigned HLIndex = 1;
  for (; HLIndex < HLOperandIndex::kTraceRayRayDescOpIdx; ++HLIndex, ++Index)
    Args[Index] = CI->getArgOperand(HLIndex);

  IRBuilder<> Builder(CI);
  TransferRayDescArgs(Args, OP, Builder, CI, Index, HLIndex);
  DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands() - 1);
  DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayPayloadOpIdx);

  // The payload is the last operand on both sides.
  Value *Payload = CI->getArgOperand(HLIndex++);
  Args[Index++] = Payload;

  DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands());
  DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayNumOp);

  Function *F = OP->GetOpFunc(OpCode, Payload->getType());
  return Builder.CreateCall(F, Args);
}
5780
5781
// RayQuery methods
5782
5783
// Lowers the HL allocate-ray-query call. When the ray-query-flags operand
// is a non-zero constant, the op is upgraded to AllocateRayQuery2 and
// receives both flag operands; otherwise the incoming opcode is used with
// the ray flags only.
Value *TranslateAllocateRayQuery(CallInst *CI, IntrinsicOp IOP,
                                 OP::OpCode opcode,
                                 HLOperationLowerHelper &helper,
                                 HLObjectOperationLowerHelper *pObjHelper,
                                 bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  DXASSERT(CI->getNumArgOperands() == 3,
           "hlopcode for allocaterayquery always expects 3 arguments");

  llvm::Value *queryFlagsArg =
      CI->getArgOperand(HLOperandIndex::kAllocateRayQueryRayQueryFlagsIdx);
  llvm::ConstantInt *queryFlags =
      llvm::dyn_cast<llvm::ConstantInt>(queryFlagsArg);
  DXASSERT(queryFlags,
           "2nd argument to allocaterayquery must always be a constant value");

  const bool hasQueryFlags = queryFlags->getValue().getZExtValue() != 0;
  if (!hasQueryFlags) {
    Value *refArgs[2] = {
        nullptr, CI->getOperand(HLOperandIndex::kAllocateRayQueryRayFlagsIdx)};
    return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
  }

  // Upgrade to allocateRayQuery2: there is a non-zero 2nd template arg.
  Value *refArgs[3] = {
      nullptr, CI->getOperand(HLOperandIndex::kAllocateRayQueryRayFlagsIdx),
      CI->getOperand(HLOperandIndex::kAllocateRayQueryRayQueryFlagsIdx)};
  opcode = OP::OpCode::AllocateRayQuery2;
  return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
}
5809
5810
// Builds the DXIL TraceRayInline-style call: opcode, the leading HL
// operands copied as-is, then the scalarized ray description. The op
// function is fetched with the void overload type.
Value *TranslateTraceRayInline(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                               HLOperationLowerHelper &helper,
                               HLObjectOperationLowerHelper *pObjHelper,
                               bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;

  Value *Args[DXIL::OperandIndex::kTraceRayInlineNumOp];
  Args[0] = hlslOP->GetU32Const(static_cast<unsigned>(opcode));

  // Operands ahead of the ray description map one-to-one.
  unsigned Index = 1;
  unsigned HLIndex = 1;
  for (; HLIndex < HLOperandIndex::kTraceRayInlineRayDescOpIdx;
       ++HLIndex, ++Index)
    Args[Index] = CI->getArgOperand(HLIndex);

  IRBuilder<> Builder(CI);
  DXASSERT_NOMSG(HLIndex == HLOperandIndex::kTraceRayInlineRayDescOpIdx);
  DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayInlineRayDescOpIdx);
  TransferRayDescArgs(Args, hlslOP, Builder, CI, Index, HLIndex);
  DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands());
  DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayInlineNumOp);

  Function *dxilFunc = hlslOP->GetOpFunc(opcode, Builder.getVoidTy());
  return Builder.CreateCall(dxilFunc, Args);
}
5834
5835
// Emits the DXIL call for `opcode` with arguments (opcode, handle, THit).
// The op function is fetched with the void overload type.
Value *TranslateCommitProceduralPrimitiveHit(
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
    bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;

  Value *hitT = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  Value *opcodeArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  Value *queryHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);

  IRBuilder<> Builder(CI);
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, Builder.getVoidTy());

  Value *callArgs[] = {opcodeArg, queryHandle, hitT};
  return Builder.CreateCall(dxilFunc, callArgs);
}
5851
5852
// Emits the DXIL call for `opcode` with arguments (opcode, handle). The op
// function is overloaded on the HL call's own result type.
Value *TranslateGenericRayQueryMethod(CallInst *CI, IntrinsicOp IOP,
                                      OP::OpCode opcode,
                                      HLOperationLowerHelper &helper,
                                      HLObjectOperationLowerHelper *pObjHelper,
                                      bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;

  Value *opcodeArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
  Value *queryHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);

  Function *dxilFunc = hlslOP->GetOpFunc(opcode, CI->getType());

  Value *callArgs[] = {opcodeArg, queryHandle};
  IRBuilder<> Builder(CI);
  return Builder.CreateCall(dxilFunc, callArgs);
}
5867
5868
// Lowers a ray-query getter returning 12 elements. The (row, col) index
// pairs are passed as constant vectors with the column index varying
// fastest: (0,0) (0,1) (0,2) (0,3) (1,0) ... (2,3).
Value *TranslateRayQueryMatrix3x4Operation(
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
    bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  VectorType *resultTy = cast<VectorType>(CI->getType());
  Value *queryHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);

  uint32_t rowIdx[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
  Constant *rowVec = ConstantDataVector::get(CI->getContext(), rowIdx);
  uint8_t colIdx[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
  Constant *colVec = ConstantDataVector::get(CI->getContext(), colIdx);

  return TrivialDxilOperation(opcode, {nullptr, queryHandle, rowVec, colVec},
                              resultTy, CI, hlslOP);
}
5883
5884
// Lowers a ray-query getter returning 12 elements in transposed order. The
// (row, col) index pairs are passed as constant vectors with the row index
// varying fastest: (0,0) (1,0) (2,0) (0,1) ... (2,3).
Value *TranslateRayQueryTransposedMatrix3x4Operation(
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
    bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  VectorType *resultTy = cast<VectorType>(CI->getType());
  Value *queryHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);

  uint32_t rowIdx[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2};
  Constant *rowVec = ConstantDataVector::get(CI->getContext(), rowIdx);
  uint8_t colIdx[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3};
  Constant *colVec = ConstantDataVector::get(CI->getContext(), colIdx);

  return TrivialDxilOperation(opcode, {nullptr, queryHandle, rowVec, colVec},
                              resultTy, CI, hlslOP);
}
5899
5900
// Lowers a ray-query getter returning a 2-element vector: the DXIL op is
// given the handle and a constant vector of component indices {0, 1}.
Value *TranslateRayQueryFloat2Getter(CallInst *CI, IntrinsicOp IOP,
                                     OP::OpCode opcode,
                                     HLOperationLowerHelper &helper,
                                     HLObjectOperationLowerHelper *pObjHelper,
                                     bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  VectorType *resultTy = cast<VectorType>(CI->getType());
  Value *queryHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);

  uint8_t componentIdx[] = {0, 1};
  Constant *componentVec =
      ConstantDataVector::get(CI->getContext(), componentIdx);

  return TrivialDxilOperation(opcode, {nullptr, queryHandle, componentVec},
                              resultTy, CI, hlslOP);
}
5914
5915
// Lowers a ray-query getter returning a 3-element vector: the DXIL op is
// given the handle and a constant vector of component indices {0, 1, 2}.
Value *TranslateRayQueryFloat3Getter(CallInst *CI, IntrinsicOp IOP,
                                     OP::OpCode opcode,
                                     HLOperationLowerHelper &helper,
                                     HLObjectOperationLowerHelper *pObjHelper,
                                     bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  VectorType *resultTy = cast<VectorType>(CI->getType());
  Value *queryHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);

  uint8_t componentIdx[] = {0, 1, 2};
  Constant *componentVec =
      ConstantDataVector::get(CI->getContext(), componentIdx);

  return TrivialDxilOperation(opcode, {nullptr, queryHandle, componentVec},
                              resultTy, CI, hlslOP);
}
5929
5930
// Lowers an argument-less intrinsic with a vector result: the DXIL op is
// given a constant vector of component indices {0, 1, 2, 3}.
Value *TranslateNoArgVectorOperation(CallInst *CI, IntrinsicOp IOP,
                                     OP::OpCode opcode,
                                     HLOperationLowerHelper &helper,
                                     HLObjectOperationLowerHelper *pObjHelper,
                                     bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  VectorType *resultTy = cast<VectorType>(CI->getType());

  uint8_t componentIdx[] = {0, 1, 2, 3};
  Constant *componentVec =
      ConstantDataVector::get(CI->getContext(), componentIdx);

  return TrivialDxilOperation(opcode, {nullptr, componentVec}, resultTy, CI,
                              hlslOP);
}
5942
5943
template <typename ColElemTy>
5944
static void GetMatrixIndices(Constant *&Rows, Constant *&Cols, bool Is3x4,
5945
72
                             LLVMContext &Ctx) {
5946
72
  if (Is3x4) {
5947
48
    uint32_t RVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
5948
48
    Rows = ConstantDataVector::get(Ctx, RVals);
5949
48
    ColElemTy CVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
5950
48
    Cols = ConstantDataVector::get(Ctx, CVals);
5951
48
    return;
5952
48
  }
5953
24
  uint32_t RVals[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2};
5954
24
  Rows = ConstantDataVector::get(Ctx, RVals);
5955
24
  ColElemTy CVals[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3};
5956
24
  Cols = ConstantDataVector::get(Ctx, CVals);
5957
24
}
HLOperationLower.cpp:void (anonymous namespace)::GetMatrixIndices<unsigned char>(llvm::Constant*&, llvm::Constant*&, bool, llvm::LLVMContext&)
Line
Count
Source
5945
56
                             LLVMContext &Ctx) {
5946
56
  if (Is3x4) {
5947
40
    uint32_t RVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
5948
40
    Rows = ConstantDataVector::get(Ctx, RVals);
5949
40
    ColElemTy CVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
5950
40
    Cols = ConstantDataVector::get(Ctx, CVals);
5951
40
    return;
5952
40
  }
5953
16
  uint32_t RVals[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2};
5954
16
  Rows = ConstantDataVector::get(Ctx, RVals);
5955
16
  ColElemTy CVals[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3};
5956
16
  Cols = ConstantDataVector::get(Ctx, CVals);
5957
16
}
HLOperationLower.cpp:void (anonymous namespace)::GetMatrixIndices<unsigned int>(llvm::Constant*&, llvm::Constant*&, bool, llvm::LLVMContext&)
Line
Count
Source
5945
16
                             LLVMContext &Ctx) {
5946
16
  if (Is3x4) {
5947
8
    uint32_t RVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
5948
8
    Rows = ConstantDataVector::get(Ctx, RVals);
5949
8
    ColElemTy CVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
5950
8
    Cols = ConstantDataVector::get(Ctx, CVals);
5951
8
    return;
5952
8
  }
5953
8
  uint32_t RVals[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2};
5954
8
  Rows = ConstantDataVector::get(Ctx, RVals);
5955
8
  ColElemTy CVals[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3};
5956
8
  Cols = ConstantDataVector::get(Ctx, CVals);
5957
8
}
5958
5959
// Lowers an argument-less intrinsic with a 12-element result, passing the
// 3x4-ordered row/column index vectors (uint8_t column indices) to the op.
Value *TranslateNoArgMatrix3x4Operation(
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
    bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  VectorType *resultTy = cast<VectorType>(CI->getType());

  Constant *rowVec = nullptr;
  Constant *colVec = nullptr;
  GetMatrixIndices<uint8_t>(rowVec, colVec, /*Is3x4*/ true, CI->getContext());

  return TrivialDxilOperation(opcode, {nullptr, rowVec, colVec}, resultTy, CI,
                              hlslOP);
}
5969
5970
// Lowers an argument-less intrinsic with a 12-element result, passing the
// transposed-order row/column index vectors (uint8_t column indices) to the
// op.
Value *TranslateNoArgTransposedMatrix3x4Operation(
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
    bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  VectorType *resultTy = cast<VectorType>(CI->getType());

  Constant *rowVec = nullptr;
  Constant *colVec = nullptr;
  GetMatrixIndices<uint8_t>(rowVec, colVec, /*Is3x4*/ false, CI->getContext());

  return TrivialDxilOperation(opcode, {nullptr, rowVec, colVec}, resultTy, CI,
                              hlslOP);
}
5980
5981
/*
5982
HLSL:
5983
void ThreadNodeOutputRecords<recordType>::OutputComplete();
5984
void GroupNodeOutputRecords<recordType>::OutputComplete();
5985
DXIL:
5986
void @dx.op.outputComplete(i32 %Opcode, %dx.types.NodeRecordHandle
5987
%RecordHandle)
5988
*/
5989
// Emits the DXIL call for `op` with arguments (opcode, record handle). The
// handle operand is asserted to have the node record handle type; the op
// function is overloaded on the HL call's own result type.
Value *TranslateNodeOutputComplete(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                                   HLOperationLowerHelper &helper,
                                   HLObjectOperationLowerHelper *pObjHelper,
                                   bool &Translated) {
  hlsl::OP *OP = &helper.hlslOP;

  Value *recordHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
  DXASSERT_NOMSG(recordHandle->getType() == OP->GetNodeRecordHandleType());

  Function *dxilFunc = OP->GetOpFunc(op, CI->getType());
  Value *opcodeArg = OP->GetU32Const(static_cast<unsigned>(op));

  Value *callArgs[] = {opcodeArg, recordHandle};
  IRBuilder<> Builder(CI);
  return Builder.CreateCall(dxilFunc, callArgs);
}
6003
6004
// Lowers a no-argument intrinsic via TrivialNoArgOperation, then makes sure
// the resulting instruction sits immediately before the block's return.
Value *TranslateNoArgNoReturnPreserveOutput(
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
    bool &Translated) {
  Instruction *loweredCall = cast<Instruction>(
      TrivialNoArgOperation(CI, IOP, opcode, helper, pObjHelper, Translated));

  // HL intrinsic must have had a return injected just after the call.
  // SROA_Parameter_HLSL will copy from alloca to output just before each
  // return. Now move call after the copy and just before the return.
  const bool alreadyBeforeReturn = isa<ReturnInst>(loweredCall->getNextNode());
  if (!alreadyBeforeReturn) {
    ReturnInst *blockReturn =
        cast<ReturnInst>(loweredCall->getParent()->getTerminator());
    loweredCall->removeFromParent();
    loweredCall->insertBefore(blockReturn);
  }
  return loweredCall;
}
6020
6021
// Special half dot2 with accumulate to float
6022
Value *TranslateDot2Add(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
6023
                        HLOperationLowerHelper &helper,
6024
                        HLObjectOperationLowerHelper *pObjHelper,
6025
16
                        bool &Translated) {
6026
16
  hlsl::OP *hlslOP = &helper.hlslOP;
6027
16
  Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
6028
16
  const unsigned vecSize = 2;
6029
16
  DXASSERT(src0->getType()->isVectorTy() &&
6030
16
               vecSize == src0->getType()->getVectorNumElements() &&
6031
16
               src0->getType()->getScalarType()->isHalfTy(),
6032
16
           "otherwise, unexpected input dimension or component type");
6033
6034
16
  Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
6035
16
  DXASSERT(src0->getType() == src1->getType(),
6036
16
           "otherwise, mismatched argument types");
6037
16
  Value *accArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
6038
16
  Type *accTy = accArg->getType();
6039
16
  DXASSERT(!accTy->isVectorTy() && accTy->isFloatTy(),
6040
16
           "otherwise, unexpected accumulator type");
6041
16
  IRBuilder<> Builder(CI);
6042
6043
16
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, accTy);
6044
16
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
6045
6046
16
  SmallVector<Value *, 6> args;
6047
16
  args.emplace_back(opArg);
6048
16
  args.emplace_back(accArg);
6049
48
  for (unsigned i = 0; i < vecSize; 
i++32
)
6050
32
    args.emplace_back(Builder.CreateExtractElement(src0, i));
6051
48
  for (unsigned i = 0; i < vecSize; 
i++32
)
6052
32
    args.emplace_back(Builder.CreateExtractElement(src1, i));
6053
16
  return Builder.CreateCall(dxilFunc, args);
6054
16
}
6055
6056
// Lowers a packed dot4-add intrinsic to its DXIL operation.
//
// Both sources and the accumulator must be scalar i32 values. The DXIL call is
// overloaded on the accumulator type and receives the accumulator ahead of
// the two sources. Returns the new call.
Value *TranslateDot4AddPacked(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                              HLOperationLowerHelper &helper,
                              HLObjectOperationLowerHelper *pObjHelper,
                              bool &Translated) {
  Value *PackedA = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  DXASSERT(
      !PackedA->getType()->isVectorTy() && PackedA->getType()->isIntegerTy(32),
      "otherwise, unexpected vector support in high level intrinsic template");

  Value *PackedB = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  DXASSERT(PackedA->getType() == PackedB->getType(),
           "otherwise, mismatched argument types");

  Value *Acc = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  Type *AccTy = Acc->getType();
  DXASSERT(
      !AccTy->isVectorTy() && AccTy->isIntegerTy(32),
      "otherwise, unexpected vector support in high level intrinsic template");

  hlsl::OP &Ops = helper.hlslOP;
  IRBuilder<> Builder(CI);
  Function *Callee = Ops.GetOpFunc(opcode, AccTy);
  Value *CallArgs[] = {Ops.GetU32Const(static_cast<unsigned>(opcode)), Acc,
                       PackedA, PackedB};
  return Builder.CreateCall(Callee, CallArgs);
}
6079
6080
// Lowers the pack_s8 / pack_u8 / pack_clamp_s8 / pack_clamp_u8 intrinsics.
//
// The source must be a 4-component vector of 16- or 32-bit integers. It is
// scalarized and passed to the DXIL operation together with a pack mode chosen
// from the intrinsic: signed clamp, unsigned clamp, or truncation for the two
// non-clamping variants. The DXIL call is overloaded on the source element
// type. Returns the new call.
Value *TranslatePack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                     HLOperationLowerHelper &helper,
                     HLObjectOperationLowerHelper *pObjHelper,
                     bool &Translated) {
  hlsl::OP &Ops = helper.hlslOP;

  Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  Type *SrcTy = Src->getType();
  Type *LaneTy = SrcTy->getScalarType();

  DXASSERT(SrcTy->isVectorTy() && SrcTy->getVectorNumElements() == 4 &&
               LaneTy->isIntegerTy() &&
               (LaneTy->getIntegerBitWidth() == 32 ||
                LaneTy->getIntegerBitWidth() == 16),
           "otherwise, unexpected input dimension or component type");

  // Truncation is both the mode of the non-clamping variants and the fallback
  // used when an unexpected intrinsic reaches this function.
  DXIL::PackMode Mode = DXIL::PackMode::Trunc;
  if (IOP == hlsl::IntrinsicOp::IOP_pack_clamp_s8) {
    Mode = DXIL::PackMode::SClamp;
  } else if (IOP == hlsl::IntrinsicOp::IOP_pack_clamp_u8) {
    Mode = DXIL::PackMode::UClamp;
  } else if (IOP != hlsl::IntrinsicOp::IOP_pack_s8 &&
             IOP != hlsl::IntrinsicOp::IOP_pack_u8) {
    DXASSERT(false, "unexpected opcode");
  }

  IRBuilder<> Builder(CI);
  Function *Callee = Ops.GetOpFunc(opcode, LaneTy);
  Constant *OpcodeArg = Ops.GetU32Const(static_cast<unsigned>(opcode));
  Constant *ModeArg = Ops.GetU8Const(static_cast<unsigned>(Mode));

  // Pull the four lanes out in ascending index order.
  Value *Lanes[4];
  for (uint64_t Idx = 0; Idx != 4; ++Idx)
    Lanes[Idx] = Builder.CreateExtractElement(Src, Idx);

  return Builder.CreateCall(
      Callee, {OpcodeArg, ModeArg, Lanes[0], Lanes[1], Lanes[2], Lanes[3]});
}
6125
6126
// Lowers the unpack_s8s32 / unpack_u8u32 / unpack_s8s16 / unpack_u8u16
// intrinsics.
//
// The source must be a scalar i32. The intrinsic selects both the unpack mode
// (signed or unsigned) and the overload type of the DXIL operation (i32 or
// i16). The DXIL call returns an aggregate, so its four members are copied
// into a vector of the HL call's result type. Returns that vector.
Value *TranslateUnpack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                       HLOperationLowerHelper &helper,
                       HLObjectOperationLowerHelper *pObjHelper,
                       bool &Translated) {
  hlsl::OP &Ops = helper.hlslOP;

  Value *Packed = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
  DXASSERT(
      !Packed->getType()->isVectorTy() && Packed->getType()->isIntegerTy(32),
      "otherwise, unexpected vector support in high level intrinsic template");

  // Result width picks the overload type; it stays null for an unexpected
  // intrinsic, which is flagged by the assert.
  Type *OverloadTy = nullptr;
  if (IOP == hlsl::IntrinsicOp::IOP_unpack_s8s32 ||
      IOP == hlsl::IntrinsicOp::IOP_unpack_u8u32) {
    OverloadTy = helper.i32Ty;
  } else if (IOP == hlsl::IntrinsicOp::IOP_unpack_s8s16 ||
             IOP == hlsl::IntrinsicOp::IOP_unpack_u8u16) {
    OverloadTy = helper.i16Ty;
  } else {
    DXASSERT(false, "unexpected opcode");
  }

  // Everything that is not one of the signed variants unpacks as unsigned.
  const bool IsSigned = IOP == hlsl::IntrinsicOp::IOP_unpack_s8s32 ||
                        IOP == hlsl::IntrinsicOp::IOP_unpack_s8s16;
  const DXIL::UnpackMode Mode =
      IsSigned ? DXIL::UnpackMode::Signed : DXIL::UnpackMode::Unsigned;

  IRBuilder<> Builder(CI);
  Function *Callee = Ops.GetOpFunc(opcode, OverloadTy);
  Constant *OpcodeArg = Ops.GetU32Const(static_cast<unsigned>(opcode));
  Constant *ModeArg = Ops.GetU8Const(static_cast<unsigned>(Mode));
  Value *Aggregate = Builder.CreateCall(Callee, {OpcodeArg, ModeArg, Packed});

  // Convert the final aggregate into a vector to make the types match.
  const unsigned NumLanes = 4;
  Value *Result = UndefValue::get(CI->getType());
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    Value *Member = Builder.CreateExtractValue(Aggregate, Lane);
    Result = Builder.CreateInsertElement(Result, Member, Lane);
  }
  return Result;
}
6177
6178
} // namespace
6179
6180
// Shader Execution Reordering.
6181
namespace {
6182
// Lowers the HitObject MakeNop intrinsic: emits the DXIL operation and stores
// its result through the pointer passed as operand 1 of the HL call.
//
// Always returns nullptr; the HL call's own result must be unused.
Value *TranslateHitObjectMakeNop(CallInst *CI, IntrinsicOp IOP,
                                 OP::OpCode Opcode,
                                 HLOperationLowerHelper &Helper,
                                 HLObjectOperationLowerHelper *ObjHelper,
                                 bool &Translated) {
  hlsl::OP *Ops = &Helper.hlslOP;
  Value *DestPtr = CI->getArgOperand(1);

  // The operation takes no operands besides the opcode slot.
  Value *NopHit = TrivialDxilOperation(
      Opcode, {nullptr}, Type::getVoidTy(CI->getContext()), CI, Ops);

  IRBuilder<> Builder(CI);
  Builder.CreateStore(NopHit, DestPtr);

  DXASSERT(
      CI->use_empty(),
      "Default ctor return type is a Clang artifact. Value must not be used");
  return nullptr;
}
6198
6199
// Lowers the HitObject MakeMiss intrinsic.
//
// HL operand 1 is the destination hit object pointer. The following operands
// (RayFlags, MissShaderIdx, then the ray description handled by
// TransferRayDescArgs) are copied into the DXIL argument list. The result of
// the DXIL operation is stored through the destination pointer.
// Always returns nullptr.
Value *TranslateHitObjectMakeMiss(CallInst *CI, IntrinsicOp IOP,
                                  OP::OpCode Opcode,
                                  HLOperationLowerHelper &Helper,
                                  HLObjectOperationLowerHelper *ObjHelper,
                                  bool &Translated) {
  DXASSERT_NOMSG(CI->getNumArgOperands() ==
                 HLOperandIndex::kHitObjectMakeMiss_NumOp);
  hlsl::OP *Ops = &Helper.hlslOP;
  IRBuilder<> Builder(CI);

  Value *Args[DXIL::OperandIndex::kHitObjectMakeMiss_NumOp];
  Args[0] = nullptr; // Filled in by TrivialDxilOperation

  // SrcIdx walks the HL operands, DestIdx the DXIL argument slots.
  unsigned SrcIdx = 1;
  unsigned DestIdx = 1;

  Value *HitObjectPtr = CI->getArgOperand(SrcIdx);
  ++SrcIdx;

  Args[DestIdx] = CI->getArgOperand(SrcIdx); // RayFlags
  ++DestIdx;
  ++SrcIdx;

  Args[DestIdx] = CI->getArgOperand(SrcIdx); // MissShaderIdx
  ++DestIdx;
  ++SrcIdx;

  DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectMakeMiss_RayDescOpIdx);
  DXASSERT_NOMSG(DestIdx ==
                 DXIL::OperandIndex::kHitObjectMakeMiss_RayDescOpIdx);
  TransferRayDescArgs(Args, Ops, Builder, CI, DestIdx, SrcIdx);

  // Both cursors must now have consumed their whole operand list.
  DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands());
  DXASSERT_NOMSG(DestIdx == DXIL::OperandIndex::kHitObjectMakeMiss_NumOp);

  Value *MissHit = TrivialDxilOperation(Opcode, Args, Helper.voidTy, CI, Ops);
  Builder.CreateStore(MissHit, HitObjectPtr);
  return nullptr;
}
6228
6229
// Lowers MaybeReorderThread to its DXIL operation.
//
// The HL call comes in three shapes, told apart by operand count:
//   2 operands: (<Op>, HitObject Hit)
//   3 operands: (<Op>, uint CoherenceHint, uint NumCoherenceHintBitsFromLSB)
//   4 operands: (<Op>, HitObject Hit, uint CoherenceHint,
//                uint NumCoherenceHintBitsFromLSB)
// The DXIL operation always takes a hit object and both coherence values, so
// missing pieces are synthesized. Always returns nullptr.
Value *TranslateMaybeReorderThread(CallInst *CI, IntrinsicOp IOP,
                                   OP::OpCode OpCode,
                                   HLOperationLowerHelper &Helper,
                                   HLObjectOperationLowerHelper *pObjHelper,
                                   bool &Translated) {
  hlsl::OP *Ops = &Helper.hlslOP;

  const unsigned NumHLArgs = CI->getNumArgOperands();
  DXASSERT_NOMSG(NumHLArgs >= 2);

  unsigned HLIndex = 1;
  Value *HitObject = nullptr;
  if (3 != NumHLArgs) {
    // A hit object was supplied by pointer; load it.
    Value *FirstParam = CI->getArgOperand(HLIndex);
    DXASSERT_NOMSG(isa<PointerType>(FirstParam->getType()));
    IRBuilder<> Builder(CI);
    HitObject = Builder.CreateLoad(FirstParam);
    ++HLIndex;
  } else {
    // Use a NOP HitObject for MaybeReorderThread without HitObject.
    HitObject = TrivialDxilOperation(DXIL::OpCode::HitObject_MakeNop, {nullptr},
                                     Type::getVoidTy(CI->getContext()), CI,
                                     Ops);
  }

  Value *CoherenceHint = nullptr;
  Value *NumCoherenceHintBits = nullptr;
  if (2 == NumHLArgs) {
    // No hint supplied: undefined hint value with zero significant bits.
    CoherenceHint = UndefValue::get(Helper.i32Ty);
    NumCoherenceHintBits = Ops->GetU32Const(0);
  } else {
    // If there are trailing parameters, these have to be the two coherence
    // bit parameters.
    DXASSERT_NOMSG(HLIndex + 2 == NumHLArgs);
    CoherenceHint = CI->getArgOperand(HLIndex++);
    NumCoherenceHintBits = CI->getArgOperand(HLIndex++);
    DXASSERT_NOMSG(Helper.i32Ty == CoherenceHint->getType());
    DXASSERT_NOMSG(Helper.i32Ty == NumCoherenceHintBits->getType());
  }

  TrivialDxilOperation(
      OpCode, {nullptr, HitObject, CoherenceHint, NumCoherenceHintBits},
      Type::getVoidTy(CI->getContext()), CI, Ops);
  return nullptr;
}
6285
6286
// Lowers HitObject::FromRayQuery in both of its forms.
//
// HL operand 1 is the destination hit object pointer and operand 2 the ray
// query. When the call has kHitObjectFromRayQuery_WithAttrs_NumOp operands, a
// hit kind and an attribute value follow and the WithAttrs DXIL operation is
// used, overloaded on the attribute operand's type. Otherwise the plain
// operation is used. The incoming OpCode is ignored in favour of the opcode
// implied by the operand count. The result is stored through the destination
// pointer. Always returns nullptr.
Value *TranslateHitObjectFromRayQuery(CallInst *CI, IntrinsicOp IOP,
                                      OP::OpCode OpCode,
                                      HLOperationLowerHelper &Helper,
                                      HLObjectOperationLowerHelper *pObjHelper,
                                      bool &Translated) {
  hlsl::OP *Ops = &Helper.hlslOP;
  IRBuilder<> Builder(CI);

  unsigned SrcIdx = 1;
  Value *HitObjectPtr = CI->getArgOperand(SrcIdx++);
  Value *RayQuery = CI->getArgOperand(SrcIdx++);

  const bool WithAttrs =
      CI->getNumArgOperands() ==
      HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_NumOp;

  Value *NewHit = nullptr;
  if (WithAttrs) {
    Value *HitKind = CI->getArgOperand(SrcIdx++);
    Value *AttribSrc = CI->getArgOperand(SrcIdx++);
    DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands());
    NewHit = TrivialDxilOperation(
        DXIL::OpCode::HitObject_FromRayQueryWithAttrs,
        {nullptr, RayQuery, HitKind, AttribSrc}, AttribSrc->getType(), CI, Ops);
  } else {
    DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands());
    NewHit = TrivialDxilOperation(DXIL::OpCode::HitObject_FromRayQuery,
                                  {nullptr, RayQuery}, Helper.voidTy, CI, Ops);
  }

  Builder.CreateStore(NewHit, HitObjectPtr);
  return nullptr;
}
6318
6319
// Lowers HitObject::TraceRay.
//
// HL operand 1 is the destination hit object pointer. Operands from index 2 up
// to the ray description are copied one-to-one into the DXIL argument list,
// the ray description is transferred by TransferRayDescArgs, and the final HL
// operand (the payload) becomes the final DXIL argument. The DXIL function is
// overloaded on the payload operand's type. The call result is stored through
// the destination pointer. Always returns nullptr.
Value *TranslateHitObjectTraceRay(CallInst *CI, IntrinsicOp IOP,
                                  OP::OpCode OpCode,
                                  HLOperationLowerHelper &Helper,
                                  HLObjectOperationLowerHelper *pObjHelper,
                                  bool &Translated) {
  hlsl::OP *Ops = &Helper.hlslOP;
  IRBuilder<> Builder(CI);

  DXASSERT_NOMSG(CI->getNumArgOperands() ==
                 HLOperandIndex::kHitObjectTraceRay_NumOp);

  Value *Args[DXIL::OperandIndex::kHitObjectTraceRay_NumOp];
  Args[0] = Ops->GetU32Const(static_cast<unsigned>(OpCode));

  // SrcIdx walks the HL operands, DestIdx the DXIL argument slots.
  unsigned SrcIdx = 1;
  unsigned DestIdx = 1;
  Value *HitObjectPtr = CI->getArgOperand(SrcIdx++);

  // Copy at least one operand, then keep copying until the ray description
  // is reached.
  do {
    Args[DestIdx++] = CI->getArgOperand(SrcIdx++);
  } while (SrcIdx < HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx);

  DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx);
  DXASSERT_NOMSG(DestIdx ==
                 DXIL::OperandIndex::kHitObjectTraceRay_RayDescOpIdx);
  TransferRayDescArgs(Args, Ops, Builder, CI, DestIdx, SrcIdx);

  // Only the payload may remain after the ray description.
  DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands() - 1);
  DXASSERT_NOMSG(DestIdx ==
                 DXIL::OperandIndex::kHitObjectTraceRay_PayloadOpIdx);

  Value *Payload = CI->getArgOperand(SrcIdx++);
  Args[DestIdx++] = Payload;

  DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands());
  DXASSERT_NOMSG(DestIdx == DXIL::OperandIndex::kHitObjectTraceRay_NumOp);

  Function *Callee = Ops->GetOpFunc(OpCode, Payload->getType());
  Value *TracedHit = Builder.CreateCall(Callee, Args);
  Builder.CreateStore(TracedHit, HitObjectPtr);
  return nullptr;
}
6361
6362
// Lowers HitObject::Invoke.
//
// HL operand 1 is a pointer to the hit object, which is loaded, and operand 2
// is the payload. Both are passed to the DXIL operation, which is overloaded
// on the payload operand's type. Always returns nullptr.
Value *TranslateHitObjectInvoke(CallInst *CI, IntrinsicOp IOP,
                                OP::OpCode OpCode,
                                HLOperationLowerHelper &Helper,
                                HLObjectOperationLowerHelper *pObjHelper,
                                bool &Translated) {
  unsigned SrcIdx = 1;
  Value *HitObjectPtr = CI->getArgOperand(SrcIdx++);
  Value *Payload = CI->getArgOperand(SrcIdx++);
  DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands());

  IRBuilder<> Builder(CI);
  Value *LoadedHit = Builder.CreateLoad(HitObjectPtr);

  hlsl::OP *Ops = &Helper.hlslOP;
  TrivialDxilOperation(OpCode, {nullptr, LoadedHit, Payload},
                       Payload->getType(), CI, Ops);
  return nullptr;
}
6378
6379
// Lowers HitObject::GetAttributes.
//
// Loads the hit object through the pointer in HL operand 1 and passes it,
// along with the attribute output operand, to the DXIL operation. The
// operation is overloaded on the attribute operand's type.
// Always returns nullptr.
Value *TranslateHitObjectGetAttributes(CallInst *CI, IntrinsicOp IOP,
                                       OP::OpCode OpCode,
                                       HLOperationLowerHelper &Helper,
                                       HLObjectOperationLowerHelper *pObjHelper,
                                       bool &Translated) {
  hlsl::OP *Ops = &Helper.hlslOP;
  Value *HitObjectPtr = CI->getArgOperand(1);
  Value *AttrOutPtr =
      CI->getArgOperand(HLOperandIndex::kHitObjectGetAttributes_AttributeOpIdx);

  IRBuilder<> Builder(CI);
  Value *LoadedHit = Builder.CreateLoad(HitObjectPtr);

  TrivialDxilOperation(OpCode, {nullptr, LoadedHit, AttrOutPtr},
                       AttrOutPtr->getType(), CI, Ops);
  return nullptr;
}
6395
6396
// Lowers a HitObject getter whose only input is the hit object itself.
//
// Loads the hit object through the pointer in HL operand 1 and returns the
// DXIL operation built for the HL call's result type.
Value *TranslateHitObjectScalarGetter(CallInst *CI, IntrinsicOp IOP,
                                      OP::OpCode OpCode,
                                      HLOperationLowerHelper &Helper,
                                      HLObjectOperationLowerHelper *pObjHelper,
                                      bool &Translated) {
  IRBuilder<> Builder(CI);
  Value *LoadedHit = Builder.CreateLoad(CI->getArgOperand(1));

  hlsl::OP *Ops = &Helper.hlslOP;
  Value *Result =
      TrivialDxilOperation(OpCode, {nullptr, LoadedHit}, CI->getType(), CI, Ops);
  return Result;
}
6408
6409
// Lowers a HitObject getter that returns a vector.
//
// Loads the hit object through the pointer in HL operand 1 and passes it to
// TrivialDxilOperation together with the constant index vector <0, 1, 2, 3>.
// The HL call's result type must be a vector type. Returns the value produced
// by TrivialDxilOperation.
Value *TranslateHitObjectVectorGetter(CallInst *CI, IntrinsicOp IOP,
                                      OP::OpCode OpCode,
                                      HLOperationLowerHelper &Helper,
                                      HLObjectOperationLowerHelper *pObjHelper,
                                      bool &Translated) {
  hlsl::OP *Ops = &Helper.hlslOP;

  IRBuilder<> Builder(CI);
  Value *LoadedHit = Builder.CreateLoad(CI->getArgOperand(1));

  VectorType *ResultTy = cast<VectorType>(CI->getType());

  // Component indices handed to the operation.
  uint32_t ComponentIdx[] = {0, 1, 2, 3};
  Constant *IndexVec = ConstantDataVector::get(CI->getContext(), ComponentIdx);

  return TrivialDxilOperation(OpCode, {nullptr, LoadedHit, IndexVec}, ResultTy,
                              CI, Ops);
}
6423
6424
16
// Returns true only for the two HitObject matrix getters whose names end in
// 3x4 (GetObjectToWorld3x4 and GetWorldToObject3x4).
static bool IsHitObject3x4Getter(IntrinsicOp IOP) {
  return IOP == IntrinsicOp::MOP_DxHitObject_GetObjectToWorld3x4 ||
         IOP == IntrinsicOp::MOP_DxHitObject_GetWorldToObject3x4;
}
6433
6434
// Lowers a HitObject getter that returns a matrix.
//
// Loads the hit object through the pointer in HL operand 1, asks
// GetMatrixIndices for the row and column index constants (in 3x4 form when
// the intrinsic is one of the 3x4 getters), and passes all of them to
// TrivialDxilOperation. The HL call's result type must be a vector type.
// Returns the value produced by TrivialDxilOperation.
Value *TranslateHitObjectMatrixGetter(CallInst *CI, IntrinsicOp IOP,
                                      OP::OpCode OpCode,
                                      HLOperationLowerHelper &Helper,
                                      HLObjectOperationLowerHelper *pObjHelper,
                                      bool &Translated) {
  hlsl::OP *Ops = &Helper.hlslOP;

  IRBuilder<> Builder(CI);
  Value *LoadedHit = Builder.CreateLoad(CI->getArgOperand(1));

  // Create matrix indices for the requested layout.
  Constant *Rows;
  Constant *Cols;
  GetMatrixIndices<uint32_t>(Rows, Cols, IsHitObject3x4Getter(IOP),
                             CI->getContext());

  VectorType *ResultTy = cast<VectorType>(CI->getType());
  return TrivialDxilOperation(OpCode, {nullptr, LoadedHit, Rows, Cols},
                              ResultTy, CI, Ops);
}
6453
6454
// Lowers HitObject::LoadLocalRootTableConstant.
//
// Loads the hit object through the pointer in HL operand 1 and passes it with
// the offset in HL operand 2 to the DXIL operation. Returns the value produced
// by TrivialDxilOperation.
Value *TranslateHitObjectLoadLocalRootTableConstant(
    CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
    HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper,
    bool &Translated) {
  hlsl::OP *Ops = &Helper.hlslOP;
  Value *HitObjectPtr = CI->getArgOperand(1);
  Value *ByteOffset = CI->getArgOperand(2);

  IRBuilder<> Builder(CI);
  Value *LoadedHit = Builder.CreateLoad(HitObjectPtr);

  return TrivialDxilOperation(OpCode, {nullptr, LoadedHit, ByteOffset},
                              Helper.voidTy, CI, Ops);
}
6468
6469
// Lowers HitObject::SetShaderTableIndex.
//
// The DXIL operation yields a new hit object rather than updating one in
// place, so this is a load / operate / store sequence on the pointer in HL
// operand 1, with the new index taken from HL operand 2.
// Always returns nullptr.
Value *TranslateHitObjectSetShaderTableIndex(
    CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
    HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper,
    bool &Translated) {
  hlsl::OP *Ops = &Helper.hlslOP;
  Value *HitObjectPtr = CI->getArgOperand(1);
  Value *NewTableIndex = CI->getArgOperand(2);

  IRBuilder<> Builder(CI);
  Value *OldHit = Builder.CreateLoad(HitObjectPtr);
  Value *UpdatedHit = TrivialDxilOperation(
      OpCode, {nullptr, OldHit, NewTableIndex}, Helper.voidTy, CI, Ops);
  Builder.CreateStore(UpdatedHit, HitObjectPtr);
  return nullptr;
}
6485
6486
} // namespace
6487
6488
// Resource Handle.
6489
namespace {
6490
// Lowers a get-handle-from-heap intrinsic.
//
// Forwards the two HL source operands to the DXIL operation and appends a
// constant false as the final i1 argument. Returns the new call.
Value *TranslateGetHandleFromHeap(CallInst *CI, IntrinsicOp IOP,
                                  DXIL::OpCode opcode,
                                  HLOperationLowerHelper &helper,
                                  HLObjectOperationLowerHelper *pObjHelper,
                                  bool &Translated) {
  hlsl::OP &Ops = helper.hlslOP;
  Function *Callee = Ops.GetOpFunc(opcode, helper.voidTy);

  IRBuilder<> Builder(CI);
  Value *OpcodeArg =
      ConstantInt::get(helper.i32Ty, static_cast<unsigned>(opcode));
  Value *Src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *Src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  // TODO: update nonUniformIndex later.
  Value *NonUniformIndex = Builder.getInt1(false);

  return Builder.CreateCall(Callee,
                            {OpcodeArg, Src0, Src1, NonUniformIndex});
}
6505
} // namespace
6506
6507
// Translate and/or/select intrinsics
6508
namespace {
6509
// Lowers the `and` intrinsic to an LLVM `and` of its two source operands,
// created in front of CI. Returns the resulting value.
Value *TranslateAnd(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                    HLOperationLowerHelper &helper,
                    HLObjectOperationLowerHelper *pObjHelper,
                    bool &Translated) {
  Value *LHS = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *RHS = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  return IRBuilder<>(CI).CreateAnd(LHS, RHS);
}
6519
// Lowers the `or` intrinsic to an LLVM `or` of its two source operands,
// created in front of CI. Returns the resulting value.
Value *TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                   HLOperationLowerHelper &helper,
                   HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  Value *LHS = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *RHS = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  return IRBuilder<>(CI).CreateOr(LHS, RHS);
}
6528
// Lowers the `select` intrinsic to an LLVM `select`: the first source operand
// is the condition, the second the value when true, the third the value when
// false. Returns the resulting value.
Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                       HLOperationLowerHelper &helper,
                       HLObjectOperationLowerHelper *pObjHelper,
                       bool &Translated) {
  Value *Condition = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
  Value *IfTrue = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
  Value *IfFalse = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
  return IRBuilder<>(CI).CreateSelect(Condition, IfTrue, IfFalse);
}
6539
6540
// Lowers the matrix-vector multiply intrinsic.
//
// Every HL operand except the output vector is forwarded unchanged to the DXIL
// operation, in the order listed below. The DXIL function is overloaded on
// two types: the pointee type of the output vector operand and the type of
// the input vector. The call result is stored through the output vector
// operand. Always returns nullptr.
Value *TranslateMatVecMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
                          HLOperationLowerHelper &Helper,
                          HLObjectOperationLowerHelper *ObjHelper,
                          bool &Translated) {
  hlsl::OP *HlslOp = &Helper.hlslOP;
  IRBuilder<> Builder(CI);

  // HL operand slots in DXIL argument order: input vector description,
  // matrix description, then the output signedness flag.
  const unsigned ForwardedOperands[] = {
      HLOperandIndex::kMatVecMulInputVectorIdx,
      HLOperandIndex::kMatVecMulIsInputUnsignedIdx,
      HLOperandIndex::kMatVecMulInputInterpretationIdx,
      HLOperandIndex::kMatVecMulMatrixBufferIdx,
      HLOperandIndex::kMatVecMulMatrixOffsetIdx,
      HLOperandIndex::kMatVecMulMatrixInterpretationIdx,
      HLOperandIndex::kMatVecMulMatrixMIdx,
      HLOperandIndex::kMatVecMulMatrixKIdx,
      HLOperandIndex::kMatVecMulMatrixLayoutIdx,
      HLOperandIndex::kMatVecMulMatrixTransposeIdx,
      HLOperandIndex::kMatVecMulMatrixStrideIdx,
      HLOperandIndex::kMatVecMulIsOutputUnsignedIdx,
  };

  SmallVector<Value *, 16> CallArgs;
  CallArgs.push_back(HlslOp->GetU32Const(static_cast<unsigned>(OpCode)));
  for (unsigned HLIdx : ForwardedOperands)
    CallArgs.push_back(CI->getArgOperand(HLIdx));

  Value *OutParam =
      CI->getArgOperand(HLOperandIndex::kMatVecMulOutputVectorIdx);
  Value *InputVector =
      CI->getArgOperand(HLOperandIndex::kMatVecMulInputVectorIdx);

  // Get the DXIL function for the operation.
  Function *DxilFunc = HlslOp->GetOpFunc(
      OpCode, {OutParam->getType()->getPointerElementType(),
               InputVector->getType()});

  // Call it and write the product back through the output parameter.
  Value *Product = Builder.CreateCall(DxilFunc, CallArgs);
  Builder.CreateStore(Product, OutParam);

  return nullptr;
}
6600
6601
// Lowers the matrix-vector multiply-add intrinsic.
//
// Every HL operand except the output vector is forwarded unchanged to the DXIL
// operation, in the order listed below. The DXIL function is overloaded on
// two types: the pointee type of the output vector operand and the type of
// the input vector. The call result is stored through the output vector
// operand. Always returns nullptr.
Value *TranslateMatVecMulAdd(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
                             HLOperationLowerHelper &Helper,
                             HLObjectOperationLowerHelper *ObjHelper,
                             bool &Translated) {
  hlsl::OP *HlslOp = &Helper.hlslOP;
  IRBuilder<> Builder(CI);

  // HL operand slots in DXIL argument order: input vector description,
  // matrix description, bias description, then the output signedness flag.
  const unsigned ForwardedOperands[] = {
      HLOperandIndex::kMatVecMulAddInputVectorIdx,
      HLOperandIndex::kMatVecMulAddIsInputUnsignedIdx,
      HLOperandIndex::kMatVecMulAddInputInterpretationIdx,
      HLOperandIndex::kMatVecMulAddMatrixBufferIdx,
      HLOperandIndex::kMatVecMulAddMatrixOffsetIdx,
      HLOperandIndex::kMatVecMulAddMatrixInterpretationIdx,
      HLOperandIndex::kMatVecMulAddMatrixMIdx,
      HLOperandIndex::kMatVecMulAddMatrixKIdx,
      HLOperandIndex::kMatVecMulAddMatrixLayoutIdx,
      HLOperandIndex::kMatVecMulAddMatrixTransposeIdx,
      HLOperandIndex::kMatVecMulAddMatrixStrideIdx,
      HLOperandIndex::kMatVecMulAddBiasBufferIdx,
      HLOperandIndex::kMatVecMulAddBiasOffsetIdx,
      HLOperandIndex::kMatVecMulAddBiasInterpretationIdx,
      HLOperandIndex::kMatVecMulAddIsOutputUnsignedIdx,
  };

  SmallVector<Value *, 16> CallArgs;
  CallArgs.push_back(HlslOp->GetU32Const(static_cast<unsigned>(OpCode)));
  for (unsigned HLIdx : ForwardedOperands)
    CallArgs.push_back(CI->getArgOperand(HLIdx));

  Value *OutParam =
      CI->getArgOperand(HLOperandIndex::kMatVecMulAddOutputVectorIdx);
  Value *InputVector =
      CI->getArgOperand(HLOperandIndex::kMatVecMulAddInputVectorIdx);

  // Get the DXIL function for the operation.
  Function *DxilFunc = HlslOp->GetOpFunc(
      OpCode, {OutParam->getType()->getPointerElementType(),
               InputVector->getType()});

  // Call it and write the result back through the output parameter.
  Value *Result = Builder.CreateCall(DxilFunc, CallArgs);
  Builder.CreateStore(Result, OutParam);

  return nullptr;
}
6668
6669
// Lowers the outer-product-accumulate intrinsic.
//
// Forwards the two input vectors followed by the matrix buffer, offset,
// interpretation, layout and stride operands to the DXIL operation, which is
// overloaded on the types of both input vectors. Returns the new call.
Value *TranslateOuterProductAccumulate(CallInst *CI, IntrinsicOp IOP,
                                       OP::OpCode OpCode,
                                       HLOperationLowerHelper &Helper,
                                       HLObjectOperationLowerHelper *ObjHelper,
                                       bool &Translated) {
  hlsl::OP *HlslOp = &Helper.hlslOP;
  IRBuilder<> Builder(CI);

  // HL operand slots in DXIL argument order.
  const unsigned ForwardedOperands[] = {
      HLOperandIndex::kOuterProdAccInputVec1Idx,
      HLOperandIndex::kOuterProdAccInputVec2Idx,
      HLOperandIndex::kOuterProdAccMatrixIdx,
      HLOperandIndex::kOuterProdAccMatrixOffsetIdx,
      HLOperandIndex::kOuterProdAccMatrixInterpretationIdx,
      HLOperandIndex::kOuterProdAccMatrixLayoutIdx,
      HLOperandIndex::kOuterProdAccMatrixStrideIdx,
  };

  SmallVector<Value *, 8> CallArgs;
  CallArgs.push_back(HlslOp->GetU32Const(static_cast<unsigned>(OpCode)));
  for (unsigned HLIdx : ForwardedOperands)
    CallArgs.push_back(CI->getArgOperand(HLIdx));

  Value *InputVector1 =
      CI->getArgOperand(HLOperandIndex::kOuterProdAccInputVec1Idx);
  Value *InputVector2 =
      CI->getArgOperand(HLOperandIndex::kOuterProdAccInputVec2Idx);

  // Get the DXIL function for the operation.
  Function *DxilFunc = HlslOp->GetOpFunc(
      OpCode, {InputVector1->getType(), InputVector2->getType()});

  return Builder.CreateCall(DxilFunc, CallArgs);
}
6706
6707
// Lowers the vector-accumulate intrinsic.
//
// Forwards the input vector, the matrix buffer and the matrix offset to the
// DXIL operation, which is overloaded on the input vector's type.
// Returns the new call.
Value *TranslateVectorAccumulate(CallInst *CI, IntrinsicOp IOP,
                                 OP::OpCode OpCode,
                                 HLOperationLowerHelper &Helper,
                                 HLObjectOperationLowerHelper *ObjHelper,
                                 bool &Translated) {
  hlsl::OP *HlslOp = &Helper.hlslOP;
  IRBuilder<> Builder(CI);

  Value *InputVector = CI->getArgOperand(HLOperandIndex::kVectorAccInputVecIdx);
  Value *CallArgs[] = {
      HlslOp->GetU32Const(static_cast<unsigned>(OpCode)),
      InputVector,
      CI->getArgOperand(HLOperandIndex::kVectorAccMatrixIdx),
      CI->getArgOperand(HLOperandIndex::kVectorAccMatrixOffsetIdx),
  };

  // Get the DXIL function for the operation.
  Function *DxilFunc = HlslOp->GetOpFunc(OpCode, InputVector->getType());

  return Builder.CreateCall(DxilFunc, CallArgs);
}
6732
6733
} // namespace
6734
6735
// Lower table.
6736
namespace {
6737
6738
// Lowering callback that performs no lowering: it marks the call as not
// translated, reports "Unsupported intrinsic." against CI, and returns no
// replacement value. The remaining parameters exist only to satisfy the
// common lowering signature.
Value *EmptyLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
                  HLOperationLowerHelper &helper,
                  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  // Clear the flag before emitting the diagnostic, matching the established
  // order of side effects.
  Translated = false;
  dxilutil::EmitErrorOnInstruction(CI, "Unsupported intrinsic.");

  return nullptr;
}
6745
6746
// SPIRV change starts
6747
// Lowering callback used for the Vk* intrinsic table entries: it marks the
// call as not translated, reports "Unsupported Vulkan intrinsic." against CI,
// and returns no replacement value. The remaining parameters exist only to
// satisfy the common lowering signature.
Value *UnsupportedVulkanIntrinsic(CallInst *CI, IntrinsicOp IOP,
                                  DXIL::OpCode opcode,
                                  HLOperationLowerHelper &helper,
                                  HLObjectOperationLowerHelper *pObjHelper,
                                  bool &Translated) {
  // Clear the flag before emitting the diagnostic, matching the established
  // order of side effects.
  Translated = false;
  dxilutil::EmitErrorOnInstruction(CI, "Unsupported Vulkan intrinsic.");

  return nullptr;
}
6756
// SPIRV change ends
6757
6758
// Placeholder lowering callback for stream-output methods. These calls are
// translated in DxilGenerationPass::GenerateStreamOutputOperation, so nothing
// is emitted here: the call is flagged as not translated and no replacement
// value is returned. No diagnostic is issued.
Value *StreamOutputLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
                         HLOperationLowerHelper &helper,
                         HLObjectOperationLowerHelper *pObjHelper,
                         bool &Translated) {
  Translated = false; // Leave the call in place for the later pass.
  return nullptr;
}
6768
6769
// The entries of this table must appear in the same order as the IntrinsicOp
// values.
6770
IntrinsicLower gLowerTable[] = {
6771
    {IntrinsicOp::IOP_AcceptHitAndEndSearch,
6772
     TranslateNoArgNoReturnPreserveOutput, DXIL::OpCode::AcceptHitAndEndSearch},
6773
    {IntrinsicOp::IOP_AddUint64, TranslateAddUint64, DXIL::OpCode::UAddc},
6774
    {IntrinsicOp::IOP_AllMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
6775
    {IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync, TrivialBarrier,
6776
     DXIL::OpCode::Barrier},
6777
    {IntrinsicOp::IOP_AllocateRayQuery, TranslateAllocateRayQuery,
6778
     DXIL::OpCode::AllocateRayQuery},
6779
    {IntrinsicOp::IOP_Barrier, TranslateBarrier, DXIL::OpCode::NumOpCodes},
6780
    {IntrinsicOp::IOP_CallShader, TranslateCallShader,
6781
     DXIL::OpCode::CallShader},
6782
    {IntrinsicOp::IOP_CheckAccessFullyMapped, TranslateCheckAccess,
6783
     DXIL::OpCode::CheckAccessFullyMapped},
6784
    {IntrinsicOp::IOP_CreateResourceFromHeap, TranslateGetHandleFromHeap,
6785
     DXIL::OpCode::CreateHandleFromHeap},
6786
    {IntrinsicOp::IOP_D3DCOLORtoUBYTE4, TranslateD3DColorToUByte4,
6787
     DXIL::OpCode::NumOpCodes},
6788
    {IntrinsicOp::IOP_DeviceMemoryBarrier, TrivialBarrier,
6789
     DXIL::OpCode::Barrier},
6790
    {IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync, TrivialBarrier,
6791
     DXIL::OpCode::Barrier},
6792
    {IntrinsicOp::IOP_DispatchMesh, TrivialDispatchMesh,
6793
     DXIL::OpCode::DispatchMesh},
6794
    {IntrinsicOp::IOP_DispatchRaysDimensions, TranslateNoArgVectorOperation,
6795
     DXIL::OpCode::DispatchRaysDimensions},
6796
    {IntrinsicOp::IOP_DispatchRaysIndex, TranslateNoArgVectorOperation,
6797
     DXIL::OpCode::DispatchRaysIndex},
6798
    {IntrinsicOp::IOP_EvaluateAttributeAtSample, TranslateEvalSample,
6799
     DXIL::OpCode::NumOpCodes},
6800
    {IntrinsicOp::IOP_EvaluateAttributeCentroid, TranslateEvalCentroid,
6801
     DXIL::OpCode::EvalCentroid},
6802
    {IntrinsicOp::IOP_EvaluateAttributeSnapped, TranslateEvalSnapped,
6803
     DXIL::OpCode::NumOpCodes},
6804
    {IntrinsicOp::IOP_GeometryIndex, TrivialNoArgWithRetOperation,
6805
     DXIL::OpCode::GeometryIndex},
6806
    {IntrinsicOp::IOP_GetAttributeAtVertex, TranslateGetAttributeAtVertex,
6807
     DXIL::OpCode::AttributeAtVertex},
6808
    {IntrinsicOp::IOP_GetRemainingRecursionLevels, TrivialNoArgOperation,
6809
     DXIL::OpCode::GetRemainingRecursionLevels},
6810
    {IntrinsicOp::IOP_GetRenderTargetSampleCount, TrivialNoArgOperation,
6811
     DXIL::OpCode::RenderTargetGetSampleCount},
6812
    {IntrinsicOp::IOP_GetRenderTargetSamplePosition, TranslateGetRTSamplePos,
6813
     DXIL::OpCode::NumOpCodes},
6814
    {IntrinsicOp::IOP_GroupMemoryBarrier, TrivialBarrier,
6815
     DXIL::OpCode::Barrier},
6816
    {IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync, TrivialBarrier,
6817
     DXIL::OpCode::Barrier},
6818
    {IntrinsicOp::IOP_HitKind, TrivialNoArgWithRetOperation,
6819
     DXIL::OpCode::HitKind},
6820
    {IntrinsicOp::IOP_IgnoreHit, TranslateNoArgNoReturnPreserveOutput,
6821
     DXIL::OpCode::IgnoreHit},
6822
    {IntrinsicOp::IOP_InstanceID, TrivialNoArgWithRetOperation,
6823
     DXIL::OpCode::InstanceID},
6824
    {IntrinsicOp::IOP_InstanceIndex, TrivialNoArgWithRetOperation,
6825
     DXIL::OpCode::InstanceIndex},
6826
    {IntrinsicOp::IOP_InterlockedAdd, TranslateIopAtomicBinaryOperation,
6827
     DXIL::OpCode::NumOpCodes},
6828
    {IntrinsicOp::IOP_InterlockedAnd, TranslateIopAtomicBinaryOperation,
6829
     DXIL::OpCode::NumOpCodes},
6830
    {IntrinsicOp::IOP_InterlockedCompareExchange, TranslateIopAtomicCmpXChg,
6831
     DXIL::OpCode::NumOpCodes},
6832
    {IntrinsicOp::IOP_InterlockedCompareExchangeFloatBitwise,
6833
     TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
6834
    {IntrinsicOp::IOP_InterlockedCompareStore, TranslateIopAtomicCmpXChg,
6835
     DXIL::OpCode::NumOpCodes},
6836
    {IntrinsicOp::IOP_InterlockedCompareStoreFloatBitwise,
6837
     TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
6838
    {IntrinsicOp::IOP_InterlockedExchange, TranslateIopAtomicBinaryOperation,
6839
     DXIL::OpCode::NumOpCodes},
6840
    {IntrinsicOp::IOP_InterlockedMax, TranslateIopAtomicBinaryOperation,
6841
     DXIL::OpCode::NumOpCodes},
6842
    {IntrinsicOp::IOP_InterlockedMin, TranslateIopAtomicBinaryOperation,
6843
     DXIL::OpCode::NumOpCodes},
6844
    {IntrinsicOp::IOP_InterlockedOr, TranslateIopAtomicBinaryOperation,
6845
     DXIL::OpCode::NumOpCodes},
6846
    {IntrinsicOp::IOP_InterlockedXor, TranslateIopAtomicBinaryOperation,
6847
     DXIL::OpCode::NumOpCodes},
6848
    {IntrinsicOp::IOP_IsHelperLane, TrivialNoArgWithRetOperation,
6849
     DXIL::OpCode::IsHelperLane},
6850
    {IntrinsicOp::IOP_NonUniformResourceIndex, TranslateNonUniformResourceIndex,
6851
     DXIL::OpCode::NumOpCodes},
6852
    {IntrinsicOp::IOP_ObjectRayDirection, TranslateNoArgVectorOperation,
6853
     DXIL::OpCode::ObjectRayDirection},
6854
    {IntrinsicOp::IOP_ObjectRayOrigin, TranslateNoArgVectorOperation,
6855
     DXIL::OpCode::ObjectRayOrigin},
6856
    {IntrinsicOp::IOP_ObjectToWorld, TranslateNoArgMatrix3x4Operation,
6857
     DXIL::OpCode::ObjectToWorld},
6858
    {IntrinsicOp::IOP_ObjectToWorld3x4, TranslateNoArgMatrix3x4Operation,
6859
     DXIL::OpCode::ObjectToWorld},
6860
    {IntrinsicOp::IOP_ObjectToWorld4x3,
6861
     TranslateNoArgTransposedMatrix3x4Operation, DXIL::OpCode::ObjectToWorld},
6862
    {IntrinsicOp::IOP_PrimitiveIndex, TrivialNoArgWithRetOperation,
6863
     DXIL::OpCode::PrimitiveIndex},
6864
    {IntrinsicOp::IOP_Process2DQuadTessFactorsAvg, TranslateProcessTessFactors,
6865
     DXIL::OpCode::NumOpCodes},
6866
    {IntrinsicOp::IOP_Process2DQuadTessFactorsMax, TranslateProcessTessFactors,
6867
     DXIL::OpCode::NumOpCodes},
6868
    {IntrinsicOp::IOP_Process2DQuadTessFactorsMin, TranslateProcessTessFactors,
6869
     DXIL::OpCode::NumOpCodes},
6870
    {IntrinsicOp::IOP_ProcessIsolineTessFactors,
6871
     TranslateProcessIsolineTessFactors, DXIL::OpCode::NumOpCodes},
6872
    {IntrinsicOp::IOP_ProcessQuadTessFactorsAvg, TranslateProcessTessFactors,
6873
     DXIL::OpCode::NumOpCodes},
6874
    {IntrinsicOp::IOP_ProcessQuadTessFactorsMax, TranslateProcessTessFactors,
6875
     DXIL::OpCode::NumOpCodes},
6876
    {IntrinsicOp::IOP_ProcessQuadTessFactorsMin, TranslateProcessTessFactors,
6877
     DXIL::OpCode::NumOpCodes},
6878
    {IntrinsicOp::IOP_ProcessTriTessFactorsAvg, TranslateProcessTessFactors,
6879
     DXIL::OpCode::NumOpCodes},
6880
    {IntrinsicOp::IOP_ProcessTriTessFactorsMax, TranslateProcessTessFactors,
6881
     DXIL::OpCode::NumOpCodes},
6882
    {IntrinsicOp::IOP_ProcessTriTessFactorsMin, TranslateProcessTessFactors,
6883
     DXIL::OpCode::NumOpCodes},
6884
    {IntrinsicOp::IOP_QuadAll, TranslateQuadAnyAll, DXIL::OpCode::QuadVote},
6885
    {IntrinsicOp::IOP_QuadAny, TranslateQuadAnyAll, DXIL::OpCode::QuadVote},
6886
    {IntrinsicOp::IOP_QuadReadAcrossDiagonal, TranslateQuadReadAcross,
6887
     DXIL::OpCode::QuadOp},
6888
    {IntrinsicOp::IOP_QuadReadAcrossX, TranslateQuadReadAcross,
6889
     DXIL::OpCode::QuadOp},
6890
    {IntrinsicOp::IOP_QuadReadAcrossY, TranslateQuadReadAcross,
6891
     DXIL::OpCode::QuadOp},
6892
    {IntrinsicOp::IOP_QuadReadLaneAt, TranslateQuadReadLaneAt,
6893
     DXIL::OpCode::NumOpCodes},
6894
    {IntrinsicOp::IOP_RayFlags, TrivialNoArgWithRetOperation,
6895
     DXIL::OpCode::RayFlags},
6896
    {IntrinsicOp::IOP_RayTCurrent, TrivialNoArgWithRetOperation,
6897
     DXIL::OpCode::RayTCurrent},
6898
    {IntrinsicOp::IOP_RayTMin, TrivialNoArgWithRetOperation,
6899
     DXIL::OpCode::RayTMin},
6900
    {IntrinsicOp::IOP_ReportHit, TranslateReportIntersection,
6901
     DXIL::OpCode::ReportHit},
6902
    {IntrinsicOp::IOP_SetMeshOutputCounts, TrivialSetMeshOutputCounts,
6903
     DXIL::OpCode::SetMeshOutputCounts},
6904
    {IntrinsicOp::IOP_TraceRay, TranslateTraceRay, DXIL::OpCode::TraceRay},
6905
    {IntrinsicOp::IOP_WaveActiveAllEqual, TranslateWaveAllEqual,
6906
     DXIL::OpCode::WaveActiveAllEqual},
6907
    {IntrinsicOp::IOP_WaveActiveAllTrue, TranslateWaveA2B,
6908
     DXIL::OpCode::WaveAllTrue},
6909
    {IntrinsicOp::IOP_WaveActiveAnyTrue, TranslateWaveA2B,
6910
     DXIL::OpCode::WaveAnyTrue},
6911
    {IntrinsicOp::IOP_WaveActiveBallot, TranslateWaveBallot,
6912
     DXIL::OpCode::WaveActiveBallot},
6913
    {IntrinsicOp::IOP_WaveActiveBitAnd, TranslateWaveA2A,
6914
     DXIL::OpCode::WaveActiveBit},
6915
    {IntrinsicOp::IOP_WaveActiveBitOr, TranslateWaveA2A,
6916
     DXIL::OpCode::WaveActiveBit},
6917
    {IntrinsicOp::IOP_WaveActiveBitXor, TranslateWaveA2A,
6918
     DXIL::OpCode::WaveActiveBit},
6919
    {IntrinsicOp::IOP_WaveActiveCountBits, TranslateWaveA2B,
6920
     DXIL::OpCode::WaveAllBitCount},
6921
    {IntrinsicOp::IOP_WaveActiveMax, TranslateWaveA2A,
6922
     DXIL::OpCode::WaveActiveOp},
6923
    {IntrinsicOp::IOP_WaveActiveMin, TranslateWaveA2A,
6924
     DXIL::OpCode::WaveActiveOp},
6925
    {IntrinsicOp::IOP_WaveActiveProduct, TranslateWaveA2A,
6926
     DXIL::OpCode::WaveActiveOp},
6927
    {IntrinsicOp::IOP_WaveActiveSum, TranslateWaveA2A,
6928
     DXIL::OpCode::WaveActiveOp},
6929
    {IntrinsicOp::IOP_WaveGetLaneCount, TranslateWaveToVal,
6930
     DXIL::OpCode::WaveGetLaneCount},
6931
    {IntrinsicOp::IOP_WaveGetLaneIndex, TranslateWaveToVal,
6932
     DXIL::OpCode::WaveGetLaneIndex},
6933
    {IntrinsicOp::IOP_WaveIsFirstLane, TranslateWaveToVal,
6934
     DXIL::OpCode::WaveIsFirstLane},
6935
    {IntrinsicOp::IOP_WaveMatch, TranslateWaveMatch, DXIL::OpCode::WaveMatch},
6936
    {IntrinsicOp::IOP_WaveMultiPrefixBitAnd, TranslateWaveMultiPrefix,
6937
     DXIL::OpCode::WaveMultiPrefixOp},
6938
    {IntrinsicOp::IOP_WaveMultiPrefixBitOr, TranslateWaveMultiPrefix,
6939
     DXIL::OpCode::WaveMultiPrefixOp},
6940
    {IntrinsicOp::IOP_WaveMultiPrefixBitXor, TranslateWaveMultiPrefix,
6941
     DXIL::OpCode::WaveMultiPrefixOp},
6942
    {IntrinsicOp::IOP_WaveMultiPrefixCountBits,
6943
     TranslateWaveMultiPrefixBitCount, DXIL::OpCode::WaveMultiPrefixBitCount},
6944
    {IntrinsicOp::IOP_WaveMultiPrefixProduct, TranslateWaveMultiPrefix,
6945
     DXIL::OpCode::WaveMultiPrefixOp},
6946
    {IntrinsicOp::IOP_WaveMultiPrefixSum, TranslateWaveMultiPrefix,
6947
     DXIL::OpCode::WaveMultiPrefixOp},
6948
    {IntrinsicOp::IOP_WavePrefixCountBits, TranslateWaveA2B,
6949
     DXIL::OpCode::WavePrefixBitCount},
6950
    {IntrinsicOp::IOP_WavePrefixProduct, TranslateWaveA2A,
6951
     DXIL::OpCode::WavePrefixOp},
6952
    {IntrinsicOp::IOP_WavePrefixSum, TranslateWaveA2A,
6953
     DXIL::OpCode::WavePrefixOp},
6954
    {IntrinsicOp::IOP_WaveReadLaneAt, TranslateWaveReadLaneAt,
6955
     DXIL::OpCode::WaveReadLaneAt},
6956
    {IntrinsicOp::IOP_WaveReadLaneFirst, TranslateWaveReadLaneFirst,
6957
     DXIL::OpCode::WaveReadLaneFirst},
6958
    {IntrinsicOp::IOP_WorldRayDirection, TranslateNoArgVectorOperation,
6959
     DXIL::OpCode::WorldRayDirection},
6960
    {IntrinsicOp::IOP_WorldRayOrigin, TranslateNoArgVectorOperation,
6961
     DXIL::OpCode::WorldRayOrigin},
6962
    {IntrinsicOp::IOP_WorldToObject, TranslateNoArgMatrix3x4Operation,
6963
     DXIL::OpCode::WorldToObject},
6964
    {IntrinsicOp::IOP_WorldToObject3x4, TranslateNoArgMatrix3x4Operation,
6965
     DXIL::OpCode::WorldToObject},
6966
    {IntrinsicOp::IOP_WorldToObject4x3,
6967
     TranslateNoArgTransposedMatrix3x4Operation, DXIL::OpCode::WorldToObject},
6968
    {IntrinsicOp::IOP_abort, EmptyLower, DXIL::OpCode::NumOpCodes},
6969
    {IntrinsicOp::IOP_abs, TranslateAbs, DXIL::OpCode::NumOpCodes},
6970
    {IntrinsicOp::IOP_acos, TrivialUnaryOperation, DXIL::OpCode::Acos},
6971
    {IntrinsicOp::IOP_all, TranslateAll, DXIL::OpCode::NumOpCodes},
6972
    {IntrinsicOp::IOP_and, TranslateAnd, DXIL::OpCode::NumOpCodes},
6973
    {IntrinsicOp::IOP_any, TranslateAny, DXIL::OpCode::NumOpCodes},
6974
    {IntrinsicOp::IOP_asdouble, TranslateAsDouble, DXIL::OpCode::MakeDouble},
6975
    {IntrinsicOp::IOP_asfloat, TranslateBitcast, DXIL::OpCode::NumOpCodes},
6976
    {IntrinsicOp::IOP_asfloat16, TranslateBitcast, DXIL::OpCode::NumOpCodes},
6977
    {IntrinsicOp::IOP_asin, TrivialUnaryOperation, DXIL::OpCode::Asin},
6978
    {IntrinsicOp::IOP_asint, TranslateBitcast, DXIL::OpCode::NumOpCodes},
6979
    {IntrinsicOp::IOP_asint16, TranslateBitcast, DXIL::OpCode::NumOpCodes},
6980
    {IntrinsicOp::IOP_asuint, TranslateAsUint, DXIL::OpCode::SplitDouble},
6981
    {IntrinsicOp::IOP_asuint16, TranslateAsUint, DXIL::OpCode::NumOpCodes},
6982
    {IntrinsicOp::IOP_atan, TrivialUnaryOperation, DXIL::OpCode::Atan},
6983
    {IntrinsicOp::IOP_atan2, TranslateAtan2, DXIL::OpCode::NumOpCodes},
6984
    {IntrinsicOp::IOP_ceil, TrivialUnaryOperation, DXIL::OpCode::Round_pi},
6985
    {IntrinsicOp::IOP_clamp, TranslateClamp, DXIL::OpCode::NumOpCodes},
6986
    {IntrinsicOp::IOP_clip, TranslateClip, DXIL::OpCode::NumOpCodes},
6987
    {IntrinsicOp::IOP_cos, TrivialUnaryOperation, DXIL::OpCode::Cos},
6988
    {IntrinsicOp::IOP_cosh, TrivialUnaryOperation, DXIL::OpCode::Hcos},
6989
    {IntrinsicOp::IOP_countbits, TrivialUnaryOperationRet,
6990
     DXIL::OpCode::Countbits},
6991
    {IntrinsicOp::IOP_cross, TranslateCross, DXIL::OpCode::NumOpCodes},
6992
    {IntrinsicOp::IOP_ddx, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX},
6993
    {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperation,
6994
     DXIL::OpCode::DerivCoarseX},
6995
    {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperation,
6996
     DXIL::OpCode::DerivFineX},
6997
    {IntrinsicOp::IOP_ddy, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY},
6998
    {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperation,
6999
     DXIL::OpCode::DerivCoarseY},
7000
    {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperation,
7001
     DXIL::OpCode::DerivFineY},
7002
    {IntrinsicOp::IOP_degrees, TranslateDegrees, DXIL::OpCode::NumOpCodes},
7003
    {IntrinsicOp::IOP_determinant, EmptyLower, DXIL::OpCode::NumOpCodes},
7004
    {IntrinsicOp::IOP_distance, TranslateDistance, DXIL::OpCode::NumOpCodes},
7005
    {IntrinsicOp::IOP_dot, TranslateDot, DXIL::OpCode::NumOpCodes},
7006
    {IntrinsicOp::IOP_dot2add, TranslateDot2Add, DXIL::OpCode::Dot2AddHalf},
7007
    {IntrinsicOp::IOP_dot4add_i8packed, TranslateDot4AddPacked,
7008
     DXIL::OpCode::Dot4AddI8Packed},
7009
    {IntrinsicOp::IOP_dot4add_u8packed, TranslateDot4AddPacked,
7010
     DXIL::OpCode::Dot4AddU8Packed},
7011
    {IntrinsicOp::IOP_dst, TranslateDst, DXIL::OpCode::NumOpCodes},
7012
    {IntrinsicOp::IOP_exp, TranslateExp, DXIL::OpCode::NumOpCodes},
7013
    {IntrinsicOp::IOP_exp2, TrivialUnaryOperation, DXIL::OpCode::Exp},
7014
    {IntrinsicOp::IOP_f16tof32, TranslateF16ToF32,
7015
     DXIL::OpCode::LegacyF16ToF32},
7016
    {IntrinsicOp::IOP_f32tof16, TranslateF32ToF16,
7017
     DXIL::OpCode::LegacyF32ToF16},
7018
    {IntrinsicOp::IOP_faceforward, TranslateFaceforward,
7019
     DXIL::OpCode::NumOpCodes},
7020
    {IntrinsicOp::IOP_firstbithigh, TranslateFirstbitHi,
7021
     DXIL::OpCode::FirstbitSHi},
7022
    {IntrinsicOp::IOP_firstbitlow, TranslateFirstbitLo,
7023
     DXIL::OpCode::FirstbitLo},
7024
    {IntrinsicOp::IOP_floor, TrivialUnaryOperation, DXIL::OpCode::Round_ni},
7025
    {IntrinsicOp::IOP_fma, TrivialTrinaryOperation, DXIL::OpCode::Fma},
7026
    {IntrinsicOp::IOP_fmod, TranslateFMod, DXIL::OpCode::NumOpCodes},
7027
    {IntrinsicOp::IOP_frac, TrivialUnaryOperation, DXIL::OpCode::Frc},
7028
    {IntrinsicOp::IOP_frexp, TranslateFrexp, DXIL::OpCode::NumOpCodes},
7029
    {IntrinsicOp::IOP_fwidth, TranslateFWidth, DXIL::OpCode::NumOpCodes},
7030
    {IntrinsicOp::IOP_isfinite, TrivialIsSpecialFloat, DXIL::OpCode::IsFinite},
7031
    {IntrinsicOp::IOP_isinf, TrivialIsSpecialFloat, DXIL::OpCode::IsInf},
7032
    {IntrinsicOp::IOP_isnan, TrivialIsSpecialFloat, DXIL::OpCode::IsNaN},
7033
    {IntrinsicOp::IOP_ldexp, TranslateLdExp, DXIL::OpCode::NumOpCodes},
7034
    {IntrinsicOp::IOP_length, TranslateLength, DXIL::OpCode::NumOpCodes},
7035
    {IntrinsicOp::IOP_lerp, TranslateLerp, DXIL::OpCode::NumOpCodes},
7036
    {IntrinsicOp::IOP_lit, TranslateLit, DXIL::OpCode::NumOpCodes},
7037
    {IntrinsicOp::IOP_log, TranslateLog, DXIL::OpCode::NumOpCodes},
7038
    {IntrinsicOp::IOP_log10, TranslateLog10, DXIL::OpCode::NumOpCodes},
7039
    {IntrinsicOp::IOP_log2, TrivialUnaryOperation, DXIL::OpCode::Log},
7040
    {IntrinsicOp::IOP_mad, TranslateFUITrinary, DXIL::OpCode::IMad},
7041
    {IntrinsicOp::IOP_max, TranslateFUIBinary, DXIL::OpCode::IMax},
7042
    {IntrinsicOp::IOP_min, TranslateFUIBinary, DXIL::OpCode::IMin},
7043
    {IntrinsicOp::IOP_modf, TranslateModF, DXIL::OpCode::NumOpCodes},
7044
    {IntrinsicOp::IOP_msad4, TranslateMSad4, DXIL::OpCode::NumOpCodes},
7045
    {IntrinsicOp::IOP_mul, TranslateMul, DXIL::OpCode::NumOpCodes},
7046
    {IntrinsicOp::IOP_normalize, TranslateNormalize, DXIL::OpCode::NumOpCodes},
7047
    {IntrinsicOp::IOP_or, TranslateOr, DXIL::OpCode::NumOpCodes},
7048
    {IntrinsicOp::IOP_pack_clamp_s8, TranslatePack, DXIL::OpCode::Pack4x8},
7049
    {IntrinsicOp::IOP_pack_clamp_u8, TranslatePack, DXIL::OpCode::Pack4x8},
7050
    {IntrinsicOp::IOP_pack_s8, TranslatePack, DXIL::OpCode::Pack4x8},
7051
    {IntrinsicOp::IOP_pack_u8, TranslatePack, DXIL::OpCode::Pack4x8},
7052
    {IntrinsicOp::IOP_pow, TranslatePow, DXIL::OpCode::NumOpCodes},
7053
    {IntrinsicOp::IOP_printf, TranslatePrintf, DXIL::OpCode::NumOpCodes},
7054
    {IntrinsicOp::IOP_radians, TranslateRadians, DXIL::OpCode::NumOpCodes},
7055
    {IntrinsicOp::IOP_rcp, TranslateRCP, DXIL::OpCode::NumOpCodes},
7056
    {IntrinsicOp::IOP_reflect, TranslateReflect, DXIL::OpCode::NumOpCodes},
7057
    {IntrinsicOp::IOP_refract, TranslateRefract, DXIL::OpCode::NumOpCodes},
7058
    {IntrinsicOp::IOP_reversebits, TrivialUnaryOperation, DXIL::OpCode::Bfrev},
7059
    {IntrinsicOp::IOP_round, TrivialUnaryOperation, DXIL::OpCode::Round_ne},
7060
    {IntrinsicOp::IOP_rsqrt, TrivialUnaryOperation, DXIL::OpCode::Rsqrt},
7061
    {IntrinsicOp::IOP_saturate, TrivialUnaryOperation, DXIL::OpCode::Saturate},
7062
    {IntrinsicOp::IOP_select, TranslateSelect, DXIL::OpCode::NumOpCodes},
7063
    {IntrinsicOp::IOP_sign, TranslateSign, DXIL::OpCode::NumOpCodes},
7064
    {IntrinsicOp::IOP_sin, TrivialUnaryOperation, DXIL::OpCode::Sin},
7065
    {IntrinsicOp::IOP_sincos, EmptyLower, DXIL::OpCode::NumOpCodes},
7066
    {IntrinsicOp::IOP_sinh, TrivialUnaryOperation, DXIL::OpCode::Hsin},
7067
    {IntrinsicOp::IOP_smoothstep, TranslateSmoothStep,
7068
     DXIL::OpCode::NumOpCodes},
7069
    {IntrinsicOp::IOP_source_mark, EmptyLower, DXIL::OpCode::NumOpCodes},
7070
    {IntrinsicOp::IOP_sqrt, TrivialUnaryOperation, DXIL::OpCode::Sqrt},
7071
    {IntrinsicOp::IOP_step, TranslateStep, DXIL::OpCode::NumOpCodes},
7072
    {IntrinsicOp::IOP_tan, TrivialUnaryOperation, DXIL::OpCode::Tan},
7073
    {IntrinsicOp::IOP_tanh, TrivialUnaryOperation, DXIL::OpCode::Htan},
7074
    {IntrinsicOp::IOP_tex1D, EmptyLower, DXIL::OpCode::NumOpCodes},
7075
    {IntrinsicOp::IOP_tex1Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
7076
    {IntrinsicOp::IOP_tex1Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
7077
    {IntrinsicOp::IOP_tex1Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
7078
    {IntrinsicOp::IOP_tex1Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
7079
    {IntrinsicOp::IOP_tex2D, EmptyLower, DXIL::OpCode::NumOpCodes},
7080
    {IntrinsicOp::IOP_tex2Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
7081
    {IntrinsicOp::IOP_tex2Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
7082
    {IntrinsicOp::IOP_tex2Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
7083
    {IntrinsicOp::IOP_tex2Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
7084
    {IntrinsicOp::IOP_tex3D, EmptyLower, DXIL::OpCode::NumOpCodes},
7085
    {IntrinsicOp::IOP_tex3Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
7086
    {IntrinsicOp::IOP_tex3Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
7087
    {IntrinsicOp::IOP_tex3Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
7088
    {IntrinsicOp::IOP_tex3Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
7089
    {IntrinsicOp::IOP_texCUBE, EmptyLower, DXIL::OpCode::NumOpCodes},
7090
    {IntrinsicOp::IOP_texCUBEbias, EmptyLower, DXIL::OpCode::NumOpCodes},
7091
    {IntrinsicOp::IOP_texCUBEgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
7092
    {IntrinsicOp::IOP_texCUBElod, EmptyLower, DXIL::OpCode::NumOpCodes},
7093
    {IntrinsicOp::IOP_texCUBEproj, EmptyLower, DXIL::OpCode::NumOpCodes},
7094
    {IntrinsicOp::IOP_transpose, EmptyLower, DXIL::OpCode::NumOpCodes},
7095
    {IntrinsicOp::IOP_trunc, TrivialUnaryOperation, DXIL::OpCode::Round_z},
7096
    {IntrinsicOp::IOP_unpack_s8s16, TranslateUnpack, DXIL::OpCode::Unpack4x8},
7097
    {IntrinsicOp::IOP_unpack_s8s32, TranslateUnpack, DXIL::OpCode::Unpack4x8},
7098
    {IntrinsicOp::IOP_unpack_u8u16, TranslateUnpack, DXIL::OpCode::Unpack4x8},
7099
    {IntrinsicOp::IOP_unpack_u8u32, TranslateUnpack, DXIL::OpCode::Unpack4x8},
7100
    {IntrinsicOp::IOP_VkRawBufferLoad, UnsupportedVulkanIntrinsic,
7101
     DXIL::OpCode::NumOpCodes},
7102
    {IntrinsicOp::IOP_VkRawBufferStore, UnsupportedVulkanIntrinsic,
7103
     DXIL::OpCode::NumOpCodes},
7104
    {IntrinsicOp::IOP_VkReadClock, UnsupportedVulkanIntrinsic,
7105
     DXIL::OpCode::NumOpCodes},
7106
    {IntrinsicOp::IOP_Vkext_execution_mode, UnsupportedVulkanIntrinsic,
7107
     DXIL::OpCode::NumOpCodes},
7108
    {IntrinsicOp::IOP_Vkext_execution_mode_id, UnsupportedVulkanIntrinsic,
7109
     DXIL::OpCode::NumOpCodes},
7110
    {IntrinsicOp::MOP_Append, StreamOutputLower, DXIL::OpCode::EmitStream},
7111
    {IntrinsicOp::MOP_RestartStrip, StreamOutputLower, DXIL::OpCode::CutStream},
7112
    {IntrinsicOp::MOP_CalculateLevelOfDetail, TranslateCalculateLOD,
7113
     DXIL::OpCode::NumOpCodes},
7114
    {IntrinsicOp::MOP_CalculateLevelOfDetailUnclamped, TranslateCalculateLOD,
7115
     DXIL::OpCode::NumOpCodes},
7116
    {IntrinsicOp::MOP_GetDimensions, TranslateGetDimensions,
7117
     DXIL::OpCode::NumOpCodes},
7118
    {IntrinsicOp::MOP_Load, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
7119
    {IntrinsicOp::MOP_Sample, TranslateSample, DXIL::OpCode::Sample},
7120
    {IntrinsicOp::MOP_SampleBias, TranslateSample, DXIL::OpCode::SampleBias},
7121
    {IntrinsicOp::MOP_SampleCmp, TranslateSample, DXIL::OpCode::SampleCmp},
7122
    {IntrinsicOp::MOP_SampleCmpBias, TranslateSample,
7123
     DXIL::OpCode::SampleCmpBias},
7124
    {IntrinsicOp::MOP_SampleCmpGrad, TranslateSample,
7125
     DXIL::OpCode::SampleCmpGrad},
7126
    {IntrinsicOp::MOP_SampleCmpLevel, TranslateSample,
7127
     DXIL::OpCode::SampleCmpLevel},
7128
    {IntrinsicOp::MOP_SampleCmpLevelZero, TranslateSample,
7129
     DXIL::OpCode::SampleCmpLevelZero},
7130
    {IntrinsicOp::MOP_SampleGrad, TranslateSample, DXIL::OpCode::SampleGrad},
7131
    {IntrinsicOp::MOP_SampleLevel, TranslateSample, DXIL::OpCode::SampleLevel},
7132
    {IntrinsicOp::MOP_Gather, TranslateGather, DXIL::OpCode::TextureGather},
7133
    {IntrinsicOp::MOP_GatherAlpha, TranslateGather,
7134
     DXIL::OpCode::TextureGather},
7135
    {IntrinsicOp::MOP_GatherBlue, TranslateGather, DXIL::OpCode::TextureGather},
7136
    {IntrinsicOp::MOP_GatherCmp, TranslateGather,
7137
     DXIL::OpCode::TextureGatherCmp},
7138
    {IntrinsicOp::MOP_GatherCmpAlpha, TranslateGather,
7139
     DXIL::OpCode::TextureGatherCmp},
7140
    {IntrinsicOp::MOP_GatherCmpBlue, TranslateGather,
7141
     DXIL::OpCode::TextureGatherCmp},
7142
    {IntrinsicOp::MOP_GatherCmpGreen, TranslateGather,
7143
     DXIL::OpCode::TextureGatherCmp},
7144
    {IntrinsicOp::MOP_GatherCmpRed, TranslateGather,
7145
     DXIL::OpCode::TextureGatherCmp},
7146
    {IntrinsicOp::MOP_GatherGreen, TranslateGather,
7147
     DXIL::OpCode::TextureGather},
7148
    {IntrinsicOp::MOP_GatherRaw, TranslateGather,
7149
     DXIL::OpCode::TextureGatherRaw},
7150
    {IntrinsicOp::MOP_GatherRed, TranslateGather, DXIL::OpCode::TextureGather},
7151
    {IntrinsicOp::MOP_GetSamplePosition, TranslateGetSamplePosition,
7152
     DXIL::OpCode::NumOpCodes},
7153
    {IntrinsicOp::MOP_Load2, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
7154
    {IntrinsicOp::MOP_Load3, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
7155
    {IntrinsicOp::MOP_Load4, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
7156
    {IntrinsicOp::MOP_InterlockedAdd, TranslateMopAtomicBinaryOperation,
7157
     DXIL::OpCode::NumOpCodes},
7158
    {IntrinsicOp::MOP_InterlockedAdd64, TranslateMopAtomicBinaryOperation,
7159
     DXIL::OpCode::NumOpCodes},
7160
    {IntrinsicOp::MOP_InterlockedAnd, TranslateMopAtomicBinaryOperation,
7161
     DXIL::OpCode::NumOpCodes},
7162
    {IntrinsicOp::MOP_InterlockedAnd64, TranslateMopAtomicBinaryOperation,
7163
     DXIL::OpCode::NumOpCodes},
7164
    {IntrinsicOp::MOP_InterlockedCompareExchange, TranslateMopAtomicCmpXChg,
7165
     DXIL::OpCode::NumOpCodes},
7166
    {IntrinsicOp::MOP_InterlockedCompareExchange64, TranslateMopAtomicCmpXChg,
7167
     DXIL::OpCode::NumOpCodes},
7168
    {IntrinsicOp::MOP_InterlockedCompareExchangeFloatBitwise,
7169
     TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
7170
    {IntrinsicOp::MOP_InterlockedCompareStore, TranslateMopAtomicCmpXChg,
7171
     DXIL::OpCode::NumOpCodes},
7172
    {IntrinsicOp::MOP_InterlockedCompareStore64, TranslateMopAtomicCmpXChg,
7173
     DXIL::OpCode::NumOpCodes},
7174
    {IntrinsicOp::MOP_InterlockedCompareStoreFloatBitwise,
7175
     TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
7176
    {IntrinsicOp::MOP_InterlockedExchange, TranslateMopAtomicBinaryOperation,
7177
     DXIL::OpCode::NumOpCodes},
7178
    {IntrinsicOp::MOP_InterlockedExchange64, TranslateMopAtomicBinaryOperation,
7179
     DXIL::OpCode::NumOpCodes},
7180
    {IntrinsicOp::MOP_InterlockedExchangeFloat,
7181
     TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
7182
    {IntrinsicOp::MOP_InterlockedMax, TranslateMopAtomicBinaryOperation,
7183
     DXIL::OpCode::NumOpCodes},
7184
    {IntrinsicOp::MOP_InterlockedMax64, TranslateMopAtomicBinaryOperation,
7185
     DXIL::OpCode::NumOpCodes},
7186
    {IntrinsicOp::MOP_InterlockedMin, TranslateMopAtomicBinaryOperation,
7187
     DXIL::OpCode::NumOpCodes},
7188
    {IntrinsicOp::MOP_InterlockedMin64, TranslateMopAtomicBinaryOperation,
7189
     DXIL::OpCode::NumOpCodes},
7190
    {IntrinsicOp::MOP_InterlockedOr, TranslateMopAtomicBinaryOperation,
7191
     DXIL::OpCode::NumOpCodes},
7192
    {IntrinsicOp::MOP_InterlockedOr64, TranslateMopAtomicBinaryOperation,
7193
     DXIL::OpCode::NumOpCodes},
7194
    {IntrinsicOp::MOP_InterlockedXor, TranslateMopAtomicBinaryOperation,
7195
     DXIL::OpCode::NumOpCodes},
7196
    {IntrinsicOp::MOP_InterlockedXor64, TranslateMopAtomicBinaryOperation,
7197
     DXIL::OpCode::NumOpCodes},
7198
    {IntrinsicOp::MOP_Store, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
7199
    {IntrinsicOp::MOP_Store2, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
7200
    {IntrinsicOp::MOP_Store3, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
7201
    {IntrinsicOp::MOP_Store4, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
7202
    {IntrinsicOp::MOP_DecrementCounter, GenerateUpdateCounter,
7203
     DXIL::OpCode::NumOpCodes},
7204
    {IntrinsicOp::MOP_IncrementCounter, GenerateUpdateCounter,
7205
     DXIL::OpCode::NumOpCodes},
7206
    {IntrinsicOp::MOP_Consume, EmptyLower, DXIL::OpCode::NumOpCodes},
7207
    {IntrinsicOp::MOP_WriteSamplerFeedback, TranslateWriteSamplerFeedback,
7208
     DXIL::OpCode::WriteSamplerFeedback},
7209
    {IntrinsicOp::MOP_WriteSamplerFeedbackBias, TranslateWriteSamplerFeedback,
7210
     DXIL::OpCode::WriteSamplerFeedbackBias},
7211
    {IntrinsicOp::MOP_WriteSamplerFeedbackGrad, TranslateWriteSamplerFeedback,
7212
     DXIL::OpCode::WriteSamplerFeedbackGrad},
7213
    {IntrinsicOp::MOP_WriteSamplerFeedbackLevel, TranslateWriteSamplerFeedback,
7214
     DXIL::OpCode::WriteSamplerFeedbackLevel},
7215
7216
    {IntrinsicOp::MOP_Abort, TranslateGenericRayQueryMethod,
7217
     DXIL::OpCode::RayQuery_Abort},
7218
    {IntrinsicOp::MOP_CandidateGeometryIndex, TranslateGenericRayQueryMethod,
7219
     DXIL::OpCode::RayQuery_CandidateGeometryIndex},
7220
    {IntrinsicOp::MOP_CandidateInstanceContributionToHitGroupIndex,
7221
     TranslateGenericRayQueryMethod,
7222
     DXIL::OpCode::RayQuery_CandidateInstanceContributionToHitGroupIndex},
7223
    {IntrinsicOp::MOP_CandidateInstanceID, TranslateGenericRayQueryMethod,
7224
     DXIL::OpCode::RayQuery_CandidateInstanceID},
7225
    {IntrinsicOp::MOP_CandidateInstanceIndex, TranslateGenericRayQueryMethod,
7226
     DXIL::OpCode::RayQuery_CandidateInstanceIndex},
7227
    {IntrinsicOp::MOP_CandidateObjectRayDirection,
7228
     TranslateRayQueryFloat3Getter,
7229
     DXIL::OpCode::RayQuery_CandidateObjectRayDirection},
7230
    {IntrinsicOp::MOP_CandidateObjectRayOrigin, TranslateRayQueryFloat3Getter,
7231
     DXIL::OpCode::RayQuery_CandidateObjectRayOrigin},
7232
    {IntrinsicOp::MOP_CandidateObjectToWorld3x4,
7233
     TranslateRayQueryMatrix3x4Operation,
7234
     DXIL::OpCode::RayQuery_CandidateObjectToWorld3x4},
7235
    {IntrinsicOp::MOP_CandidateObjectToWorld4x3,
7236
     TranslateRayQueryTransposedMatrix3x4Operation,
7237
     DXIL::OpCode::RayQuery_CandidateObjectToWorld3x4},
7238
    {IntrinsicOp::MOP_CandidatePrimitiveIndex, TranslateGenericRayQueryMethod,
7239
     DXIL::OpCode::RayQuery_CandidatePrimitiveIndex},
7240
    {IntrinsicOp::MOP_CandidateProceduralPrimitiveNonOpaque,
7241
     TranslateGenericRayQueryMethod,
7242
     DXIL::OpCode::RayQuery_CandidateProceduralPrimitiveNonOpaque},
7243
    {IntrinsicOp::MOP_CandidateTriangleBarycentrics,
7244
     TranslateRayQueryFloat2Getter,
7245
     DXIL::OpCode::RayQuery_CandidateTriangleBarycentrics},
7246
    {IntrinsicOp::MOP_CandidateTriangleFrontFace,
7247
     TranslateGenericRayQueryMethod,
7248
     DXIL::OpCode::RayQuery_CandidateTriangleFrontFace},
7249
    {IntrinsicOp::MOP_CandidateTriangleRayT, TranslateGenericRayQueryMethod,
7250
     DXIL::OpCode::RayQuery_CandidateTriangleRayT},
7251
    {IntrinsicOp::MOP_CandidateType, TranslateGenericRayQueryMethod,
7252
     DXIL::OpCode::RayQuery_CandidateType},
7253
    {IntrinsicOp::MOP_CandidateWorldToObject3x4,
7254
     TranslateRayQueryMatrix3x4Operation,
7255
     DXIL::OpCode::RayQuery_CandidateWorldToObject3x4},
7256
    {IntrinsicOp::MOP_CandidateWorldToObject4x3,
7257
     TranslateRayQueryTransposedMatrix3x4Operation,
7258
     DXIL::OpCode::RayQuery_CandidateWorldToObject3x4},
7259
    {IntrinsicOp::MOP_CommitNonOpaqueTriangleHit,
7260
     TranslateGenericRayQueryMethod,
7261
     DXIL::OpCode::RayQuery_CommitNonOpaqueTriangleHit},
7262
    {IntrinsicOp::MOP_CommitProceduralPrimitiveHit,
7263
     TranslateCommitProceduralPrimitiveHit,
7264
     DXIL::OpCode::RayQuery_CommitProceduralPrimitiveHit},
7265
    {IntrinsicOp::MOP_CommittedGeometryIndex, TranslateGenericRayQueryMethod,
7266
     DXIL::OpCode::RayQuery_CommittedGeometryIndex},
7267
    {IntrinsicOp::MOP_CommittedInstanceContributionToHitGroupIndex,
7268
     TranslateGenericRayQueryMethod,
7269
     DXIL::OpCode::RayQuery_CommittedInstanceContributionToHitGroupIndex},
7270
    {IntrinsicOp::MOP_CommittedInstanceID, TranslateGenericRayQueryMethod,
7271
     DXIL::OpCode::RayQuery_CommittedInstanceID},
7272
    {IntrinsicOp::MOP_CommittedInstanceIndex, TranslateGenericRayQueryMethod,
7273
     DXIL::OpCode::RayQuery_CommittedInstanceIndex},
7274
    {IntrinsicOp::MOP_CommittedObjectRayDirection,
7275
     TranslateRayQueryFloat3Getter,
7276
     DXIL::OpCode::RayQuery_CommittedObjectRayDirection},
7277
    {IntrinsicOp::MOP_CommittedObjectRayOrigin, TranslateRayQueryFloat3Getter,
7278
     DXIL::OpCode::RayQuery_CommittedObjectRayOrigin},
7279
    {IntrinsicOp::MOP_CommittedObjectToWorld3x4,
7280
     TranslateRayQueryMatrix3x4Operation,
7281
     DXIL::OpCode::RayQuery_CommittedObjectToWorld3x4},
7282
    {IntrinsicOp::MOP_CommittedObjectToWorld4x3,
7283
     TranslateRayQueryTransposedMatrix3x4Operation,
7284
     DXIL::OpCode::RayQuery_CommittedObjectToWorld3x4},
7285
    {IntrinsicOp::MOP_CommittedPrimitiveIndex, TranslateGenericRayQueryMethod,
7286
     DXIL::OpCode::RayQuery_CommittedPrimitiveIndex},
7287
    {IntrinsicOp::MOP_CommittedRayT, TranslateGenericRayQueryMethod,
7288
     DXIL::OpCode::RayQuery_CommittedRayT},
7289
    {IntrinsicOp::MOP_CommittedStatus, TranslateGenericRayQueryMethod,
7290
     DXIL::OpCode::RayQuery_CommittedStatus},
7291
    {IntrinsicOp::MOP_CommittedTriangleBarycentrics,
7292
     TranslateRayQueryFloat2Getter,
7293
     DXIL::OpCode::RayQuery_CommittedTriangleBarycentrics},
7294
    {IntrinsicOp::MOP_CommittedTriangleFrontFace,
7295
     TranslateGenericRayQueryMethod,
7296
     DXIL::OpCode::RayQuery_CommittedTriangleFrontFace},
7297
    {IntrinsicOp::MOP_CommittedWorldToObject3x4,
7298
     TranslateRayQueryMatrix3x4Operation,
7299
     DXIL::OpCode::RayQuery_CommittedWorldToObject3x4},
7300
    {IntrinsicOp::MOP_CommittedWorldToObject4x3,
7301
     TranslateRayQueryTransposedMatrix3x4Operation,
7302
     DXIL::OpCode::RayQuery_CommittedWorldToObject3x4},
7303
    {IntrinsicOp::MOP_Proceed, TranslateGenericRayQueryMethod,
7304
     DXIL::OpCode::RayQuery_Proceed},
7305
    {IntrinsicOp::MOP_RayFlags, TranslateGenericRayQueryMethod,
7306
     DXIL::OpCode::RayQuery_RayFlags},
7307
    {IntrinsicOp::MOP_RayTMin, TranslateGenericRayQueryMethod,
7308
     DXIL::OpCode::RayQuery_RayTMin},
7309
    {IntrinsicOp::MOP_TraceRayInline, TranslateTraceRayInline,
7310
     DXIL::OpCode::RayQuery_TraceRayInline},
7311
    {IntrinsicOp::MOP_WorldRayDirection, TranslateRayQueryFloat3Getter,
7312
     DXIL::OpCode::RayQuery_WorldRayDirection},
7313
    {IntrinsicOp::MOP_WorldRayOrigin, TranslateRayQueryFloat3Getter,
7314
     DXIL::OpCode::RayQuery_WorldRayOrigin},
7315
    {IntrinsicOp::MOP_Count, TranslateNodeGetInputRecordCount,
7316
     DXIL::OpCode::GetInputRecordCount},
7317
    {IntrinsicOp::MOP_FinishedCrossGroupSharing,
7318
     TranslateNodeFinishedCrossGroupSharing,
7319
     DXIL::OpCode::FinishedCrossGroupSharing},
7320
    {IntrinsicOp::MOP_GetGroupNodeOutputRecords,
7321
     TranslateGetGroupNodeOutputRecords,
7322
     DXIL::OpCode::AllocateNodeOutputRecords},
7323
    {IntrinsicOp::MOP_GetThreadNodeOutputRecords,
7324
     TranslateGetThreadNodeOutputRecords,
7325
     DXIL::OpCode::AllocateNodeOutputRecords},
7326
    {IntrinsicOp::MOP_IsValid, TranslateNodeOutputIsValid,
7327
     DXIL::OpCode::NodeOutputIsValid},
7328
    {IntrinsicOp::MOP_GroupIncrementOutputCount,
7329
     TranslateNodeGroupIncrementOutputCount,
7330
     DXIL::OpCode::IncrementOutputCount},
7331
    {IntrinsicOp::MOP_ThreadIncrementOutputCount,
7332
     TranslateNodeThreadIncrementOutputCount,
7333
     DXIL::OpCode::IncrementOutputCount},
7334
    {IntrinsicOp::MOP_OutputComplete, TranslateNodeOutputComplete,
7335
     DXIL::OpCode::OutputComplete},
7336
7337
    // SPIRV change starts
7338
    {IntrinsicOp::MOP_SubpassLoad, UnsupportedVulkanIntrinsic,
7339
     DXIL::OpCode::NumOpCodes},
7340
    // SPIRV change ends
7341
7342
    // Manually added part.
7343
    {IntrinsicOp::IOP_InterlockedUMax, TranslateIopAtomicBinaryOperation,
7344
     DXIL::OpCode::NumOpCodes},
7345
    {IntrinsicOp::IOP_InterlockedUMin, TranslateIopAtomicBinaryOperation,
7346
     DXIL::OpCode::NumOpCodes},
7347
    {IntrinsicOp::IOP_WaveActiveUMax, TranslateWaveA2A,
7348
     DXIL::OpCode::WaveActiveOp},
7349
    {IntrinsicOp::IOP_WaveActiveUMin, TranslateWaveA2A,
7350
     DXIL::OpCode::WaveActiveOp},
7351
    {IntrinsicOp::IOP_WaveActiveUProduct, TranslateWaveA2A,
7352
     DXIL::OpCode::WaveActiveOp},
7353
    {IntrinsicOp::IOP_WaveActiveUSum, TranslateWaveA2A,
7354
     DXIL::OpCode::WaveActiveOp},
7355
    {IntrinsicOp::IOP_WaveMultiPrefixUProduct, TranslateWaveMultiPrefix,
7356
     DXIL::OpCode::WaveMultiPrefixOp},
7357
    {IntrinsicOp::IOP_WaveMultiPrefixUSum, TranslateWaveMultiPrefix,
7358
     DXIL::OpCode::WaveMultiPrefixOp},
7359
    {IntrinsicOp::IOP_WavePrefixUProduct, TranslateWaveA2A,
7360
     DXIL::OpCode::WavePrefixOp},
7361
    {IntrinsicOp::IOP_WavePrefixUSum, TranslateWaveA2A,
7362
     DXIL::OpCode::WavePrefixOp},
7363
    {IntrinsicOp::IOP_uabs, TranslateUAbs, DXIL::OpCode::NumOpCodes},
7364
    {IntrinsicOp::IOP_uclamp, TranslateClamp, DXIL::OpCode::NumOpCodes},
7365
    {IntrinsicOp::IOP_udot, TranslateDot, DXIL::OpCode::NumOpCodes},
7366
    {IntrinsicOp::IOP_ufirstbithigh, TranslateFirstbitHi,
7367
     DXIL::OpCode::FirstbitHi},
7368
    {IntrinsicOp::IOP_umad, TranslateFUITrinary, DXIL::OpCode::UMad},
7369
    {IntrinsicOp::IOP_umax, TranslateFUIBinary, DXIL::OpCode::UMax},
7370
    {IntrinsicOp::IOP_umin, TranslateFUIBinary, DXIL::OpCode::UMin},
7371
    {IntrinsicOp::IOP_umul, TranslateMul, DXIL::OpCode::UMul},
7372
    {IntrinsicOp::IOP_usign, TranslateUSign, DXIL::OpCode::UMax},
7373
    {IntrinsicOp::MOP_InterlockedUMax, TranslateMopAtomicBinaryOperation,
7374
     DXIL::OpCode::NumOpCodes},
7375
    {IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation,
7376
     DXIL::OpCode::NumOpCodes},
7377
    {IntrinsicOp::MOP_DxHitObject_MakeNop, TranslateHitObjectMakeNop,
7378
     DXIL::OpCode::HitObject_MakeNop},
7379
    {IntrinsicOp::IOP_DxMaybeReorderThread, TranslateMaybeReorderThread,
7380
     DXIL::OpCode::MaybeReorderThread},
7381
    {IntrinsicOp::IOP_Vkstatic_pointer_cast, UnsupportedVulkanIntrinsic,
7382
     DXIL::OpCode::NumOpCodes},
7383
    {IntrinsicOp::IOP_Vkreinterpret_pointer_cast, UnsupportedVulkanIntrinsic,
7384
     DXIL::OpCode::NumOpCodes},
7385
    {IntrinsicOp::MOP_GetBufferContents, UnsupportedVulkanIntrinsic,
7386
     DXIL::OpCode::NumOpCodes},
7387
    {IntrinsicOp::MOP_DxHitObject_FromRayQuery, TranslateHitObjectFromRayQuery,
7388
     DXIL::OpCode::HitObject_FromRayQuery},
7389
    {IntrinsicOp::MOP_DxHitObject_GetAttributes,
7390
     TranslateHitObjectGetAttributes, DXIL::OpCode::HitObject_Attributes},
7391
    {IntrinsicOp::MOP_DxHitObject_GetGeometryIndex,
7392
     TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_GeometryIndex},
7393
    {IntrinsicOp::MOP_DxHitObject_GetHitKind, TranslateHitObjectScalarGetter,
7394
     DXIL::OpCode::HitObject_HitKind},
7395
    {IntrinsicOp::MOP_DxHitObject_GetInstanceID, TranslateHitObjectScalarGetter,
7396
     DXIL::OpCode::HitObject_InstanceID},
7397
    {IntrinsicOp::MOP_DxHitObject_GetInstanceIndex,
7398
     TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_InstanceIndex},
7399
    {IntrinsicOp::MOP_DxHitObject_GetObjectRayDirection,
7400
     TranslateHitObjectVectorGetter,
7401
     DXIL::OpCode::HitObject_ObjectRayDirection},
7402
    {IntrinsicOp::MOP_DxHitObject_GetObjectRayOrigin,
7403
     TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_ObjectRayOrigin},
7404
    {IntrinsicOp::MOP_DxHitObject_GetObjectToWorld3x4,
7405
     TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_ObjectToWorld3x4},
7406
    {IntrinsicOp::MOP_DxHitObject_GetObjectToWorld4x3,
7407
     TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_ObjectToWorld3x4},
7408
    {IntrinsicOp::MOP_DxHitObject_GetPrimitiveIndex,
7409
     TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_PrimitiveIndex},
7410
    {IntrinsicOp::MOP_DxHitObject_GetRayFlags, TranslateHitObjectScalarGetter,
7411
     DXIL::OpCode::HitObject_RayFlags},
7412
    {IntrinsicOp::MOP_DxHitObject_GetRayTCurrent,
7413
     TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_RayTCurrent},
7414
    {IntrinsicOp::MOP_DxHitObject_GetRayTMin, TranslateHitObjectScalarGetter,
7415
     DXIL::OpCode::HitObject_RayTMin},
7416
    {IntrinsicOp::MOP_DxHitObject_GetShaderTableIndex,
7417
     TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_ShaderTableIndex},
7418
    {IntrinsicOp::MOP_DxHitObject_GetWorldRayDirection,
7419
     TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_WorldRayDirection},
7420
    {IntrinsicOp::MOP_DxHitObject_GetWorldRayOrigin,
7421
     TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_WorldRayOrigin},
7422
    {IntrinsicOp::MOP_DxHitObject_GetWorldToObject3x4,
7423
     TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_WorldToObject3x4},
7424
    {IntrinsicOp::MOP_DxHitObject_GetWorldToObject4x3,
7425
     TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_WorldToObject3x4},
7426
    {IntrinsicOp::MOP_DxHitObject_Invoke, TranslateHitObjectInvoke,
7427
     DXIL::OpCode::HitObject_Invoke},
7428
    {IntrinsicOp::MOP_DxHitObject_IsHit, TranslateHitObjectScalarGetter,
7429
     DXIL::OpCode::HitObject_IsHit},
7430
    {IntrinsicOp::MOP_DxHitObject_IsMiss, TranslateHitObjectScalarGetter,
7431
     DXIL::OpCode::HitObject_IsMiss},
7432
    {IntrinsicOp::MOP_DxHitObject_IsNop, TranslateHitObjectScalarGetter,
7433
     DXIL::OpCode::HitObject_IsNop},
7434
    {IntrinsicOp::MOP_DxHitObject_LoadLocalRootTableConstant,
7435
     TranslateHitObjectLoadLocalRootTableConstant,
7436
     DXIL::OpCode::HitObject_LoadLocalRootTableConstant},
7437
    {IntrinsicOp::MOP_DxHitObject_MakeMiss, TranslateHitObjectMakeMiss,
7438
     DXIL::OpCode::HitObject_MakeMiss},
7439
    {IntrinsicOp::MOP_DxHitObject_SetShaderTableIndex,
7440
     TranslateHitObjectSetShaderTableIndex,
7441
     DXIL::OpCode::HitObject_SetShaderTableIndex},
7442
    {IntrinsicOp::MOP_DxHitObject_TraceRay, TranslateHitObjectTraceRay,
7443
     DXIL::OpCode::HitObject_TraceRay},
7444
7445
    {IntrinsicOp::IOP___builtin_MatVecMul, TranslateMatVecMul,
7446
     DXIL::OpCode::MatVecMul},
7447
    {IntrinsicOp::IOP___builtin_MatVecMulAdd, TranslateMatVecMulAdd,
7448
     DXIL::OpCode::MatVecMulAdd},
7449
    {IntrinsicOp::IOP___builtin_OuterProductAccumulate,
7450
     TranslateOuterProductAccumulate, DXIL::OpCode::OuterProductAccumulate},
7451
    {IntrinsicOp::IOP___builtin_VectorAccumulate, TranslateVectorAccumulate,
7452
     DXIL::OpCode::VectorAccumulate},
7453
};
7454
} // namespace
7455
// Compile-time check that gLowerTable has exactly one entry per IntrinsicOp
// value (IntrinsicOp::Num_Intrinsics entries in total). The table is indexed
// directly by opcode in TranslateBuiltinIntrinsic, so a missing entry would
// shift every later lookup.
static_assert(
7456
    sizeof(gLowerTable) / sizeof(gLowerTable[0]) ==
7457
        static_cast<size_t>(IntrinsicOp::Num_Intrinsics),
7458
    "Intrinsic lowering table must be updated to account for new intrinsics.");
7459
7460
// Lowers a single HL intrinsic call by dispatching through gLowerTable.
//
// The HL opcode read from CI is used directly as the index into gLowerTable.
// The selected entry's LowerFunc is called with that entry's IntriOpcode and
// DxilOpcode plus the helpers; Translated is passed through to it by
// reference. When LowerFunc returns a non-null value, all uses of CI are
// redirected to that value. CI itself is not erased here.
//
// NOTE(review): opcode is not range-checked before indexing gLowerTable —
// presumably callers only pass calls whose opcode is a valid IntrinsicOp;
// confirm.
static void TranslateBuiltinIntrinsic(CallInst *CI,
7461
                                      HLOperationLowerHelper &helper,
7462
                                      HLObjectOperationLowerHelper *pObjHelper,
7463
65.2k
                                      bool &Translated) {
7464
65.2k
  unsigned opcode = hlsl::GetHLOpcode(CI);
7465
65.2k
  const IntrinsicLower &lower = gLowerTable[opcode];
7466
65.2k
  Value *Result = lower.LowerFunc(CI, lower.IntriOpcode, lower.DxilOpcode,
7467
65.2k
                                  helper, pObjHelper, Translated);
7468
65.2k
  // A null result means the lowering function produced no replacement value.
  if (Result)
7469
40.3k
    CI->replaceAllUsesWith(Result);
7470
65.2k
}
7471
7472
// SharedMem.
7473
namespace {
7474
7475
496
// Returns true when Ptr's pointer type is in the DXIL::kTGSMAddrSpace address
// space. Ptr must have pointer type.
bool IsSharedMemPtr(Value *Ptr) {
7476
496
  return Ptr->getType()->getPointerAddressSpace() == DXIL::kTGSMAddrSpace;
7477
496
}
7478
7479
496
// Returns true when the base of Ptr is either an alloca or a global variable
// with internal linkage; returns false for any other base.
//
// Only GetElementPtrInst instructions are looked through to find the base
// pointer.
bool IsLocalVariablePtr(Value *Ptr) {
7480
1.10k
  // Walk up the chain of GEP instructions to the underlying base pointer.
  while (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
7481
608
    Ptr = GEP->getPointerOperand();
7482
608
  }
7483
496
  bool isAlloca = isa<AllocaInst>(Ptr);
7484
496
  if (isAlloca)
7485
0
    return true;
7486
7487
496
  GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr);
7488
496
  if (!GV)
7489
496
    return false;
7490
7491
0
  // A global counts as local only when it has internal linkage.
  return GV->getLinkage() == GlobalValue::LinkageTypes::InternalLinkage;
7492
496
}
7493
7494
} // namespace
7495
7496
// Constant buffer.
7497
namespace {
7498
2.31k
// Returns the byte size used for one scalar element when computing constant
// buffer offsets: 4 for any element whose size is at most 32 bits, 8
// otherwise.
//
// EltType must be an integer or floating-point type (asserted). DL is used to
// query the element's size in bits.
unsigned GetEltTypeByteSizeForConstBuf(Type *EltType, const DataLayout &DL) {
7499
2.31k
  DXASSERT(EltType->isIntegerTy() || EltType->isFloatingPointTy(),
7500
2.31k
           "not an element type");
7501
  // TODO: Use real size after change constant buffer into linear layout.
7502
2.31k
  if (DL.getTypeSizeInBits(EltType) <= 32) {
7503
    // Elements of 32 bits or less each take a 4-byte slot.
7504
2.26k
    return 4;
7505
2.26k
  }
7506
7507
48
  return 8;
7508
2.31k
}
7509
7510
// Emits a call to the CBufferLoad DXIL op overloaded on EltTy and returns the
// call.
//
// The call arguments are, in order: the CBufferLoad opcode constant, handle,
// offset, and a constant alignment of 8. EltTy must not be i1 (asserted).
// The call is inserted at Builder's current insertion point.
Value *GenerateCBLoad(Value *handle, Value *offset, Type *EltTy, OP *hlslOP,
7511
0
                      IRBuilder<> &Builder) {
7512
0
  Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoad);
7513
0
7514
0
  DXASSERT(!EltTy->isIntegerTy(1),
7515
0
           "Bools should not be loaded as their register representation.");
7516
0
7517
0
  // Align to 8 bytes for now.
7518
0
  Constant *align = hlslOP->GetU32Const(8);
7519
0
  Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoad, EltTy);
7520
0
  return Builder.CreateCall(CBLoad, {OpArg, handle, offset, align});
7521
0
}
7522
7523
// Loads a whole matrix from a constant buffer one element at a time and
// returns it in the matrix type's lowered register form.
//
// matType is cast to HLMatrixType. One GenerateCBLoad call is emitted per
// matrix element, starting at offset and advancing by the per-element byte
// size from GetEltTypeByteSizeForConstBuf after each load. The loaded
// elements are assembled into a vector with HLMatrixLower::BuildVector and
// then converted with emitLoweredMemToReg.
//
// NOTE(review): colMajor is not read anywhere in this function body.
Value *TranslateConstBufMatLd(Type *matType, Value *handle, Value *offset,
7524
                              bool colMajor, OP *OP, const DataLayout &DL,
7525
0
                              IRBuilder<> &Builder) {
7526
0
  HLMatrixType MatTy = HLMatrixType::cast(matType);
7527
0
  Type *EltTy = MatTy.getElementTypeForMem();
7528
0
  unsigned matSize = MatTy.getNumElements();
7529
0
  std::vector<Value *> elts(matSize);
7530
0
  Value *EltByteSize = ConstantInt::get(
7531
0
      offset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
7532
0
7533
0
  // TODO: use real size after change constant buffer into linear layout.
7534
0
  Value *baseOffset = offset;
7535
0
  for (unsigned i = 0; i < matSize; i++) {
7536
0
    elts[i] = GenerateCBLoad(handle, baseOffset, EltTy, OP, Builder);
7537
0
    baseOffset = Builder.CreateAdd(baseOffset, EltByteSize);
7538
0
  }
7539
0
7540
0
  Value *Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder);
7541
0
  Vec = MatTy.emitLoweredMemToReg(Vec, Builder);
7542
0
  return Vec;
7543
0
}
7544
7545
// Forward declaration: TranslateCBAddressUser (below) calls TranslateCBGep,
// and TranslateCBGep in turn calls TranslateCBAddressUser, so one of the two
// has to be declared before its definition.
void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset,
7546
                    hlsl::OP *hlslOP, IRBuilder<> &Builder,
7547
                    DxilFieldAnnotation *prevFieldAnnotation,
7548
                    const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
7549
                    HLObjectOperationLowerHelper *pObjHelper);
7550
7551
// Produces the single element of the vector ldData that GEP selects.
//
// GEP must have exactly two indices (asserted); the first is asserted to be
// the i32 zero constant and the second is the element index.
//  - Constant element index: returns an extractelement of ldData.
//  - Dynamic element index: ldData is spilled element by element into a
//    temporary array allocated in the entry block of GEP's function, and the
//    result is a load from that array at the dynamic index.
//
// When bInsertLdNextToGEP is true, Builder's insertion point is moved to GEP
// before the final GEP and load are created. Builder is taken by reference,
// so the caller observes that change.
Value *GenerateVecEltFromGEP(Value *ldData, GetElementPtrInst *GEP,
7552
104
                             IRBuilder<> &Builder, bool bInsertLdNextToGEP) {
7553
104
  DXASSERT(GEP->getNumIndices() == 2, "must have 2 level");
7554
104
  Value *baseIdx = (GEP->idx_begin())->get();
7555
104
  Value *zeroIdx = Builder.getInt32(0);
7556
104
  DXASSERT_LOCALVAR(baseIdx && zeroIdx, baseIdx == zeroIdx,
7557
104
                    "base index must be 0");
7558
104
  Value *idx = (GEP->idx_begin() + 1)->get();
7559
104
  // Constant index: a plain extractelement is enough.
  if (dyn_cast<ConstantInt>(idx)) {
7560
56
    return Builder.CreateExtractElement(ldData, idx);
7561
56
  }
7562
7563
  // Dynamic indexing.
7564
  // Copy vec to array.
7565
48
  Type *Ty = ldData->getType();
7566
48
  Type *EltTy = Ty->getVectorElementType();
7567
48
  unsigned vecSize = Ty->getVectorNumElements();
7568
48
  ArrayType *AT = ArrayType::get(EltTy, vecSize);
7569
48
  // The temporary array is allocated at the first insertion point of the
  // function's entry block, not at Builder's current position.
  IRBuilder<> AllocaBuilder(
7570
48
      GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
7571
48
  Value *tempArray = AllocaBuilder.CreateAlloca(AT);
7572
48
  Value *zero = Builder.getInt32(0);
7573
240
  for (unsigned int i = 0; i < vecSize; 
i++192
) {
7574
192
    Value *Elt = Builder.CreateExtractElement(ldData, Builder.getInt32(i));
7575
192
    Value *Ptr =
7576
192
        Builder.CreateInBoundsGEP(tempArray, {zero, Builder.getInt32(i)});
7577
192
    Builder.CreateStore(Elt, Ptr);
7578
192
  }
7579
  // Load from temp array.
7580
48
  if (bInsertLdNextToGEP) {
7581
    // Insert the new GEP just before the old and to-be-deleted GEP
7582
32
    Builder.SetInsertPoint(GEP);
7583
32
  }
7584
48
  Value *EltGEP = Builder.CreateInBoundsGEP(tempArray, {zero, idx});
7585
48
  return Builder.CreateLoad(EltGEP);
7586
104
}
7587
7588
// Rewrites a load of a resource object that lives inside a constant buffer so
// that it loads from a pointer obtained through pObjHelper instead.
//
// If LI has no users it is simply erased. Otherwise:
//  - LI's pointer operand must be a GetElementPtrInst,
//  - the user of LI returned by user_back() must be a CallInst, and the user
//    of that call returned by user_back() must also be a CallInst (Anno),
//    from which the resource properties are read via
//    GetResPropsFromAnnotateHandle.
// These shapes are enforced with cast<>. A new load from the pointer returned
// by LowerCbResourcePtr replaces all uses of LI, and LI is erased.
void TranslateResourceInCB(LoadInst *LI,
7589
                           HLObjectOperationLowerHelper *pObjHelper,
7590
314
                           GlobalVariable *CbGV) {
7591
314
  if (LI->user_empty()) {
7592
0
    LI->eraseFromParent();
7593
0
    return;
7594
0
  }
7595
7596
314
  GetElementPtrInst *Ptr = cast<GetElementPtrInst>(LI->getPointerOperand());
7597
314
  CallInst *CI = cast<CallInst>(LI->user_back());
7598
314
  CallInst *Anno = cast<CallInst>(CI->user_back());
7599
314
  DxilResourceProperties RP = pObjHelper->GetResPropsFromAnnotateHandle(Anno);
7600
314
  Value *ResPtr = pObjHelper->GetOrCreateResourceForCbPtr(Ptr, CbGV, RP);
7601
7602
  // Lower Ptr to GV base Ptr.
7603
314
  Value *GvPtr = pObjHelper->LowerCbResourcePtr(Ptr, ResPtr);
7604
314
  IRBuilder<> Builder(LI);
7605
314
  Value *GvLd = Builder.CreateLoad(GvPtr);
7606
314
  LI->replaceAllUsesWith(GvLd);
7607
314
  LI->eraseFromParent();
7608
314
}
7609
7610
// Lowers one user of a constant buffer address into CBufferLoad calls.
//
// handle is the constant buffer handle and baseOffset is the byte offset of
// the address that user consumes. New instructions are inserted before user.
// The user is handled according to its kind:
//  - CallInst in the HLMatLoadStore group: must be a row- or column-major
//    matrix load (asserted); replaced by TranslateConstBufMatLd.
//  - CallInst in the HLSubscript group: a per-element offset is computed for
//    each selected matrix element, the elements are loaded with
//    GenerateCBLoad, and every load (optionally behind a GEP) that uses the
//    subscript is replaced with the loaded data.
//  - Any other CallInst: asserts "not implemented yet".
//  - LoadInst of an HLSL object type: forwarded to TranslateResourceInCB.
//  - LoadInst of a scalar or vector: replaced by one GenerateCBLoad per
//    element.
//  - Anything else: must be a GetElementPtrInst; handled by TranslateCBGep
//    and then erased.
// In every handled case the original user instruction is erased.
void TranslateCBAddressUser(Instruction *user, Value *handle, Value *baseOffset,
7611
                            hlsl::OP *hlslOP,
7612
                            DxilFieldAnnotation *prevFieldAnnotation,
7613
                            DxilTypeSystem &dxilTypeSys, const DataLayout &DL,
7614
0
                            HLObjectOperationLowerHelper *pObjHelper) {
7615
0
  IRBuilder<> Builder(user);
7616
0
  if (CallInst *CI = dyn_cast<CallInst>(user)) {
7617
0
    HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
7618
0
    unsigned opcode = GetHLOpcode(CI);
7619
0
    if (group == HLOpcodeGroup::HLMatLoadStore) {
7620
0
      HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
7621
0
      bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad;
7622
0
      DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad ||
7623
0
                   matOp == HLMatLoadStoreOpcode::RowMatLoad,
7624
0
               "No store on cbuffer");
7625
0
      Type *matType = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx)
7626
0
                          ->getType()
7627
0
                          ->getPointerElementType();
7628
0
      Value *newLd = TranslateConstBufMatLd(matType, handle, baseOffset,
7629
0
                                            colMajor, hlslOP, DL, Builder);
7630
0
      CI->replaceAllUsesWith(newLd);
7631
0
      CI->eraseFromParent();
7632
0
    } else if (group == HLOpcodeGroup::HLSubscript) {
7633
0
      HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
7634
0
      Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
7635
0
      HLMatrixType MatTy =
7636
0
          HLMatrixType::cast(basePtr->getType()->getPointerElementType());
7637
0
      Type *EltTy = MatTy.getElementTypeForReg();
7638
0
7639
0
      Value *EltByteSize = ConstantInt::get(
7640
0
          baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
7641
0
7642
0
      Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
7643
0
7644
0
      Type *resultType = CI->getType()->getPointerElementType();
7645
0
      unsigned resultSize = 1;
7646
0
      if (resultType->isVectorTy())
7647
0
        resultSize = resultType->getVectorNumElements();
7648
0
      DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
7649
0
      assert(resultSize <= 16);
7650
0
      // Byte offset of each selected element; sized by the assert above.
      Value *idxList[16];
7651
0
7652
0
      switch (subOp) {
7653
0
      case HLSubscriptOpcode::ColMatSubscript:
7654
0
      case HLSubscriptOpcode::RowMatSubscript: {
7655
0
        // Element indices are passed as consecutive call operands.
        for (unsigned i = 0; i < resultSize; i++) {
7656
0
          Value *idx =
7657
0
              CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
7658
0
          Value *offset = Builder.CreateMul(idx, EltByteSize);
7659
0
          idxList[i] = Builder.CreateAdd(baseOffset, offset);
7660
0
        }
7661
0
7662
0
      } break;
7663
0
      case HLSubscriptOpcode::RowMatElement:
7664
0
      case HLSubscriptOpcode::ColMatElement: {
7665
0
        // Element indices are packed into a single constant aggregate operand.
        Constant *EltIdxs = cast<Constant>(idx);
7666
0
        for (unsigned i = 0; i < resultSize; i++) {
7667
0
          Value *offset =
7668
0
              Builder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize);
7669
0
          idxList[i] = Builder.CreateAdd(baseOffset, offset);
7670
0
        }
7671
0
      } break;
7672
0
      default:
7673
0
        DXASSERT(0, "invalid operation on const buffer");
7674
0
        break;
7675
0
      }
7676
0
7677
0
      Value *ldData = UndefValue::get(resultType);
7678
0
      if (resultType->isVectorTy()) {
7679
0
        for (unsigned i = 0; i < resultSize; i++) {
7680
0
          Value *eltData =
7681
0
              GenerateCBLoad(handle, idxList[i], EltTy, hlslOP, Builder);
7682
0
          ldData = Builder.CreateInsertElement(ldData, eltData, i);
7683
0
        }
7684
0
      } else {
7685
0
        ldData = GenerateCBLoad(handle, idxList[0], EltTy, hlslOP, Builder);
7686
0
      }
7687
0
7688
0
      // The iterator is advanced before each user is processed because the
      // user is erased inside the loop body.
      for (auto U = CI->user_begin(); U != CI->user_end();) {
7689
0
        Value *subsUser = *(U++);
7690
0
        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
7691
0
          Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder,
7692
0
                                                 /*bInsertLdNextToGEP*/ true);
7693
0
7694
0
          for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
7695
0
            Value *gepUser = *(gepU++);
7696
0
            // Must be load here;
7697
0
            LoadInst *ldUser = cast<LoadInst>(gepUser);
7698
0
            ldUser->replaceAllUsesWith(subData);
7699
0
            ldUser->eraseFromParent();
7700
0
          }
7701
0
          GEP->eraseFromParent();
7702
0
        } else {
7703
0
          // Must be load here.
7704
0
          LoadInst *ldUser = cast<LoadInst>(subsUser);
7705
0
          ldUser->replaceAllUsesWith(ldData);
7706
0
          ldUser->eraseFromParent();
7707
0
        }
7708
0
      }
7709
0
7710
0
      CI->eraseFromParent();
7711
0
    } else {
7712
0
      DXASSERT(0, "not implemented yet");
7713
0
    }
7714
0
  } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
7715
0
    Type *Ty = ldInst->getType();
7716
0
    Type *EltTy = Ty->getScalarType();
7717
0
    // Resource inside cbuffer is lowered after GenerateDxilOperations.
7718
0
    if (dxilutil::IsHLSLObjectType(Ty)) {
7719
0
      CallInst *CI = cast<CallInst>(handle);
7720
0
      // CI should be annotate handle.
7721
0
      // Need createHandle here.
7722
0
      if (GetHLOpcodeGroup(CI->getCalledFunction()) ==
7723
0
          HLOpcodeGroup::HLAnnotateHandle)
7724
0
        CI = cast<CallInst>(CI->getArgOperand(HLOperandIndex::kHandleOpIdx));
7725
0
      GlobalVariable *CbGV = cast<GlobalVariable>(
7726
0
          CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx));
7727
0
      TranslateResourceInCB(ldInst, pObjHelper, CbGV);
7728
0
      return;
7729
0
    }
7730
0
    DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass");
7731
0
7732
0
    unsigned EltByteSize = GetEltTypeByteSizeForConstBuf(EltTy, DL);
7733
0
7734
0
    Value *newLd = GenerateCBLoad(handle, baseOffset, EltTy, hlslOP, Builder);
7735
0
    if (Ty->isVectorTy()) {
7736
0
      Value *result = UndefValue::get(Ty);
7737
0
      result = Builder.CreateInsertElement(result, newLd, (uint64_t)0);
7738
0
      // Advance the offset by one element (EltByteSize bytes).
7739
0
      Value *offset =
7740
0
          Builder.CreateAdd(baseOffset, hlslOP->GetU32Const(EltByteSize));
7741
0
      for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
7742
0
        Value *elt = GenerateCBLoad(handle, offset, EltTy, hlslOP, Builder);
7743
0
        result = Builder.CreateInsertElement(result, elt, i);
7744
0
        // Advance the offset by one element (EltByteSize bytes).
7745
0
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(EltByteSize));
7746
0
      }
7747
0
      newLd = result;
7748
0
    }
7749
0
7750
0
    ldInst->replaceAllUsesWith(newLd);
7751
0
    ldInst->eraseFromParent();
7752
0
  } else {
7753
0
    // Must be GEP here
7754
0
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
7755
0
    TranslateCBGep(GEP, handle, baseOffset, hlslOP, Builder,
7756
0
                   prevFieldAnnotation, DL, dxilTypeSys, pObjHelper);
7757
0
    GEP->eraseFromParent();
7758
0
  }
7759
0
}
7760
7761
// Lowers a GEP into a constant buffer by folding its indices into a byte
// offset and then translating every user of the GEP at that offset.
//
// Starting from baseOffset, each GEP index contributes according to the type
// it steps through:
//  - pointer: index * element size, with the size rounded up to a multiple
//    of 16 bytes,
//  - struct: the CBuffer offset recorded in the field's annotation (this also
//    updates the current field annotation),
//  - array: index * (nested array element count * 16-byte-aligned element
//    size),
//  - vector: index * alloc size of the vector element type.
// A scalar type is only allowed as the last step (asserted).
// Constant indices are folded into a constant add; dynamic indices emit a
// multiply and an add. Each user of GEP is then passed to
// TranslateCBAddressUser with the final offset and field annotation. GEP
// itself is not erased here.
void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset,
7762
                    hlsl::OP *hlslOP, IRBuilder<> &Builder,
7763
                    DxilFieldAnnotation *prevFieldAnnotation,
7764
                    const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
7765
0
                    HLObjectOperationLowerHelper *pObjHelper) {
7766
0
  // NOTE(review): Indices is not referenced again in this function.
  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
7767
0
7768
0
  Value *offset = baseOffset;
7769
0
  // update offset
7770
0
  DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation;
7771
0
7772
0
  gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
7773
0
7774
0
  for (; GEPIt != E; GEPIt++) {
7775
0
    Value *idx = GEPIt.getOperand();
7776
0
    unsigned immIdx = 0;
7777
0
    bool bImmIdx = false;
7778
0
    if (Constant *constIdx = dyn_cast<Constant>(idx)) {
7779
0
      immIdx = constIdx->getUniqueInteger().getLimitedValue();
7780
0
      bImmIdx = true;
7781
0
    }
7782
0
7783
0
    if (GEPIt->isPointerTy()) {
7784
0
      Type *EltTy = GEPIt->getPointerElementType();
7785
0
      unsigned size = 0;
7786
0
      if (StructType *ST = dyn_cast<StructType>(EltTy)) {
7787
0
        DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
7788
0
        size = annotation->GetCBufferSize();
7789
0
      } else {
7790
0
        DXASSERT(fieldAnnotation, "must be a field");
7791
0
        if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) {
7792
0
          unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
7793
0
              *fieldAnnotation, EltTy, dxilTypeSys);
7794
0
7795
0
          // Decide the nested array size.
7796
0
          unsigned nestedArraySize = 1;
7797
0
7798
0
          // This EltTy shadows the outer EltTy for the rest of this scope.
          Type *EltTy = AT->getArrayElementType();
7799
0
          // support multi level of array
7800
0
          while (EltTy->isArrayTy()) {
7801
0
            ArrayType *EltAT = cast<ArrayType>(EltTy);
7802
0
            nestedArraySize *= EltAT->getNumElements();
7803
0
            EltTy = EltAT->getElementType();
7804
0
          }
7805
0
          // Align to 4 * 4 bytes.
7806
0
          unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
7807
0
          size = nestedArraySize * alignedSize;
7808
0
        } else {
7809
0
          size = DL.getTypeAllocSize(EltTy);
7810
0
        }
7811
0
      }
7812
0
      // Align to 4 * 4 bytes.
7813
0
      size = (size + 15) & 0xfffffff0;
7814
0
      if (bImmIdx) {
7815
0
        unsigned tempOffset = size * immIdx;
7816
0
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
7817
0
      } else {
7818
0
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
7819
0
        offset = Builder.CreateAdd(offset, tempOffset);
7820
0
      }
7821
0
    } else if (GEPIt->isStructTy()) {
7822
0
      StructType *ST = cast<StructType>(*GEPIt);
7823
0
      DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
7824
0
      // immIdx selects the struct field; it keeps its initial value of 0 if
      // the index was not a constant.
      fieldAnnotation = &annotation->GetFieldAnnotation(immIdx);
7825
0
      unsigned structOffset = fieldAnnotation->GetCBufferOffset();
7826
0
      offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(structOffset));
7827
0
    } else if (GEPIt->isArrayTy()) {
7828
0
      DXASSERT(fieldAnnotation != nullptr, "must a field");
7829
0
      unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
7830
0
          *fieldAnnotation, *GEPIt, dxilTypeSys);
7831
0
      // Decide the nested array size.
7832
0
      unsigned nestedArraySize = 1;
7833
0
7834
0
      Type *EltTy = GEPIt->getArrayElementType();
7835
0
      // support multi level of array
7836
0
      while (EltTy->isArrayTy()) {
7837
0
        ArrayType *EltAT = cast<ArrayType>(EltTy);
7838
0
        nestedArraySize *= EltAT->getNumElements();
7839
0
        EltTy = EltAT->getElementType();
7840
0
      }
7841
0
      // Align to 4 * 4 bytes.
7842
0
      unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
7843
0
      unsigned size = nestedArraySize * alignedSize;
7844
0
      if (bImmIdx) {
7845
0
        unsigned tempOffset = size * immIdx;
7846
0
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
7847
0
      } else {
7848
0
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
7849
0
        offset = Builder.CreateAdd(offset, tempOffset);
7850
0
      }
7851
0
    } else if (GEPIt->isVectorTy()) {
7852
0
      unsigned size = DL.getTypeAllocSize(GEPIt->getVectorElementType());
7853
0
      if (bImmIdx) {
7854
0
        unsigned tempOffset = size * immIdx;
7855
0
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
7856
0
      } else {
7857
0
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
7858
0
        offset = Builder.CreateAdd(offset, tempOffset);
7859
0
      }
7860
0
    } else {
7861
0
      gep_type_iterator temp = GEPIt;
7862
0
      temp++;
7863
0
      DXASSERT(temp == E, "scalar type must be the last");
7864
0
    }
7865
0
  }
7866
0
7867
0
  // The iterator is advanced before each user is translated because
  // TranslateCBAddressUser erases the user it is given.
  for (auto U = GEP->user_begin(); U != GEP->user_end();) {
7868
0
    Instruction *user = cast<Instruction>(*(U++));
7869
0
7870
0
    TranslateCBAddressUser(user, handle, offset, hlslOP, fieldAnnotation,
7871
0
                           dxilTypeSys, DL, pObjHelper);
7872
0
  }
7873
0
}
7874
7875
// Emits a CBufferLoadLegacy call overloaded on EltTy and returns one
// component extracted from its result.
//
// The call arguments are, in order: the CBufferLoadLegacy opcode constant,
// handle, and legacyIdx. channelOffset selects the component:
//  - for double / i64 elements, channelOffset must be even (asserted) and the
//    extracted component index is channelOffset / 2;
//  - otherwise the extracted component index is channelOffset itself.
// EltTy must not be i1 (asserted). channelOffset is asserted to be below 8
// for half / i16 elements when min precision is not in use, and below 4
// otherwise.
Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
7876
                            unsigned channelOffset, Type *EltTy, OP *hlslOP,
7877
15.0k
                            IRBuilder<> &Builder) {
7878
15.0k
  Constant *OpArg =
7879
15.0k
      hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);
7880
7881
15.0k
  DXASSERT(!EltTy->isIntegerTy(1),
7882
15.0k
           "Bools should not be loaded as their register representation.");
7883
7884
15.0k
  Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
7885
15.0k
  Type *halfTy = Type::getHalfTy(EltTy->getContext());
7886
15.0k
  Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
7887
15.0k
  Type *i16Ty = Type::getInt16Ty(EltTy->getContext());
7888
7889
15.0k
  bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
7890
15.0k
  bool is16 = (EltTy == halfTy || 
EltTy == i16Ty14.5k
) &&
!hlslOP->UseMinPrecision()762
;
7891
15.0k
  DXASSERT_LOCALVAR(is16, (is16 && channelOffset < 8) || channelOffset < 4,
7892
15.0k
                    "legacy cbuffer don't across 16 bytes register.");
7893
15.0k
  if (is64) {
7894
428
    Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
7895
428
    Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
7896
428
    DXASSERT((channelOffset & 1) == 0,
7897
428
             "channel offset must be even for double");
7898
428
    // Halve the channel offset to get the 64-bit component index.
    unsigned eltIdx = channelOffset >> 1;
7899
428
    Value *Result = Builder.CreateExtractValue(loadLegacy, eltIdx);
7900
428
    return Result;
7901
428
  }
7902
7903
14.6k
  Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
7904
14.6k
  Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
7905
14.6k
  return Builder.CreateExtractValue(loadLegacy, channelOffset);
7906
15.0k
}
7907
7908
/// Emits CBufferLoadLegacy call(s) starting at register \p legacyIdx of
/// \p handle and returns a vector of \p vecSize elements of type \p EltTy
/// whose first element sits at \p channelOffset.
///
/// For 16-bit and 32-bit elements \p channelOffset is used directly as the
/// index of the first element in the loaded register. For double / i64
/// elements it is halved, matching the scalar overload, and the load moves
/// on to the following register once both 64-bit elements of the current one
/// have been consumed.
Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
                            unsigned channelOffset, Type *EltTy,
                            unsigned vecSize, OP *hlslOP,
                            IRBuilder<> &Builder) {
  Constant *OpArg =
      hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);

  DXASSERT(!EltTy->isIntegerTy(1),
           "Bools should not be loaded as their register representation.");

  Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
  Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
  Type *halfTy = Type::getHalfTy(EltTy->getContext());
  Type *shortTy = Type::getInt16Ty(EltTy->getContext());

  const bool is64 = EltTy == doubleTy || EltTy == i64Ty;
  const bool is16 =
      (EltTy == shortTy || EltTy == halfTy) && !hlslOP->UseMinPrecision();
  DXASSERT_LOCALVAR(is16,
                    (is16 && channelOffset + vecSize <= 8) ||
                        (channelOffset + vecSize) <= 4,
                    "legacy cbuffer don't across 16 bytes register.");

  Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
  Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
  Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));

  if (!is64) {
    // 16-bit and 32-bit elements: everything comes from this one register.
    for (unsigned i = 0; i < vecSize; ++i) {
      Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i);
      Result = Builder.CreateInsertElement(Result, NewElt, i);
    }
    return Result;
  }

  // 64-bit elements: the extract index is in units of elements, not of
  // 32-bit channels, so convert the offset the same way the scalar overload
  // does before using it.
  DXASSERT((channelOffset & 1) == 0, "channel offset must be even for double");
  const unsigned kEltsPerReg = 2;
  const unsigned firstSlot = channelOffset >> 1;
  unsigned loadedReg = 0; // Register (relative to the first) in loadLegacy.
  for (unsigned i = 0; i < vecSize; ++i) {
    const unsigned slot = firstSlot + i;
    const unsigned reg = slot / kEltsPerReg;
    if (reg != loadedReg) {
      // Go to the next cb register.
      legacyIdx =
          Builder.CreateAdd(legacyIdx, hlslOP->GetU32Const(reg - loadedReg));
      loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
      loadedReg = reg;
    }
    Value *NewElt = Builder.CreateExtractValue(loadLegacy, slot % kEltsPerReg);
    Result = Builder.CreateInsertElement(Result, NewElt, i);
  }
  return Result;
}
7973
7974
/// Loads a whole matrix of type \p MatTy from the legacy cbuffer \p handle,
/// starting at register \p legacyIdx, and returns it as one flat vector.
///
/// The matrix is read one major line at a time (a column when \p colMajor is
/// set, a row otherwise); each line starts at channel 0 and the register
/// index advances by a fixed stride after every line. The result is
/// converted from the memory to the register element representation unless
/// \p memElemRepr is set.
Value *TranslateConstBufMatLdLegacy(HLMatrixType MatTy, Value *handle,
                                    Value *legacyIdx, bool colMajor, OP *OP,
                                    bool memElemRepr, const DataLayout &DL,
                                    IRBuilder<> &Builder) {
  Type *MemEltTy = MatTy.getElementTypeForMem();
  std::vector<Value *> FlatElts(MatTy.getNumElements());

  // Register stride between two consecutive lines, derived from the size of
  // four elements rounded up to whole 16-byte registers.
  const unsigned LineByteSize = 4 * GetEltTypeByteSizeForConstBuf(MemEltTy, DL);
  const unsigned LineRegStride = (LineByteSize + 15) >> 4;

  const unsigned NumLines =
      colMajor ? MatTy.getNumColumns() : MatTy.getNumRows();
  const unsigned LineLength =
      colMajor ? MatTy.getNumRows() : MatTy.getNumColumns();

  for (unsigned Line = 0; Line < NumLines; ++Line) {
    Value *LineVec =
        GenerateCBLoadLegacy(handle, legacyIdx, /*channelOffset*/ 0, MemEltTy,
                             LineLength, OP, Builder);

    for (unsigned Pos = 0; Pos < LineLength; ++Pos) {
      // (row, col) is (Pos, Line) for columns and (Line, Pos) for rows.
      const unsigned FlatIdx = colMajor ? MatTy.getColumnMajorIndex(Pos, Line)
                                        : MatTy.getRowMajorIndex(Line, Pos);
      FlatElts[FlatIdx] = Builder.CreateExtractElement(LineVec, Pos);
    }

    // Step to the register holding the next line.
    legacyIdx = Builder.CreateAdd(legacyIdx, OP->GetU32Const(LineRegStride));
  }

  Value *Loaded = HLMatrixLower::BuildVector(MemEltTy, FlatElts, Builder);
  if (memElemRepr)
    return Loaded;
  return MatTy.emitLoweredMemToReg(Loaded, Builder);
}
8018
8019
void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle,
8020
                          Value *legacyIdx, unsigned channelOffset,
8021
                          hlsl::OP *hlslOP, IRBuilder<> &Builder,
8022
                          DxilFieldAnnotation *prevFieldAnnotation,
8023
                          const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
8024
                          HLObjectOperationLowerHelper *pObjHelper);
8025
8026
void TranslateCBAddressUserLegacy(Instruction *user, Value *handle,
8027
                                  Value *legacyIdx, unsigned channelOffset,
8028
                                  hlsl::OP *hlslOP,
8029
                                  DxilFieldAnnotation *prevFieldAnnotation,
8030
                                  DxilTypeSystem &dxilTypeSys,
8031
                                  const DataLayout &DL,
8032
43.0k
                                  HLObjectOperationLowerHelper *pObjHelper) {
8033
43.0k
  IRBuilder<> Builder(user);
8034
43.0k
  if (CallInst *CI = dyn_cast<CallInst>(user)) {
8035
2.23k
    HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
8036
2.23k
    if (group == HLOpcodeGroup::HLMatLoadStore) {
8037
1.86k
      unsigned opcode = GetHLOpcode(CI);
8038
1.86k
      HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
8039
1.86k
      bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad;
8040
1.86k
      DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad ||
8041
1.86k
                   matOp == HLMatLoadStoreOpcode::RowMatLoad,
8042
1.86k
               "No store on cbuffer");
8043
1.86k
      HLMatrixType MatTy =
8044
1.86k
          HLMatrixType::cast(CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx)
8045
1.86k
                                 ->getType()
8046
1.86k
                                 ->getPointerElementType());
8047
      // This will replace a call, so we should use the register representation
8048
      // of elements
8049
1.86k
      Value *newLd = TranslateConstBufMatLdLegacy(
8050
1.86k
          MatTy, handle, legacyIdx, colMajor, hlslOP, /*memElemRepr*/ false, DL,
8051
1.86k
          Builder);
8052
1.86k
      CI->replaceAllUsesWith(newLd);
8053
1.86k
      dxilutil::TryScatterDebugValueToVectorElements(newLd);
8054
1.86k
      CI->eraseFromParent();
8055
1.86k
    } else 
if (370
group == HLOpcodeGroup::HLSubscript370
) {
8056
350
      unsigned opcode = GetHLOpcode(CI);
8057
350
      HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
8058
350
      Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
8059
350
      HLMatrixType MatTy =
8060
350
          HLMatrixType::cast(basePtr->getType()->getPointerElementType());
8061
350
      Type *EltTy = MatTy.getElementTypeForReg();
8062
8063
350
      Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
8064
8065
350
      Type *resultType = CI->getType()->getPointerElementType();
8066
350
      unsigned resultSize = 1;
8067
350
      if (resultType->isVectorTy())
8068
254
        resultSize = resultType->getVectorNumElements();
8069
350
      DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
8070
350
      assert(resultSize <= 16);
8071
350
      Value *idxList[16];
8072
350
      bool colMajor = subOp == HLSubscriptOpcode::ColMatSubscript ||
8073
350
                      
subOp == HLSubscriptOpcode::ColMatElement178
;
8074
350
      bool dynamicIndexing = !isa<ConstantInt>(idx) &&
8075
350
                             
!isa<ConstantAggregateZero>(idx)160
&&
8076
350
                             
!isa<ConstantDataSequential>(idx)136
;
8077
8078
350
      Value *ldData = UndefValue::get(resultType);
8079
350
      if (!dynamicIndexing) {
8080
        // This will replace a load or GEP, so we should use the memory
8081
        // representation of elements
8082
302
        Value *matLd = TranslateConstBufMatLdLegacy(
8083
302
            MatTy, handle, legacyIdx, colMajor, hlslOP, /*memElemRepr*/ true,
8084
302
            DL, Builder);
8085
        // The matLd is keep original layout, just use the idx calc in
8086
        // EmitHLSLMatrixElement and EmitHLSLMatrixSubscript.
8087
302
        switch (subOp) {
8088
50
        case HLSubscriptOpcode::RowMatSubscript:
8089
190
        case HLSubscriptOpcode::ColMatSubscript: {
8090
830
          for (unsigned i = 0; i < resultSize; 
i++640
) {
8091
640
            idxList[i] =
8092
640
                CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
8093
640
          }
8094
190
        } break;
8095
32
        case HLSubscriptOpcode::RowMatElement:
8096
112
        case HLSubscriptOpcode::ColMatElement: {
8097
112
          Constant *EltIdxs = cast<Constant>(idx);
8098
264
          for (unsigned i = 0; i < resultSize; 
i++152
) {
8099
152
            idxList[i] = EltIdxs->getAggregateElement(i);
8100
152
          }
8101
112
        } break;
8102
0
        default:
8103
0
          DXASSERT(0, "invalid operation on const buffer");
8104
0
          break;
8105
302
        }
8106
8107
302
        if (resultType->isVectorTy()) {
8108
902
          for (unsigned i = 0; i < resultSize; 
i++696
) {
8109
696
            Value *eltData = Builder.CreateExtractElement(matLd, idxList[i]);
8110
696
            ldData = Builder.CreateInsertElement(ldData, eltData, i);
8111
696
          }
8112
206
        } else {
8113
96
          Value *eltData = Builder.CreateExtractElement(matLd, idxList[0]);
8114
96
          ldData = eltData;
8115
96
        }
8116
302
      } else {
8117
        // Must be matSub here.
8118
48
        Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
8119
8120
48
        if (colMajor) {
8121
          // idx is c * row + r.
8122
          // For first col, c is 0, so idx is r.
8123
32
          Value *one = Builder.getInt32(1);
8124
          // row.x = c[0].[idx]
8125
          // row.y = c[1].[idx]
8126
          // row.z = c[2].[idx]
8127
          // row.w = c[3].[idx]
8128
32
          Value *Elts[4];
8129
32
          ArrayType *AT = ArrayType::get(EltTy, MatTy.getNumColumns());
8130
8131
32
          IRBuilder<> AllocaBuilder(user->getParent()
8132
32
                                        ->getParent()
8133
32
                                        ->getEntryBlock()
8134
32
                                        .getFirstInsertionPt());
8135
8136
32
          Value *tempArray = AllocaBuilder.CreateAlloca(AT);
8137
32
          Value *zero = AllocaBuilder.getInt32(0);
8138
32
          Value *cbufIdx = legacyIdx;
8139
152
          for (unsigned int c = 0; c < MatTy.getNumColumns(); 
c++120
) {
8140
120
            Value *ColVal = GenerateCBLoadLegacy(
8141
120
                handle, cbufIdx, /*channelOffset*/ 0, EltTy, MatTy.getNumRows(),
8142
120
                hlslOP, Builder);
8143
            // Convert ColVal to array for indexing.
8144
576
            for (unsigned int r = 0; r < MatTy.getNumRows(); 
r++456
) {
8145
456
              Value *Elt =
8146
456
                  Builder.CreateExtractElement(ColVal, Builder.getInt32(r));
8147
456
              Value *Ptr = Builder.CreateInBoundsGEP(
8148
456
                  tempArray, {zero, Builder.getInt32(r)});
8149
456
              Builder.CreateStore(Elt, Ptr);
8150
456
            }
8151
8152
120
            Value *Ptr = Builder.CreateInBoundsGEP(tempArray, {zero, idx});
8153
120
            Elts[c] = Builder.CreateLoad(Ptr);
8154
            // Update cbufIdx.
8155
120
            cbufIdx = Builder.CreateAdd(cbufIdx, one);
8156
120
          }
8157
32
          if (resultType->isVectorTy()) {
8158
152
            for (unsigned int c = 0; c < MatTy.getNumColumns(); 
c++120
) {
8159
120
              ldData = Builder.CreateInsertElement(ldData, Elts[c], c);
8160
120
            }
8161
32
          } else {
8162
0
            ldData = Elts[0];
8163
0
          }
8164
32
        } else {
8165
          // idx is r * col + c;
8166
          // r = idx / col;
8167
16
          Value *cCol = ConstantInt::get(idx->getType(), MatTy.getNumColumns());
8168
16
          idx = Builder.CreateUDiv(idx, cCol);
8169
16
          idx = Builder.CreateAdd(idx, legacyIdx);
8170
          // Just return a row; 'col' is the number of columns in the row.
8171
16
          ldData = GenerateCBLoadLegacy(handle, idx, /*channelOffset*/ 0, EltTy,
8172
16
                                        MatTy.getNumColumns(), hlslOP, Builder);
8173
16
        }
8174
48
        if (!resultType->isVectorTy()) {
8175
0
          ldData = Builder.CreateExtractElement(ldData, Builder.getInt32(0));
8176
0
        }
8177
48
      }
8178
8179
700
      
for (auto U = CI->user_begin(); 350
U != CI->user_end();) {
8180
350
        Value *subsUser = *(U++);
8181
350
        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
8182
80
          Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder,
8183
80
                                                 /*bInsertLdNextToGEP*/ true);
8184
160
          for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
8185
80
            Value *gepUser = *(gepU++);
8186
            // Must be load here;
8187
80
            LoadInst *ldUser = cast<LoadInst>(gepUser);
8188
80
            ldUser->replaceAllUsesWith(subData);
8189
80
            ldUser->eraseFromParent();
8190
80
          }
8191
80
          GEP->eraseFromParent();
8192
270
        } else {
8193
          // Must be load here.
8194
270
          LoadInst *ldUser = cast<LoadInst>(subsUser);
8195
270
          ldUser->replaceAllUsesWith(ldData);
8196
270
          ldUser->eraseFromParent();
8197
270
        }
8198
350
      }
8199
8200
350
      CI->eraseFromParent();
8201
350
    } else 
if (IntrinsicInst *20
II20
= dyn_cast<IntrinsicInst>(user)) {
8202
20
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
8203
20
          
II->getIntrinsicID() == Intrinsic::lifetime_end10
) {
8204
20
        DXASSERT(II->use_empty(), "lifetime intrinsic can't have uses");
8205
20
        II->eraseFromParent();
8206
20
      } else {
8207
0
        DXASSERT(0, "not implemented yet");
8208
0
      }
8209
20
    } else {
8210
0
      DXASSERT(0, "not implemented yet");
8211
0
    }
8212
40.8k
  } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
8213
22.7k
    Type *Ty = ldInst->getType();
8214
22.7k
    Type *EltTy = Ty->getScalarType();
8215
    // Resource inside cbuffer is lowered after GenerateDxilOperations.
8216
22.7k
    if (dxilutil::IsHLSLObjectType(Ty)) {
8217
314
      CallInst *CI = cast<CallInst>(handle);
8218
      // CI should be annotate handle.
8219
      // Need createHandle here.
8220
314
      if (GetHLOpcodeGroup(CI->getCalledFunction()) ==
8221
314
          HLOpcodeGroup::HLAnnotateHandle)
8222
314
        CI = cast<CallInst>(CI->getArgOperand(HLOperandIndex::kHandleOpIdx));
8223
8224
314
      GlobalVariable *CbGV = cast<GlobalVariable>(
8225
314
          CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx));
8226
314
      TranslateResourceInCB(ldInst, pObjHelper, CbGV);
8227
314
      return;
8228
314
    }
8229
22.4k
    DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass");
8230
8231
22.4k
    Value *newLd = nullptr;
8232
8233
22.4k
    if (Ty->isVectorTy())
8234
7.31k
      newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy,
8235
7.31k
                                   Ty->getVectorNumElements(), hlslOP, Builder);
8236
15.0k
    else
8237
15.0k
      newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy,
8238
15.0k
                                   hlslOP, Builder);
8239
8240
22.4k
    ldInst->replaceAllUsesWith(newLd);
8241
22.4k
    dxilutil::TryScatterDebugValueToVectorElements(newLd);
8242
22.4k
    ldInst->eraseFromParent();
8243
22.4k
  } else 
if (BitCastInst *18.1k
BCI18.1k
= dyn_cast<BitCastInst>(user)) {
8244
64
    for (auto it = BCI->user_begin(); it != BCI->user_end();) {
8245
36
      Instruction *I = cast<Instruction>(*it++);
8246
36
      TranslateCBAddressUserLegacy(I, handle, legacyIdx, channelOffset, hlslOP,
8247
36
                                   prevFieldAnnotation, dxilTypeSys, DL,
8248
36
                                   pObjHelper);
8249
36
    }
8250
28
    BCI->eraseFromParent();
8251
18.0k
  } else {
8252
    // Must be GEP here
8253
18.0k
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
8254
18.0k
    TranslateCBGepLegacy(GEP, handle, legacyIdx, channelOffset, hlslOP, Builder,
8255
18.0k
                         prevFieldAnnotation, DL, dxilTypeSys, pObjHelper);
8256
18.0k
    GEP->eraseFromParent();
8257
18.0k
  }
8258
43.0k
}
8259
8260
void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle,
8261
                          Value *legacyIndex, unsigned channel,
8262
                          hlsl::OP *hlslOP, IRBuilder<> &Builder,
8263
                          DxilFieldAnnotation *prevFieldAnnotation,
8264
                          const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
8265
18.0k
                          HLObjectOperationLowerHelper *pObjHelper) {
8266
18.0k
  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
8267
8268
  // update offset
8269
18.0k
  DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation;
8270
8271
18.0k
  gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
8272
8273
62.2k
  for (; GEPIt != E; 
GEPIt++44.1k
) {
8274
44.2k
    Value *idx = GEPIt.getOperand();
8275
44.2k
    unsigned immIdx = 0;
8276
44.2k
    bool bImmIdx = false;
8277
44.2k
    if (Constant *constIdx = dyn_cast<Constant>(idx)) {
8278
41.4k
      immIdx = constIdx->getUniqueInteger().getLimitedValue();
8279
41.4k
      bImmIdx = true;
8280
41.4k
    }
8281
8282
44.2k
    if (GEPIt->isPointerTy()) {
8283
18.0k
      Type *EltTy = GEPIt->getPointerElementType();
8284
18.0k
      unsigned size = 0;
8285
18.0k
      if (StructType *ST = dyn_cast<StructType>(EltTy)) {
8286
18.0k
        DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
8287
18.0k
        size = annotation->GetCBufferSize();
8288
18.0k
      } else {
8289
32
        DXASSERT(fieldAnnotation, "must be a field");
8290
32
        if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) {
8291
32
          unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
8292
32
              *fieldAnnotation, EltTy, dxilTypeSys);
8293
8294
          // Decide the nested array size.
8295
32
          unsigned nestedArraySize = 1;
8296
8297
32
          Type *EltTy = AT->getArrayElementType();
8298
          // support multi level of array
8299
40
          while (EltTy->isArrayTy()) {
8300
8
            ArrayType *EltAT = cast<ArrayType>(EltTy);
8301
8
            nestedArraySize *= EltAT->getNumElements();
8302
8
            EltTy = EltAT->getElementType();
8303
8
          }
8304
          // Align to 4 * 4 bytes.
8305
32
          unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
8306
32
          size = nestedArraySize * alignedSize;
8307
32
        } else {
8308
0
          size = DL.getTypeAllocSize(EltTy);
8309
0
        }
8310
32
      }
8311
      // Skip 0 idx.
8312
18.0k
      if (bImmIdx && immIdx == 0)
8313
18.0k
        continue;
8314
      // Align to 4 * 4 bytes.
8315
0
      size = (size + 15) & 0xfffffff0;
8316
8317
      // Take this as array idxing.
8318
0
      if (bImmIdx) {
8319
0
        unsigned tempOffset = size * immIdx;
8320
0
        unsigned idxInc = tempOffset >> 4;
8321
0
        legacyIndex =
8322
0
            Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
8323
0
      } else {
8324
0
        Value *idxInc = Builder.CreateMul(idx, hlslOP->GetU32Const(size >> 4));
8325
0
        legacyIndex = Builder.CreateAdd(legacyIndex, idxInc);
8326
0
      }
8327
8328
      // Array always start from x channel.
8329
0
      channel = 0;
8330
26.1k
    } else if (GEPIt->isStructTy()) {
8331
21.4k
      StructType *ST = cast<StructType>(*GEPIt);
8332
21.4k
      DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
8333
21.4k
      fieldAnnotation = &annotation->GetFieldAnnotation(immIdx);
8334
8335
21.4k
      unsigned idxInc = 0;
8336
21.4k
      unsigned structOffset = 0;
8337
21.4k
      if (fieldAnnotation->GetCompType().Is16Bit() &&
8338
21.4k
          
!hlslOP->UseMinPrecision()1.10k
) {
8339
764
        structOffset = fieldAnnotation->GetCBufferOffset() >> 1;
8340
764
        channel += structOffset;
8341
764
        idxInc = channel >> 3;
8342
764
        channel = channel & 0x7;
8343
20.7k
      } else {
8344
20.7k
        structOffset = fieldAnnotation->GetCBufferOffset() >> 2;
8345
20.7k
        channel += structOffset;
8346
20.7k
        idxInc = channel >> 2;
8347
20.7k
        channel = channel & 0x3;
8348
20.7k
      }
8349
21.4k
      if (idxInc)
8350
8.27k
        legacyIndex =
8351
8.27k
            Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
8352
21.4k
    } else 
if (4.64k
GEPIt->isArrayTy()4.64k
) {
8353
4.17k
      DXASSERT(fieldAnnotation != nullptr, "must a field");
8354
4.17k
      unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
8355
4.17k
          *fieldAnnotation, *GEPIt, dxilTypeSys);
8356
      // Decide the nested array size.
8357
4.17k
      unsigned nestedArraySize = 1;
8358
8359
4.17k
      Type *EltTy = GEPIt->getArrayElementType();
8360
      // support multi level of array
8361
4.78k
      while (EltTy->isArrayTy()) {
8362
606
        ArrayType *EltAT = cast<ArrayType>(EltTy);
8363
606
        nestedArraySize *= EltAT->getNumElements();
8364
606
        EltTy = EltAT->getElementType();
8365
606
      }
8366
      // Align to 4 * 4 bytes.
8367
4.17k
      unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
8368
4.17k
      unsigned size = nestedArraySize * alignedSize;
8369
4.17k
      if (bImmIdx) {
8370
1.41k
        unsigned tempOffset = size * immIdx;
8371
1.41k
        unsigned idxInc = tempOffset >> 4;
8372
1.41k
        legacyIndex =
8373
1.41k
            Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
8374
2.76k
      } else {
8375
2.76k
        Value *idxInc = Builder.CreateMul(idx, hlslOP->GetU32Const(size >> 4));
8376
2.76k
        legacyIndex = Builder.CreateAdd(legacyIndex, idxInc);
8377
2.76k
      }
8378
8379
      // Array always start from x channel.
8380
4.17k
      channel = 0;
8381
4.17k
    } else 
if (470
GEPIt->isVectorTy()470
) {
8382
      // Indexing on vector.
8383
470
      if (bImmIdx) {
8384
422
        if (immIdx < GEPIt->getVectorNumElements()) {
8385
394
          const unsigned vectorElmSize =
8386
394
              DL.getTypeAllocSize(GEPIt->getVectorElementType());
8387
394
          const bool bIs16bitType = vectorElmSize == 2;
8388
394
          const unsigned tempOffset = vectorElmSize * immIdx;
8389
394
          const unsigned numChannelsPerRow = bIs16bitType ? 
832
:
4362
;
8390
394
          const unsigned channelInc =
8391
394
              bIs16bitType ? 
tempOffset >> 132
:
tempOffset >> 2362
;
8392
8393
394
          DXASSERT((channel + channelInc) < numChannelsPerRow,
8394
394
                   "vector should not cross cb register");
8395
394
          channel += channelInc;
8396
394
          if (channel == numChannelsPerRow) {
8397
            // Get to another row.
8398
            // Update index and channel.
8399
0
            channel = 0;
8400
0
            legacyIndex = Builder.CreateAdd(legacyIndex, Builder.getInt32(1));
8401
0
          }
8402
394
        } else {
8403
28
          StringRef resName = "(unknown)";
8404
28
          if (DxilResourceBase *Res =
8405
28
                  pObjHelper->FindCBufferResourceFromHandle(handle)) {
8406
28
            resName = Res->GetGlobalName();
8407
28
          }
8408
28
          legacyIndex = hlsl::CreatePoisonValue(
8409
28
              legacyIndex->getType(),
8410
28
              Twine("Out of bounds index (") + Twine(immIdx) +
8411
28
                  Twine(") in CBuffer '") + Twine(resName) + ("'"),
8412
28
              GEP->getDebugLoc(), GEP);
8413
28
          channel = 0;
8414
28
        }
8415
422
      } else {
8416
48
        Type *EltTy = GEPIt->getVectorElementType();
8417
48
        unsigned vecSize = GEPIt->getVectorNumElements();
8418
8419
        // Load the whole register.
8420
48
        Value *newLd =
8421
48
            GenerateCBLoadLegacy(handle, legacyIndex,
8422
48
                                 /*channelOffset*/ channel, EltTy,
8423
48
                                 /*vecSize*/ vecSize, hlslOP, Builder);
8424
        // Copy to array.
8425
48
        IRBuilder<> AllocaBuilder(GEP->getParent()
8426
48
                                      ->getParent()
8427
48
                                      ->getEntryBlock()
8428
48
                                      .getFirstInsertionPt());
8429
48
        Value *tempArray =
8430
48
            AllocaBuilder.CreateAlloca(ArrayType::get(EltTy, vecSize));
8431
48
        Value *zeroIdx = hlslOP->GetU32Const(0);
8432
216
        for (unsigned i = 0; i < vecSize; 
i++168
) {
8433
168
          Value *Elt = Builder.CreateExtractElement(newLd, i);
8434
168
          Value *EltGEP = Builder.CreateInBoundsGEP(
8435
168
              tempArray, {zeroIdx, hlslOP->GetU32Const(i)});
8436
168
          Builder.CreateStore(Elt, EltGEP);
8437
168
        }
8438
        // Make sure this is the end of GEP.
8439
48
        gep_type_iterator temp = GEPIt;
8440
48
        temp++;
8441
48
        DXASSERT(temp == E, "scalar type must be the last");
8442
8443
        // Replace the GEP with array GEP.
8444
48
        Value *ArrayGEP = Builder.CreateInBoundsGEP(tempArray, {zeroIdx, idx});
8445
48
        GEP->replaceAllUsesWith(ArrayGEP);
8446
48
        return;
8447
48
      }
8448
470
    } else {
8449
0
      gep_type_iterator temp = GEPIt;
8450
0
      temp++;
8451
0
      DXASSERT(temp == E, "scalar type must be the last");
8452
0
    }
8453
44.2k
  }
8454
8455
43.0k
  
for (auto U = GEP->user_begin(); 18.0k
U != GEP->user_end();) {
8456
24.9k
    Instruction *user = cast<Instruction>(*(U++));
8457
8458
24.9k
    TranslateCBAddressUserLegacy(user, handle, legacyIndex, channel, hlslOP,
8459
24.9k
                                 fieldAnnotation, dxilTypeSys, DL, pObjHelper);
8460
24.9k
  }
8461
18.0k
}
8462
8463
void TranslateCBOperationsLegacy(Value *handle, Value *ptr, OP *hlslOP,
8464
                                 DxilTypeSystem &dxilTypeSys,
8465
                                 const DataLayout &DL,
8466
8.72k
                                 HLObjectOperationLowerHelper *pObjHelper) {
8467
8.72k
  auto User = ptr->user_begin();
8468
8.72k
  auto UserE = ptr->user_end();
8469
8.72k
  Value *zeroIdx = hlslOP->GetU32Const(0);
8470
26.7k
  for (; User != UserE;) {
8471
    // Must be Instruction.
8472
18.0k
    Instruction *I = cast<Instruction>(*(User++));
8473
18.0k
    TranslateCBAddressUserLegacy(
8474
18.0k
        I, handle, zeroIdx, /*channelOffset*/ 0, hlslOP,
8475
18.0k
        /*prevFieldAnnotation*/ nullptr, dxilTypeSys, DL, pObjHelper);
8476
18.0k
  }
8477
8.72k
}
8478
8479
} // namespace
8480
8481
// Structured buffer.
8482
namespace {
8483
8484
/// Emits a RawBufferLoad call for \p EltTy and fills \p resultElts with the
/// leading members extracted from its result (one per slot of
/// \p resultElts, at most four).
///
/// When \p bufIdx is null, \p offset is passed as the first coordinate and
/// an undef value takes the place of the second. \p status is forwarded to
/// UpdateStatus together with the call. Returns the call itself.
Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset,
                        Value *status, Type *EltTy,
                        MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
                        IRBuilder<> &Builder, unsigned NumComponents,
                        Constant *alignment) {
  const OP::OpCode LoadOp = OP::OpCode::RawBufferLoad;

  DXASSERT(resultElts.size() <= 4,
           "buffer load cannot load more than 4 values");

  if (!bufIdx) {
    // This is actually a byte address buffer load with a struct template type.
    // The call takes only one coordinates for the offset.
    bufIdx = offset;
    offset = UndefValue::get(offset->getType());
  }

  Function *LoadFn = OP->GetOpFunc(LoadOp, EltTy);
  Constant *CompMask = GetRawBufferMaskForETy(EltTy, NumComponents, OP);
  Value *CallArgs[] = {
      OP->GetU32Const((unsigned)LoadOp), handle, bufIdx, offset, CompMask,
      alignment};
  Value *LoadCall =
      Builder.CreateCall(LoadFn, CallArgs, OP::GetOpCodeName(LoadOp));

  unsigned Member = 0;
  for (Value *&Slot : resultElts) {
    Slot = Builder.CreateExtractValue(LoadCall, Member);
    ++Member;
  }

  // status
  UpdateStatus(LoadCall, status, Builder, OP);
  return LoadCall;
}
8519
8520
void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset,
8521
                         Type *EltTy, hlsl::OP *OP, IRBuilder<> &Builder,
8522
                         ArrayRef<Value *> vals, uint8_t mask,
8523
60
                         Constant *alignment) {
8524
60
  OP::OpCode opcode = OP::OpCode::RawBufferStore;
8525
60
  DXASSERT(vals.size() == 4, "buffer store need 4 values");
8526
8527
60
  Value *Args[] = {OP->GetU32Const((unsigned)opcode),
8528
60
                   handle,
8529
60
                   bufIdx,
8530
60
                   offset,
8531
60
                   vals[0],
8532
60
                   vals[1],
8533
60
                   vals[2],
8534
60
                   vals[3],
8535
60
                   OP->GetU8Const(mask),
8536
60
                   alignment};
8537
60
  Function *dxilF = OP->GetOpFunc(opcode, EltTy);
8538
60
  Builder.CreateCall(dxilF, Args);
8539
60
}
8540
8541
// Lowers a matrix load from a buffer by building a ResLoadHelper for the call
// and delegating to TranslateBufLoad. Returns whatever TranslateBufLoad
// returns.
Value *TranslateStructBufMatLd(CallInst *CI, IRBuilder<> &Builder,
                               Value *handle, HLResource::Kind RK, hlsl::OP *OP,
                               Value *status, Value *bufIdx, Value *baseOffset,
                               const DataLayout &DL) {
  ResLoadHelper LoadHelper(CI, RK, handle, bufIdx, baseOffset, status);

#ifndef NDEBUG
  // Debug-only sanity check: the helper's result type must be the lowered
  // (register representation) vector type of the loaded matrix.
  Value *MatPtr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
  HLMatrixType MatTy =
      HLMatrixType::cast(MatPtr->getType()->getPointerElementType());
  DXASSERT(MatTy.getLoweredVectorType(false /*MemRepr*/) ==
               LoadHelper.retVal->getType(),
           "helper type should match vectorized matrix");
#endif

  return TranslateBufLoad(LoadHelper, RK, Builder, OP, DL);
}
8557
8558
void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle,
8559
                             hlsl::OP *OP, Value *bufIdx, Value *baseOffset,
8560
1.18k
                             Value *val, const DataLayout &DL) {
8561
1.18k
  [[maybe_unused]] HLMatrixType MatTy = HLMatrixType::cast(matType);
8562
1.18k
  DXASSERT(MatTy.getLoweredVectorType(false /*MemRepr*/) == val->getType(),
8563
1.18k
           "helper type should match vectorized matrix");
8564
1.18k
  TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx,
8565
1.18k
                 baseOffset, Builder, OP);
8566
1.18k
}
8567
8568
void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, HLResource::Kind RK,
8569
                               hlsl::OP *OP, Value *status, Value *bufIdx,
8570
2.00k
                               Value *baseOffset, const DataLayout &DL) {
8571
2.00k
  IRBuilder<> Builder(CI);
8572
2.00k
  HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
8573
2.00k
  unsigned opcode = GetHLOpcode(CI);
8574
2.00k
  DXASSERT_LOCALVAR(group, group == HLOpcodeGroup::HLMatLoadStore,
8575
2.00k
                    "only translate matrix loadStore here.");
8576
2.00k
  HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
8577
  // Due to the current way the initial codegen generates matrix
8578
  // orientation casts, the in-register vector matrix has already been
8579
  // reordered based on the destination's row or column-major packing
8580
  // orientation.
8581
2.00k
  switch (matOp) {
8582
242
  case HLMatLoadStoreOpcode::RowMatLoad:
8583
814
  case HLMatLoadStoreOpcode::ColMatLoad:
8584
814
    TranslateStructBufMatLd(CI, Builder, handle, RK, OP, status, bufIdx,
8585
814
                            baseOffset, DL);
8586
814
    break;
8587
194
  case HLMatLoadStoreOpcode::RowMatStore:
8588
1.18k
  case HLMatLoadStoreOpcode::ColMatStore: {
8589
1.18k
    Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
8590
1.18k
    Value *val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
8591
1.18k
    TranslateStructBufMatSt(ptr->getType()->getPointerElementType(), Builder,
8592
1.18k
                            handle, OP, bufIdx, baseOffset, val, DL);
8593
1.18k
  } break;
8594
2.00k
  }
8595
8596
2.00k
  CI->eraseFromParent();
8597
2.00k
}
8598
8599
// Forward declaration. The definition appears later in this file; it is
// declared here because TranslateStructBufMatSubscript below calls it, and the
// definition in turn calls TranslateStructBufMatSubscript.
void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
8600
                                     HLResource::Kind ResKind, Value *bufIdx,
8601
                                     Value *baseOffset, Value *status,
8602
                                     hlsl::OP *OP, const DataLayout &DL);
8603
8604
// For case like mat[i][j].
8605
// IdxList is [i][0], [i][1], [i][2],[i][3].
8606
// Idx is j.
8607
// return [i][j] not mat[i][j] because resource ptr and temp ptr need different
8608
// code gen.
8609
static Value *LowerGEPOnMatIndexListToIndex(llvm::GetElementPtrInst *GEP,
8610
24
                                            ArrayRef<Value *> IdxList) {
8611
24
  IRBuilder<> Builder(GEP);
8612
24
  Value *zero = Builder.getInt32(0);
8613
24
  DXASSERT(GEP->getNumIndices() == 2, "must have 2 level");
8614
24
  Value *baseIdx = (GEP->idx_begin())->get();
8615
24
  DXASSERT_LOCALVAR(baseIdx, baseIdx == zero, "base index must be 0");
8616
24
  Value *Idx = (GEP->idx_begin() + 1)->get();
8617
8618
24
  if (ConstantInt *immIdx = dyn_cast<ConstantInt>(Idx)) {
8619
16
    return IdxList[immIdx->getSExtValue()];
8620
16
  }
8621
8622
8
  IRBuilder<> AllocaBuilder(
8623
8
      GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
8624
8
  unsigned size = IdxList.size();
8625
  // Store idxList to temp array.
8626
8
  ArrayType *AT = ArrayType::get(IdxList[0]->getType(), size);
8627
8
  Value *tempArray = AllocaBuilder.CreateAlloca(AT);
8628
8629
40
  for (unsigned i = 0; i < size; 
i++32
) {
8630
32
    Value *EltPtr = Builder.CreateGEP(tempArray, {zero, Builder.getInt32(i)});
8631
32
    Builder.CreateStore(IdxList[i], EltPtr);
8632
32
  }
8633
  // Load the idx.
8634
8
  Value *GEPOffset = Builder.CreateGEP(tempArray, {zero, Idx});
8635
8
  return Builder.CreateLoad(GEPOffset);
8636
24
}
8637
8638
// subscript operator for matrix of struct element.
8639
void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
8640
                                    HLResource::Kind ResKind, Value *bufIdx,
8641
                                    Value *baseOffset, Value *status,
8642
146
                                    hlsl::OP *hlslOP, const DataLayout &DL) {
8643
146
  unsigned opcode = GetHLOpcode(CI);
8644
146
  IRBuilder<> subBuilder(CI);
8645
146
  HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
8646
146
  Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
8647
146
  HLMatrixType MatTy =
8648
146
      HLMatrixType::cast(basePtr->getType()->getPointerElementType());
8649
146
  Type *EltTy = MatTy.getElementTypeForReg();
8650
146
  Constant *alignment = hlslOP->GetI32Const(DL.getTypeAllocSize(EltTy));
8651
8652
146
  Value *EltByteSize = ConstantInt::get(
8653
146
      baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
8654
8655
146
  Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
8656
8657
146
  Type *resultType = CI->getType()->getPointerElementType();
8658
146
  unsigned resultSize = 1;
8659
146
  if (resultType->isVectorTy())
8660
90
    resultSize = resultType->getVectorNumElements();
8661
146
  DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
8662
146
  assert(resultSize <= 16);
8663
146
  std::vector<Value *> idxList(resultSize);
8664
8665
146
  switch (subOp) {
8666
90
  case HLSubscriptOpcode::ColMatSubscript:
8667
90
  case HLSubscriptOpcode::RowMatSubscript: {
8668
274
    for (unsigned i = 0; i < resultSize; 
i++184
) {
8669
184
      Value *offset =
8670
184
          CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
8671
184
      offset = subBuilder.CreateMul(offset, EltByteSize);
8672
184
      idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
8673
184
    }
8674
90
  } break;
8675
0
  case HLSubscriptOpcode::RowMatElement:
8676
56
  case HLSubscriptOpcode::ColMatElement: {
8677
56
    Constant *EltIdxs = cast<Constant>(idx);
8678
112
    for (unsigned i = 0; i < resultSize; 
i++56
) {
8679
56
      Value *offset =
8680
56
          subBuilder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize);
8681
56
      idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
8682
56
    }
8683
56
  } break;
8684
0
  default:
8685
0
    DXASSERT(0, "invalid operation on const buffer");
8686
0
    break;
8687
146
  }
8688
8689
146
  Value *undefElt = UndefValue::get(EltTy);
8690
8691
292
  for (auto U = CI->user_begin(); U != CI->user_end();) {
8692
146
    Value *subsUser = *(U++);
8693
146
    if (resultSize == 1) {
8694
88
      TranslateStructBufSubscriptUser(cast<Instruction>(subsUser), handle,
8695
88
                                      ResKind, bufIdx, idxList[0], status,
8696
88
                                      hlslOP, DL);
8697
88
      continue;
8698
88
    }
8699
58
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
8700
24
      Value *GEPOffset = LowerGEPOnMatIndexListToIndex(GEP, idxList);
8701
8702
48
      for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
8703
24
        Instruction *gepUserInst = cast<Instruction>(*(gepU++));
8704
24
        TranslateStructBufSubscriptUser(gepUserInst, handle, ResKind, bufIdx,
8705
24
                                        GEPOffset, status, hlslOP, DL);
8706
24
      }
8707
8708
24
      GEP->eraseFromParent();
8709
34
    } else if (StoreInst *stUser = dyn_cast<StoreInst>(subsUser)) {
8710
      // Store elements of matrix in a struct. Needs to be done one scalar at a
8711
      // time even for vectors in the case that matrix orientation spreads the
8712
      // indexed scalars throughout the matrix vector.
8713
22
      IRBuilder<> stBuilder(stUser);
8714
22
      Value *Val = stUser->getValueOperand();
8715
22
      if (Val->getType()->isVectorTy()) {
8716
82
        for (unsigned i = 0; i < resultSize; 
i++60
) {
8717
60
          Value *EltVal = stBuilder.CreateExtractElement(Val, i);
8718
60
          uint8_t mask = DXIL::kCompMask_X;
8719
60
          GenerateStructBufSt(handle, bufIdx, idxList[i], EltTy, hlslOP,
8720
60
                              stBuilder, {EltVal, undefElt, undefElt, undefElt},
8721
60
                              mask, alignment);
8722
60
        }
8723
22
      } else {
8724
0
        uint8_t mask = DXIL::kCompMask_X;
8725
0
        GenerateStructBufSt(handle, bufIdx, idxList[0], EltTy, hlslOP,
8726
0
                            stBuilder, {Val, undefElt, undefElt, undefElt},
8727
0
                            mask, alignment);
8728
0
      }
8729
8730
22
      stUser->eraseFromParent();
8731
22
    } else {
8732
      // Must be load here.
8733
12
      LoadInst *ldUser = cast<LoadInst>(subsUser);
8734
12
      IRBuilder<> ldBuilder(ldUser);
8735
12
      Value *ldData = UndefValue::get(resultType);
8736
      // Load elements of matrix in a struct. Needs to be done one scalar at a
8737
      // time even for vectors in the case that matrix orientation spreads the
8738
      // indexed scalars throughout the matrix vector.
8739
12
      if (resultType->isVectorTy()) {
8740
40
        for (unsigned i = 0; i < resultSize; 
i++28
) {
8741
28
          Value *ResultElt;
8742
          // TODO: This can be inefficient for row major matrix load
8743
28
          GenerateRawBufLd(handle, bufIdx, idxList[i],
8744
28
                           /*status*/ nullptr, EltTy, ResultElt, hlslOP,
8745
28
                           ldBuilder, 1, alignment);
8746
28
          ldData = ldBuilder.CreateInsertElement(ldData, ResultElt, i);
8747
28
        }
8748
12
      } else {
8749
0
        GenerateRawBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr, EltTy,
8750
0
                         ldData, hlslOP, ldBuilder, 4, alignment);
8751
0
      }
8752
12
      ldUser->replaceAllUsesWith(ldData);
8753
12
      ldUser->eraseFromParent();
8754
12
    }
8755
58
  }
8756
8757
146
  CI->eraseFromParent();
8758
146
}
8759
8760
void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
8761
                                     HLResource::Kind ResKind, Value *bufIdx,
8762
                                     Value *baseOffset, Value *status,
8763
37.3k
                                     hlsl::OP *OP, const DataLayout &DL) {
8764
37.3k
  IRBuilder<> Builder(user);
8765
37.3k
  if (CallInst *userCall = dyn_cast<CallInst>(user)) {
8766
3.68k
    HLOpcodeGroup group = // user call?
8767
3.68k
        hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
8768
3.68k
    unsigned opcode = GetHLOpcode(userCall);
8769
    // For case element type of structure buffer is not structure type.
8770
3.68k
    if (baseOffset == nullptr)
8771
0
      baseOffset = OP->GetU32Const(0);
8772
3.68k
    if (group == HLOpcodeGroup::HLIntrinsic) {
8773
1.53k
      IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
8774
1.53k
      switch (IOP) {
8775
0
      case IntrinsicOp::MOP_Load: {
8776
0
        if (userCall->getType()->isPointerTy()) {
8777
          // Struct will return pointers which like []
8778
8779
0
        } else {
8780
          // Use builtin types on structuredBuffer.
8781
0
        }
8782
0
        DXASSERT(0, "not implement yet");
8783
0
      } break;
8784
364
      case IntrinsicOp::IOP_InterlockedAdd: {
8785
364
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
8786
364
                            baseOffset);
8787
364
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Add,
8788
364
                                       Builder, OP);
8789
364
      } break;
8790
72
      case IntrinsicOp::IOP_InterlockedAnd: {
8791
72
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
8792
72
                            baseOffset);
8793
72
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::And,
8794
72
                                       Builder, OP);
8795
72
      } break;
8796
224
      case IntrinsicOp::IOP_InterlockedExchange: {
8797
224
        Type *opType = nullptr;
8798
224
        PointerType *ptrType = dyn_cast<PointerType>(
8799
224
            userCall->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex)
8800
224
                ->getType());
8801
224
        if (ptrType && ptrType->getElementType()->isFloatTy())
8802
12
          opType = Type::getInt32Ty(userCall->getContext());
8803
224
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
8804
224
                            baseOffset, opType);
8805
224
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange,
8806
224
                                       Builder, OP);
8807
224
      } break;
8808
40
      case IntrinsicOp::IOP_InterlockedMax: {
8809
40
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
8810
40
                            baseOffset);
8811
40
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMax,
8812
40
                                       Builder, OP);
8813
40
      } break;
8814
40
      case IntrinsicOp::IOP_InterlockedMin: {
8815
40
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
8816
40
                            baseOffset);
8817
40
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMin,
8818
40
                                       Builder, OP);
8819
40
      } break;
8820
52
      case IntrinsicOp::IOP_InterlockedUMax: {
8821
52
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
8822
52
                            baseOffset);
8823
52
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMax,
8824
52
                                       Builder, OP);
8825
52
      } break;
8826
40
      case IntrinsicOp::IOP_InterlockedUMin: {
8827
40
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
8828
40
                            baseOffset);
8829
40
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMin,
8830
40
                                       Builder, OP);
8831
40
      } break;
8832
96
      case IntrinsicOp::IOP_InterlockedOr: {
8833
96
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
8834
96
                            baseOffset);
8835
96
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Or,
8836
96
                                       Builder, OP);
8837
96
      } break;
8838
72
      case IntrinsicOp::IOP_InterlockedXor: {
8839
72
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
8840
72
                            baseOffset);
8841
72
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Xor,
8842
72
                                       Builder, OP);
8843
72
      } break;
8844
262
      case IntrinsicOp::IOP_InterlockedCompareStore:
8845
508
      case IntrinsicOp::IOP_InterlockedCompareExchange: {
8846
508
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicCompareExchange,
8847
508
                            handle, bufIdx, baseOffset);
8848
508
        TranslateAtomicCmpXChg(helper, Builder, OP);
8849
508
      } break;
8850
14
      case IntrinsicOp::IOP_InterlockedCompareStoreFloatBitwise:
8851
28
      case IntrinsicOp::IOP_InterlockedCompareExchangeFloatBitwise: {
8852
28
        Type *i32Ty = Type::getInt32Ty(userCall->getContext());
8853
28
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicCompareExchange,
8854
28
                            handle, bufIdx, baseOffset, i32Ty);
8855
28
        TranslateAtomicCmpXChg(helper, Builder, OP);
8856
28
      } break;
8857
0
      default:
8858
0
        DXASSERT(0, "invalid opcode");
8859
0
        break;
8860
1.53k
      }
8861
1.53k
      userCall->eraseFromParent();
8862
2.14k
    } else if (group == HLOpcodeGroup::HLMatLoadStore)
8863
      // Load/Store matrix within a struct
8864
2.00k
      TranslateStructBufMatLdSt(userCall, handle, ResKind, OP, status, bufIdx,
8865
2.00k
                                baseOffset, DL);
8866
146
    else if (group == HLOpcodeGroup::HLSubscript) {
8867
      // Subscript of matrix within a struct
8868
146
      TranslateStructBufMatSubscript(userCall, handle, ResKind, bufIdx,
8869
146
                                     baseOffset, status, OP, DL);
8870
146
    }
8871
33.6k
  } else if (LoadInst *LdInst = dyn_cast<LoadInst>(user)) {
8872
    // Load of scalar/vector within a struct or structured raw load.
8873
9.17k
    ResLoadHelper helper(LdInst, ResKind, handle, bufIdx, baseOffset, status);
8874
9.17k
    TranslateBufLoad(helper, ResKind, Builder, OP, DL);
8875
8876
9.17k
    LdInst->eraseFromParent();
8877
24.4k
  } else if (StoreInst *StInst = dyn_cast<StoreInst>(user)) {
8878
    // Store of scalar/vector within a struct or structured raw store.
8879
9.14k
    Value *val = StInst->getValueOperand();
8880
9.14k
    TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx,
8881
9.14k
                   baseOffset, Builder, OP);
8882
9.14k
    StInst->eraseFromParent();
8883
15.3k
  } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(user)) {
8884
    // Recurse users
8885
76
    for (auto U = BCI->user_begin(); U != BCI->user_end();) {
8886
46
      Value *BCIUser = *(U++);
8887
46
      TranslateStructBufSubscriptUser(cast<Instruction>(BCIUser), handle,
8888
46
                                      ResKind, bufIdx, baseOffset, status, OP,
8889
46
                                      DL);
8890
46
    }
8891
30
    BCI->eraseFromParent();
8892
15.2k
  } else if (PHINode *Phi = dyn_cast<PHINode>(user)) {
8893
4
    if (Phi->getNumIncomingValues() != 1) {
8894
0
      dxilutil::EmitErrorOnInstruction(
8895
0
          Phi, "Phi not supported for buffer subscript");
8896
0
      return;
8897
0
    }
8898
    // Since the phi only has a single value we can safely process its
8899
    // users to translate the subscript. These single-value phis are
8900
    // inserted by the lcssa pass.
8901
8
    
for (auto U = Phi->user_begin(); 4
U != Phi->user_end();) {
8902
4
      Value *PhiUser = *(U++);
8903
4
      TranslateStructBufSubscriptUser(cast<Instruction>(PhiUser), handle,
8904
4
                                      ResKind, bufIdx, baseOffset, status, OP,
8905
4
                                      DL);
8906
4
    }
8907
4
    Phi->eraseFromParent();
8908
15.2k
  } else {
8909
    // should only used by GEP
8910
15.2k
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
8911
15.2k
    Type *Ty = GEP->getType()->getPointerElementType();
8912
8913
15.2k
    Value *offset = dxilutil::GEPIdxToOffset(GEP, Builder, OP, DL);
8914
15.2k
    DXASSERT_LOCALVAR(Ty,
8915
15.2k
                      offset->getType() == Type::getInt32Ty(Ty->getContext()),
8916
15.2k
                      "else bitness is wrong");
8917
    // No offset into element for Raw buffers; byte offset is in bufIdx.
8918
15.2k
    if (DXIL::IsRawBuffer(ResKind))
8919
574
      bufIdx = Builder.CreateAdd(offset, bufIdx);
8920
14.7k
    else
8921
14.7k
      baseOffset = Builder.CreateAdd(offset, baseOffset);
8922
8923
37.3k
    for (auto U = GEP->user_begin(); U != GEP->user_end();) {
8924
22.1k
      Value *GEPUser = *(U++);
8925
8926
22.1k
      TranslateStructBufSubscriptUser(cast<Instruction>(GEPUser), handle,
8927
22.1k
                                      ResKind, bufIdx, baseOffset, status, OP,
8928
22.1k
                                      DL);
8929
22.1k
    }
8930
    // delete the inst
8931
15.2k
    GEP->eraseFromParent();
8932
15.2k
  }
8933
37.3k
}
8934
8935
void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status,
8936
                                 hlsl::OP *OP, HLResource::Kind ResKind,
8937
12.9k
                                 const DataLayout &DL) {
8938
12.9k
  Value *subscriptIndex =
8939
12.9k
      CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx);
8940
12.9k
  Value *bufIdx = nullptr;
8941
12.9k
  Value *offset = nullptr;
8942
12.9k
  bufIdx = subscriptIndex;
8943
12.9k
  if (ResKind == HLResource::Kind::RawBuffer)
8944
284
    offset = UndefValue::get(Type::getInt32Ty(CI->getContext()));
8945
12.6k
  else
8946
    // StructuredBuffer, TypedBuffer, etc.
8947
12.6k
    offset = OP->GetU32Const(0);
8948
8949
27.9k
  for (auto U = CI->user_begin(); U != CI->user_end();) {
8950
15.0k
    Value *user = *(U++);
8951
8952
15.0k
    TranslateStructBufSubscriptUser(cast<Instruction>(user), handle, ResKind,
8953
15.0k
                                    bufIdx, offset, status, OP, DL);
8954
15.0k
  }
8955
12.9k
}
8956
} // namespace
8957
8958
// HLSubscript.
8959
namespace {
8960
8961
// Lowers a load through a typed buffer subscript.
//
// The arguments of the subscript call `CI` determine the access, while the
// return value and type come from `ldInst`. The load instruction is erased
// and the helper's return value is handed back to the caller.
Value *TranslateTypedBufSubscript(CallInst *CI, DXIL::ResourceKind RK,
                                  DXIL::ResourceClass RC, Value *handle,
                                  LoadInst *ldInst, IRBuilder<> &Builder,
                                  hlsl::OP *hlslOP, const DataLayout &DL) {
  ResLoadHelper loadHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, ldInst);
  TranslateBufLoad(loadHelper, RK, Builder, hlslOP, DL);

  // delete the ld
  ldInst->eraseFromParent();

  Value *loaded = loadHelper.retVal;
  return loaded;
}
8973
8974
// Returns VecVal with the element at EltIdx replaced by EltVal.
//
// For a constant EltIdx a single insertelement is emitted before InsertPt.
// For a dynamic EltIdx the block is split at InsertPt and a switch over
// EltIdx is emitted with one case block per element index in
// [0, vectorSize); each case inserts EltVal at its index, and a PHI at the
// start of the continuation block merges the results. The switch default
// (index matched no case) carries the unmodified VecVal. After the split,
// InsertPt lives in the continuation block.
Value *UpdateVectorElt(Value *VecVal, Value *EltVal, Value *EltIdx,
                       unsigned vectorSize, Instruction *InsertPt) {
  IRBuilder<> Builder(InsertPt);

  // Fast path: index known at compile time.
  if (ConstantInt *ConstIdx = dyn_cast<ConstantInt>(EltIdx))
    return Builder.CreateInsertElement(VecVal, EltVal,
                                       ConstIdx->getLimitedValue());

  BasicBlock *HeadBB = InsertPt->getParent();
  BasicBlock *TailBB = HeadBB->splitBasicBlock(InsertPt);

  // Replace the branch created by the split with a switch on the index.
  TerminatorInst *SplitBr = HeadBB->getTerminator();
  IRBuilder<> SwitchBuilder(SplitBr);
  LLVMContext &Ctx = InsertPt->getContext();

  SwitchInst *Switch = SwitchBuilder.CreateSwitch(EltIdx, TailBB, vectorSize);
  SplitBr->eraseFromParent();

  Function *ParentFn = TailBB->getParent();
  IRBuilder<> TailBuilder(TailBB->begin());
  PHINode *Merged = TailBuilder.CreatePHI(VecVal->getType(), vectorSize + 1);

  for (unsigned Lane = 0; Lane != vectorSize; ++Lane) {
    BasicBlock *CaseBB = BasicBlock::Create(Ctx, "case", ParentFn, TailBB);
    Switch->addCase(SwitchBuilder.getInt32(Lane), CaseBB);
    IRBuilder<> CaseBuilder(CaseBB);

    Value *Updated = CaseBuilder.CreateInsertElement(VecVal, EltVal, Lane);
    Merged->addIncoming(Updated, CaseBB);
    CaseBuilder.CreateBr(TailBB);
  }

  // Default edge of the switch: vector is passed through unchanged.
  Merged->addIncoming(VecVal, HeadBB);
  return Merged;
}
9010
9011
void TranslateTypedBufferSubscript(CallInst *CI, HLOperationLowerHelper &helper,
9012
                                   HLObjectOperationLowerHelper *pObjHelper,
9013
8.32k
                                   bool &Translated) {
9014
8.32k
  Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
9015
9016
8.32k
  hlsl::OP *hlslOP = &helper.hlslOP;
9017
  // Resource ptr.
9018
8.32k
  Value *handle = ptr;
9019
8.32k
  DXIL::ResourceClass RC = pObjHelper->GetRC(handle);
9020
8.32k
  DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
9021
9022
8.32k
  Type *Ty = CI->getType()->getPointerElementType();
9023
9024
16.9k
  for (auto It = CI->user_begin(); It != CI->user_end();) {
9025
8.61k
    User *user = *(It++);
9026
8.61k
    Instruction *I = cast<Instruction>(user);
9027
8.61k
    IRBuilder<> Builder(I);
9028
8.61k
    Value *UndefI = UndefValue::get(Builder.getInt32Ty());
9029
8.61k
    if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
9030
2.71k
      TranslateTypedBufSubscript(CI, RK, RC, handle, ldInst, Builder, hlslOP,
9031
2.71k
                                 helper.dataLayout);
9032
5.89k
    } else if (StoreInst *stInst = dyn_cast<StoreInst>(user)) {
9033
3.19k
      Value *val = stInst->getValueOperand();
9034
3.19k
      TranslateStore(RK, handle, val,
9035
3.19k
                     CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx),
9036
3.19k
                     UndefI, Builder, hlslOP);
9037
      // delete the st
9038
3.19k
      stInst->eraseFromParent();
9039
3.19k
    } else 
if (GetElementPtrInst *2.70k
GEP2.70k
= dyn_cast<GetElementPtrInst>(user)) {
9040
      // Must be vector type here.
9041
56
      unsigned vectorSize = Ty->getVectorNumElements();
9042
56
      DXASSERT_NOMSG(GEP->getNumIndices() == 2);
9043
56
      Use *GEPIdx = GEP->idx_begin();
9044
56
      GEPIdx++;
9045
56
      Value *EltIdx = *GEPIdx;
9046
96
      for (auto GEPIt = GEP->user_begin(); GEPIt != GEP->user_end();) {
9047
56
        User *GEPUser = *(GEPIt++);
9048
56
        if (StoreInst *SI = dyn_cast<StoreInst>(GEPUser)) {
9049
16
          IRBuilder<> StBuilder(SI);
9050
          // Generate Ld.
9051
16
          LoadInst *tmpLd = StBuilder.CreateLoad(CI);
9052
9053
16
          Value *ldVal = TranslateTypedBufSubscript(
9054
16
              CI, RK, RC, handle, tmpLd, StBuilder, hlslOP, helper.dataLayout);
9055
          // Update vector.
9056
16
          ldVal = UpdateVectorElt(ldVal, SI->getValueOperand(), EltIdx,
9057
16
                                  vectorSize, SI);
9058
          // Generate St.
9059
          // Reset insert point, UpdateVectorElt may move SI to different block.
9060
16
          StBuilder.SetInsertPoint(SI);
9061
16
          TranslateStore(
9062
16
              RK, handle, ldVal,
9063
16
              CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), UndefI,
9064
16
              StBuilder, hlslOP);
9065
16
          SI->eraseFromParent();
9066
16
          continue;
9067
16
        }
9068
40
        if (LoadInst *LI = dyn_cast<LoadInst>(GEPUser)) {
9069
24
          IRBuilder<> LdBuilder(LI);
9070
9071
          // Generate tmp vector load with vector type & translate it
9072
24
          LoadInst *tmpLd = LdBuilder.CreateLoad(CI);
9073
9074
24
          Value *ldVal = TranslateTypedBufSubscript(
9075
24
              CI, RK, RC, handle, tmpLd, LdBuilder, hlslOP, helper.dataLayout);
9076
9077
          // get the single element
9078
24
          ldVal = GenerateVecEltFromGEP(ldVal, GEP, LdBuilder,
9079
24
                                        /*bInsertLdNextToGEP*/ false);
9080
9081
24
          LI->replaceAllUsesWith(ldVal);
9082
24
          LI->eraseFromParent();
9083
24
          continue;
9084
24
        }
9085
        // Invalid operations.
9086
16
        Translated = false;
9087
16
        dxilutil::EmitErrorOnInstruction(GEP,
9088
16
                                         "Invalid operation on typed buffer.");
9089
16
        return;
9090
40
      }
9091
40
      GEP->eraseFromParent();
9092
2.64k
    } else {
9093
2.64k
      CallInst *userCall = cast<CallInst>(user);
9094
2.64k
      HLOpcodeGroup group =
9095
2.64k
          hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
9096
2.64k
      unsigned opcode = hlsl::GetHLOpcode(userCall);
9097
2.64k
      if (group == HLOpcodeGroup::HLIntrinsic) {
9098
2.64k
        IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
9099
2.64k
        if (RC == DXIL::ResourceClass::SRV) {
9100
          // Invalid operations.
9101
0
          Translated = false;
9102
0
          dxilutil::EmitErrorOnInstruction(userCall,
9103
0
                                           "Invalid operation on SRV.");
9104
0
          return;
9105
0
        }
9106
2.64k
        switch (IOP) {
9107
370
        case IntrinsicOp::IOP_InterlockedAdd: {
9108
370
          ResLoadHelper helper(CI, RK, RC, handle,
9109
370
                               IntrinsicOp::IOP_InterlockedAdd);
9110
370
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
9111
370
                                  helper.addr, /*offset*/ nullptr);
9112
370
          TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Add,
9113
370
                                         Builder, hlslOP);
9114
370
        } break;
9115
192
        case IntrinsicOp::IOP_InterlockedAnd: {
9116
192
          ResLoadHelper helper(CI, RK, RC, handle,
9117
192
                               IntrinsicOp::IOP_InterlockedAnd);
9118
192
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
9119
192
                                  helper.addr, /*offset*/ nullptr);
9120
192
          TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::And,
9121
192
                                         Builder, hlslOP);
9122
192
        } break;
9123
356
        case IntrinsicOp::IOP_InterlockedExchange: {
9124
356
          ResLoadHelper helper(CI, RK, RC, handle,
9125
356
                               IntrinsicOp::IOP_InterlockedExchange);
9126
356
          Type *opType = nullptr;
9127
356
          PointerType *ptrType = dyn_cast<PointerType>(
9128
356
              userCall->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex)
9129
356
                  ->getType());
9130
356
          if (ptrType && ptrType->getElementType()->isFloatTy())
9131
12
            opType = Type::getInt32Ty(userCall->getContext());
9132
356
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
9133
356
                                  helper.addr, /*offset*/ nullptr, opType);
9134
356
          TranslateAtomicBinaryOperation(
9135
356
              atomHelper, DXIL::AtomicBinOpCode::Exchange, Builder, hlslOP);
9136
356
        } break;
9137
108
        case IntrinsicOp::IOP_InterlockedMax: {
9138
108
          ResLoadHelper helper(CI, RK, RC, handle,
9139
108
                               IntrinsicOp::IOP_InterlockedMax);
9140
108
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
9141
108
                                  helper.addr, /*offset*/ nullptr);
9142
108
          TranslateAtomicBinaryOperation(
9143
108
              atomHelper, DXIL::AtomicBinOpCode::IMax, Builder, hlslOP);
9144
108
        } break;
9145
108
        case IntrinsicOp::IOP_InterlockedMin: {
9146
108
          ResLoadHelper helper(CI, RK, RC, handle,
9147
108
                               IntrinsicOp::IOP_InterlockedMin);
9148
108
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
9149
108
                                  helper.addr, /*offset*/ nullptr);
9150
108
          TranslateAtomicBinaryOperation(
9151
108
              atomHelper, DXIL::AtomicBinOpCode::IMin, Builder, hlslOP);
9152
108
        } break;
9153
116
        case IntrinsicOp::IOP_InterlockedUMax: {
9154
116
          ResLoadHelper helper(CI, RK, RC, handle,
9155
116
                               IntrinsicOp::IOP_InterlockedUMax);
9156
116
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
9157
116
                                  helper.addr, /*offset*/ nullptr);
9158
116
          TranslateAtomicBinaryOperation(
9159
116
              atomHelper, DXIL::AtomicBinOpCode::UMax, Builder, hlslOP);
9160
116
        } break;
9161
116
        case IntrinsicOp::IOP_InterlockedUMin: {
9162
116
          ResLoadHelper helper(CI, RK, RC, handle,
9163
116
                               IntrinsicOp::IOP_InterlockedUMin);
9164
116
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
9165
116
                                  helper.addr, /*offset*/ nullptr);
9166
116
          TranslateAtomicBinaryOperation(
9167
116
              atomHelper, DXIL::AtomicBinOpCode::UMin, Builder, hlslOP);
9168
116
        } break;
9169
200
        case IntrinsicOp::IOP_InterlockedOr: {
9170
200
          ResLoadHelper helper(CI, RK, RC, handle,
9171
200
                               IntrinsicOp::IOP_InterlockedOr);
9172
200
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
9173
200
                                  helper.addr, /*offset*/ nullptr);
9174
200
          TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Or,
9175
200
                                         Builder, hlslOP);
9176
200
        } break;
9177
192
        case IntrinsicOp::IOP_InterlockedXor: {
9178
192
          ResLoadHelper helper(CI, RK, RC, handle,
9179
192
                               IntrinsicOp::IOP_InterlockedXor);
9180
192
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
9181
192
                                  helper.addr, /*offset*/ nullptr);
9182
192
          TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Xor,
9183
192
                                         Builder, hlslOP);
9184
192
        } break;
9185
442
        case IntrinsicOp::IOP_InterlockedCompareStore:
9186
860
        case IntrinsicOp::IOP_InterlockedCompareExchange: {
9187
860
          ResLoadHelper helper(CI, RK, RC, handle,
9188
860
                               IntrinsicOp::IOP_InterlockedCompareExchange);
9189
860
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicCompareExchange,
9190
860
                                  handle, helper.addr, /*offset*/ nullptr);
9191
860
          TranslateAtomicCmpXChg(atomHelper, Builder, hlslOP);
9192
860
        } break;
9193
14
        case IntrinsicOp::IOP_InterlockedCompareStoreFloatBitwise:
9194
28
        case IntrinsicOp::IOP_InterlockedCompareExchangeFloatBitwise: {
9195
28
          Type *i32Ty = Type::getInt32Ty(userCall->getContext());
9196
28
          ResLoadHelper helper(CI, RK, RC, handle,
9197
28
                               IntrinsicOp::IOP_InterlockedCompareExchange);
9198
28
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicCompareExchange,
9199
28
                                  handle, helper.addr, /*offset*/ nullptr,
9200
28
                                  i32Ty);
9201
28
          TranslateAtomicCmpXChg(atomHelper, Builder, hlslOP);
9202
28
        } break;
9203
0
        default:
9204
0
          DXASSERT(0, "invalid opcode");
9205
0
          break;
9206
2.64k
        }
9207
2.64k
      } else {
9208
0
        DXASSERT(0, "invalid group");
9209
0
      }
9210
2.64k
      userCall->eraseFromParent();
9211
2.64k
    }
9212
8.61k
  }
9213
8.32k
}
9214
} // namespace
9215
9216
void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode,
9217
                          HLOperationLowerHelper &helper,
9218
                          HLObjectOperationLowerHelper *pObjHelper,
9219
29.6k
                          bool &Translated) {
9220
29.6k
  if (CI->user_empty()) {
9221
0
    Translated = true;
9222
0
    return;
9223
0
  }
9224
29.6k
  hlsl::OP *hlslOP = &helper.hlslOP;
9225
9226
29.6k
  Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
9227
29.6k
  if (opcode == HLSubscriptOpcode::CBufferSubscript) {
9228
8.72k
    dxilutil::MergeGepUse(CI);
9229
    // Resource ptr.
9230
8.72k
    Value *handle = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
9231
8.72k
    TranslateCBOperationsLegacy(handle, CI, hlslOP, helper.dxilTypeSys,
9232
8.72k
                                helper.dataLayout, pObjHelper);
9233
8.72k
    Translated = true;
9234
8.72k
    return;
9235
8.72k
  }
9236
9237
20.8k
  if (opcode == HLSubscriptOpcode::DoubleSubscript) {
9238
    // Resource ptr.
9239
180
    Value *handle = ptr;
9240
180
    DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
9241
180
    Value *coord = CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx);
9242
180
    Value *mipLevel =
9243
180
        CI->getArgOperand(HLOperandIndex::kDoubleSubscriptMipLevelOpIdx);
9244
9245
180
    auto U = CI->user_begin();
9246
180
    DXASSERT(CI->hasOneUse(), "subscript should only have one use");
9247
180
    IRBuilder<> Builder(CI);
9248
180
    if (LoadInst *ldInst = dyn_cast<LoadInst>(*U)) {
9249
140
      Value *Offset = UndefValue::get(Builder.getInt32Ty());
9250
140
      ResLoadHelper ldHelper(ldInst, RK, handle, coord, Offset,
9251
140
                             /*status*/ nullptr, mipLevel);
9252
140
      TranslateBufLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout);
9253
140
      ldInst->eraseFromParent();
9254
140
    } else {
9255
40
      StoreInst *stInst = cast<StoreInst>(*U);
9256
40
      Value *val = stInst->getValueOperand();
9257
40
      Value *UndefI = UndefValue::get(Builder.getInt32Ty());
9258
40
      TranslateStore(RK, handle, val,
9259
40
                     CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx),
9260
40
                     UndefI, Builder, hlslOP, mipLevel);
9261
40
      stInst->eraseFromParent();
9262
40
    }
9263
180
    Translated = true;
9264
180
    return;
9265
180
  }
9266
9267
20.6k
  Type *HandleTy = hlslOP->GetHandleType();
9268
20.6k
  if (ptr->getType() == hlslOP->GetNodeRecordHandleType()) {
9269
0
    DXASSERT(false, "Shouldn't get here, NodeRecord subscripts should have "
9270
0
                    "been lowered in LowerRecordAccessToGetNodeRecordPtr");
9271
0
    return;
9272
0
  }
9273
9274
20.6k
  if (ptr->getType() == HandleTy) {
9275
    // Resource ptr.
9276
20.1k
    Value *handle = ptr;
9277
20.1k
    DXIL::ResourceKind RK = DxilResource::Kind::Invalid;
9278
20.1k
    Type *ObjTy = nullptr;
9279
20.1k
    Type *RetTy = nullptr;
9280
20.1k
    RK = pObjHelper->GetRK(handle);
9281
20.1k
    if (RK == DxilResource::Kind::Invalid) {
9282
0
      Translated = false;
9283
0
      return;
9284
0
    }
9285
20.1k
    ObjTy = pObjHelper->GetResourceType(handle);
9286
20.1k
    RetTy = ObjTy->getStructElementType(0);
9287
20.1k
    Translated = true;
9288
9289
20.1k
    if (DXIL::IsStructuredBuffer(RK))
9290
11.8k
      TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP, RK,
9291
11.8k
                                  helper.dataLayout);
9292
8.32k
    else
9293
8.32k
      TranslateTypedBufferSubscript(CI, helper, pObjHelper, Translated);
9294
9295
20.1k
    return;
9296
20.1k
  }
9297
9298
496
  Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
9299
496
  if (IsLocalVariablePtr(basePtr) || IsSharedMemPtr(basePtr)) {
9300
    // Translate matrix into vector of array for share memory or local
9301
    // variable should be done in HLMatrixLowerPass
9302
0
    DXASSERT_NOMSG(0);
9303
0
    Translated = true;
9304
0
    return;
9305
0
  }
9306
9307
  // Other case should be take care in TranslateStructBufSubscript or
9308
  // TranslateCBOperations.
9309
496
  Translated = false;
9310
496
}
9311
9312
void TranslateSubscriptOperation(Function *F, HLOperationLowerHelper &helper,
9313
12.6k
                                 HLObjectOperationLowerHelper *pObjHelper) {
9314
42.2k
  for (auto U = F->user_begin(); U != F->user_end();) {
9315
29.6k
    Value *user = *(U++);
9316
29.6k
    if (!isa<Instruction>(user))
9317
0
      continue;
9318
    // must be call inst
9319
29.6k
    CallInst *CI = cast<CallInst>(user);
9320
29.6k
    unsigned opcode = GetHLOpcode(CI);
9321
29.6k
    bool Translated = true;
9322
29.6k
    TranslateHLSubscript(CI, static_cast<HLSubscriptOpcode>(opcode), helper,
9323
29.6k
                         pObjHelper, Translated);
9324
29.6k
    if (Translated) {
9325
      // delete the call
9326
29.0k
      DXASSERT(CI->use_empty(),
9327
29.0k
               "else TranslateHLSubscript didn't replace/erase uses");
9328
29.0k
      CI->eraseFromParent();
9329
29.0k
    }
9330
29.6k
  }
9331
12.6k
}
9332
9333
// Create BitCast if ptr, otherwise, create alloca of new type, write to bitcast
9334
// of alloca, and return load from alloca If bOrigAllocaTy is true: create
9335
// alloca of old type instead, write to alloca, and return load from bitcast of
9336
// alloca
9337
static Instruction *BitCastValueOrPtr(Value *V, Instruction *Insert, Type *Ty,
9338
                                      bool bOrigAllocaTy = false,
9339
164
                                      const Twine &Name = "") {
9340
164
  IRBuilder<> Builder(Insert);
9341
164
  if (Ty->isPointerTy()) {
9342
    // If pointer, we can bitcast directly
9343
0
    return cast<Instruction>(Builder.CreateBitCast(V, Ty, Name));
9344
0
  }
9345
9346
  // If value, we have to alloca, store to bitcast ptr, and load
9347
164
  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Insert));
9348
164
  Type *allocaTy = bOrigAllocaTy ? 
V->getType()0
: Ty;
9349
164
  Type *otherTy = bOrigAllocaTy ? 
Ty0
: V->getType();
9350
164
  Instruction *allocaInst = AllocaBuilder.CreateAlloca(allocaTy);
9351
164
  Instruction *bitCast = cast<Instruction>(
9352
164
      Builder.CreateBitCast(allocaInst, otherTy->getPointerTo()));
9353
164
  Builder.CreateStore(V, bOrigAllocaTy ? 
allocaInst0
: bitCast);
9354
164
  return Builder.CreateLoad(bOrigAllocaTy ? 
bitCast0
: allocaInst, Name);
9355
164
}
9356
9357
static Instruction *CreateTransposeShuffle(IRBuilder<> &Builder, Value *vecVal,
9358
0
                                           unsigned toRows, unsigned toCols) {
9359
0
  SmallVector<int, 16> castMask(toCols * toRows);
9360
0
  unsigned idx = 0;
9361
0
  for (unsigned r = 0; r < toRows; r++)
9362
0
    for (unsigned c = 0; c < toCols; c++)
9363
0
      castMask[idx++] = c * toRows + r;
9364
0
  return cast<Instruction>(
9365
0
      Builder.CreateShuffleVector(vecVal, vecVal, castMask));
9366
0
}
9367
9368
void TranslateHLBuiltinOperation(Function *F, HLOperationLowerHelper &helper,
9369
                                 hlsl::HLOpcodeGroup group,
9370
85.8k
                                 HLObjectOperationLowerHelper *pObjHelper) {
9371
85.8k
  if (group == HLOpcodeGroup::HLIntrinsic) {
9372
    // map to dxil operations
9373
88.4k
    for (auto U = F->user_begin(); U != F->user_end();) {
9374
65.2k
      Value *User = *(U++);
9375
65.2k
      if (!isa<Instruction>(User))
9376
0
        continue;
9377
      // must be call inst
9378
65.2k
      CallInst *CI = cast<CallInst>(User);
9379
9380
      // Keep the instruction to lower by other function.
9381
65.2k
      bool Translated = true;
9382
9383
65.2k
      TranslateBuiltinIntrinsic(CI, helper, pObjHelper, Translated);
9384
9385
65.2k
      if (Translated) {
9386
        // delete the call
9387
64.2k
        DXASSERT(CI->use_empty(),
9388
64.2k
                 "else TranslateBuiltinIntrinsic didn't replace/erase uses");
9389
64.2k
        CI->eraseFromParent();
9390
64.2k
      }
9391
65.2k
    }
9392
62.6k
  } else {
9393
62.6k
    if (group == HLOpcodeGroup::HLMatLoadStore) {
9394
      // Both ld/st use arg1 for the pointer.
9395
0
      Type *PtrTy =
9396
0
          F->getFunctionType()->getParamType(HLOperandIndex::kMatLoadPtrOpIdx);
9397
9398
0
      if (PtrTy->getPointerAddressSpace() == DXIL::kTGSMAddrSpace) {
9399
        // Translate matrix into vector of array for shared memory
9400
        // variable should be done in HLMatrixLowerPass.
9401
0
        if (!F->user_empty())
9402
0
          F->getContext().emitError("Fail to lower matrix load/store.");
9403
0
      } else if (PtrTy->getPointerAddressSpace() == DXIL::kDefaultAddrSpace) {
9404
        // Default address space may be function argument in lib target
9405
0
        if (!F->user_empty()) {
9406
0
          for (auto U = F->user_begin(); U != F->user_end();) {
9407
0
            Value *User = *(U++);
9408
0
            if (!isa<Instruction>(User))
9409
0
              continue;
9410
            // must be call inst
9411
0
            CallInst *CI = cast<CallInst>(User);
9412
0
            IRBuilder<> Builder(CI);
9413
0
            HLMatLoadStoreOpcode opcode =
9414
0
                static_cast<HLMatLoadStoreOpcode>(hlsl::GetHLOpcode(CI));
9415
0
            switch (opcode) {
9416
0
            case HLMatLoadStoreOpcode::ColMatStore:
9417
0
            case HLMatLoadStoreOpcode::RowMatStore: {
9418
0
              Value *vecVal =
9419
0
                  CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
9420
0
              Value *matPtr =
9421
0
                  CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
9422
0
              matPtr = SkipAddrSpaceCast(matPtr);
9423
0
              unsigned addrSpace =
9424
0
                  cast<PointerType>(matPtr->getType())->getAddressSpace();
9425
9426
0
              Value *castPtr = Builder.CreateBitCast(
9427
0
                  matPtr, vecVal->getType()->getPointerTo(addrSpace));
9428
0
              Builder.CreateStore(vecVal, castPtr);
9429
0
              CI->eraseFromParent();
9430
0
            } break;
9431
0
            case HLMatLoadStoreOpcode::ColMatLoad:
9432
0
            case HLMatLoadStoreOpcode::RowMatLoad: {
9433
0
              Value *matPtr =
9434
0
                  CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
9435
0
              matPtr = SkipAddrSpaceCast(matPtr);
9436
0
              unsigned addrSpace =
9437
0
                  cast<PointerType>(matPtr->getType())->getAddressSpace();
9438
0
              Value *castPtr = Builder.CreateBitCast(
9439
0
                  matPtr, CI->getType()->getPointerTo(addrSpace));
9440
0
              Value *vecVal = Builder.CreateLoad(castPtr);
9441
0
              CI->replaceAllUsesWith(vecVal);
9442
0
              CI->eraseFromParent();
9443
0
            } break;
9444
0
            }
9445
0
          }
9446
0
        }
9447
0
      }
9448
62.6k
    } else if (group == HLOpcodeGroup::HLCast) {
9449
      // HLCast may be used on matrix value function argument in lib target
9450
1.98k
      if (!F->user_empty()) {
9451
5.36k
        for (auto U = F->user_begin(); U != F->user_end();) {
9452
3.37k
          Value *User = *(U++);
9453
3.37k
          if (!isa<Instruction>(User))
9454
0
            continue;
9455
          // must be call inst
9456
3.37k
          CallInst *CI = cast<CallInst>(User);
9457
3.37k
          IRBuilder<> Builder(CI);
9458
3.37k
          HLCastOpcode opcode =
9459
3.37k
              static_cast<HLCastOpcode>(hlsl::GetHLOpcode(CI));
9460
3.37k
          bool bTranspose = false;
9461
3.37k
          bool bColDest = false;
9462
3.37k
          switch (opcode) {
9463
0
          case HLCastOpcode::RowMatrixToColMatrix:
9464
0
            bColDest = true;
9465
0
            LLVM_FALLTHROUGH;
9466
0
          case HLCastOpcode::ColMatrixToRowMatrix:
9467
0
            bTranspose = true;
9468
0
            LLVM_FALLTHROUGH;
9469
78
          case HLCastOpcode::ColMatrixToVecCast:
9470
164
          case HLCastOpcode::RowMatrixToVecCast: {
9471
164
            Value *matVal =
9472
164
                CI->getArgOperand(HLOperandIndex::kInitFirstArgOpIdx);
9473
164
            Value *vecVal =
9474
164
                BitCastValueOrPtr(matVal, CI, CI->getType(),
9475
164
                                  /*bOrigAllocaTy*/ false, matVal->getName());
9476
164
            if (bTranspose) {
9477
0
              HLMatrixType MatTy = HLMatrixType::cast(matVal->getType());
9478
0
              unsigned row = MatTy.getNumRows();
9479
0
              unsigned col = MatTy.getNumColumns();
9480
0
              if (bColDest)
9481
0
                std::swap(row, col);
9482
0
              vecVal = CreateTransposeShuffle(Builder, vecVal, row, col);
9483
0
            }
9484
164
            CI->replaceAllUsesWith(vecVal);
9485
164
            CI->eraseFromParent();
9486
164
          } break;
9487
3.37k
          }
9488
3.37k
        }
9489
1.98k
      }
9490
60.6k
    } else if (group == HLOpcodeGroup::HLSubscript) {
9491
12.6k
      TranslateSubscriptOperation(F, helper, pObjHelper);
9492
12.6k
    }
9493
    // map to math function or llvm ir
9494
62.6k
  }
9495
85.8k
}
9496
9497
typedef std::unordered_map<llvm::Instruction *, llvm::Value *> HandleMap;
9498
static void TranslateHLExtension(Function *F,
9499
                                 HLSLExtensionsCodegenHelper *helper,
9500
                                 OP &hlslOp,
9501
68
                                 HLObjectOperationLowerHelper &objHelper) {
9502
  // Find all calls to the function F.
9503
  // Store the calls in a vector for now to be replaced the loop below.
9504
  // We use a two step "find then replace" to avoid removing uses while
9505
  // iterating.
9506
68
  SmallVector<CallInst *, 8> CallsToReplace;
9507
72
  for (User *U : F->users()) {
9508
72
    if (CallInst *CI = dyn_cast<CallInst>(U)) {
9509
72
      CallsToReplace.push_back(CI);
9510
72
    }
9511
72
  }
9512
9513
  // Get the lowering strategy to use for this intrinsic.
9514
68
  llvm::StringRef LowerStrategy = GetHLLowerStrategy(F);
9515
68
  HLObjectExtensionLowerHelper extObjHelper(objHelper);
9516
68
  ExtensionLowering lower(LowerStrategy, helper, hlslOp, extObjHelper);
9517
9518
  // Replace all calls that were successfully translated.
9519
72
  for (CallInst *CI : CallsToReplace) {
9520
72
    Value *Result = lower.Translate(CI);
9521
72
    if (Result && Result != CI) {
9522
72
      CI->replaceAllUsesWith(Result);
9523
72
      CI->eraseFromParent();
9524
72
    }
9525
72
  }
9526
68
}
9527
9528
namespace hlsl {
9529
9530
void TranslateBuiltinOperations(
9531
    HLModule &HLM, HLSLExtensionsCodegenHelper *extCodegenHelper,
9532
20.3k
    std::unordered_set<Instruction *> &UpdateCounterSet) {
9533
20.3k
  HLOperationLowerHelper helper(HLM);
9534
9535
20.3k
  HLObjectOperationLowerHelper objHelper = {HLM, UpdateCounterSet};
9536
9537
20.3k
  Module *M = HLM.GetModule();
9538
9539
20.3k
  SmallVector<Function *, 4> NonUniformResourceIndexIntrinsics;
9540
9541
  // generate dxil operation
9542
195k
  for (iplist<Function>::iterator F : M->getFunctionList()) {
9543
195k
    if (F->user_empty())
9544
34.4k
      continue;
9545
161k
    if (!F->isDeclaration()) {
9546
172
      continue;
9547
172
    }
9548
161k
    hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
9549
161k
    if (group == HLOpcodeGroup::NotHL) {
9550
      // Nothing to do.
9551
75.0k
      continue;
9552
75.0k
    }
9553
85.9k
    if (group == HLOpcodeGroup::HLExtIntrinsic) {
9554
68
      TranslateHLExtension(F, extCodegenHelper, helper.hlslOP, objHelper);
9555
68
      continue;
9556
68
    }
9557
85.8k
    if (group == HLOpcodeGroup::HLIntrinsic) {
9558
23.2k
      CallInst *CI = cast<CallInst>(*F->user_begin()); // must be call inst
9559
23.2k
      unsigned opcode = hlsl::GetHLOpcode(CI);
9560
23.2k
      if (opcode == (unsigned)IntrinsicOp::IOP_NonUniformResourceIndex) {
9561
116
        NonUniformResourceIndexIntrinsics.push_back(F);
9562
116
        continue;
9563
116
      }
9564
23.2k
    }
9565
85.7k
    TranslateHLBuiltinOperation(F, helper, group, &objHelper);
9566
85.7k
  }
9567
9568
  // Translate last so value placed in NonUniformSet is still valid.
9569
20.3k
  if (!NonUniformResourceIndexIntrinsics.empty()) {
9570
116
    for (auto F : NonUniformResourceIndexIntrinsics) {
9571
116
      TranslateHLBuiltinOperation(F, helper, HLOpcodeGroup::HLIntrinsic,
9572
116
                                  &objHelper);
9573
116
    }
9574
90
  }
9575
20.3k
}
9576
9577
void EmitGetNodeRecordPtrAndUpdateUsers(HLOperationLowerHelper &helper,
9578
636
                                        CallInst *CI, Value *ArrayIndex) {
9579
636
  IRBuilder<> Builder(CI);
9580
636
  Value *opArg = nullptr;
9581
636
  Value *Handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
9582
636
  opArg = Builder.getInt32((unsigned)DXIL::OpCode::GetNodeRecordPtr);
9583
636
  StructType *origRecordUDT =
9584
636
      cast<StructType>(cast<PointerType>(CI->getType())->getElementType());
9585
636
  Type *getNodeRecordPtrRT = origRecordUDT;
9586
  // Translate node record type here
9587
636
  auto findIt = helper.loweredTypes.find(origRecordUDT);
9588
636
  if (findIt != helper.loweredTypes.end()) {
9589
244
    getNodeRecordPtrRT = findIt->second;
9590
392
  } else {
9591
392
    getNodeRecordPtrRT = GetLoweredUDT(origRecordUDT, &helper.dxilTypeSys);
9592
392
    if (origRecordUDT != getNodeRecordPtrRT)
9593
112
      helper.loweredTypes[origRecordUDT] = getNodeRecordPtrRT;
9594
392
  }
9595
636
  getNodeRecordPtrRT =
9596
636
      getNodeRecordPtrRT->getPointerTo(DXIL::kNodeRecordAddrSpace);
9597
636
  Function *getNodeRecordPtr = helper.hlslOP.GetOpFunc(
9598
636
      DXIL::OpCode::GetNodeRecordPtr, getNodeRecordPtrRT);
9599
636
  Value *args[] = {opArg, Handle, ArrayIndex};
9600
636
  Value *NodeRecordPtr = Builder.CreateCall(getNodeRecordPtr, args);
9601
636
  ReplaceUsesForLoweredUDT(CI, NodeRecordPtr);
9602
636
}
9603
9604
20.3k
void LowerRecordAccessToGetNodeRecordPtr(HLModule &HLM) {
9605
20.3k
  Module *M = HLM.GetModule();
9606
20.3k
  HLOperationLowerHelper helper(HLM);
9607
163k
  for (iplist<Function>::iterator F : M->getFunctionList()) {
9608
163k
    if (F->user_empty())
9609
31.9k
      continue;
9610
131k
    hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
9611
131k
    if (group == HLOpcodeGroup::HLSubscript) {
9612
43.2k
      for (auto U = F->user_begin(); U != F->user_end();) {
9613
30.2k
        Value *User = *(U++);
9614
30.2k
        if (!isa<Instruction>(User))
9615
0
          continue;
9616
        // must be call inst
9617
30.2k
        CallInst *CI = cast<CallInst>(User);
9618
30.2k
        HLSubscriptOpcode opcode =
9619
30.2k
            static_cast<HLSubscriptOpcode>(hlsl::GetHLOpcode(CI));
9620
30.2k
        if (opcode != HLSubscriptOpcode::DefaultSubscript)
9621
9.46k
          continue;
9622
9623
20.8k
        hlsl::OP *OP = &helper.hlslOP;
9624
20.8k
        Value *Handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
9625
20.8k
        if (Handle->getType() != OP->GetNodeRecordHandleType()) {
9626
20.1k
          continue;
9627
20.1k
        }
9628
9629
636
        Value *Index = CI->getNumArgOperands() > 2
9630
636
                           ? 
CI->getArgOperand(2)324
9631
636
                           : 
ConstantInt::get(helper.i32Ty, 0)312
;
9632
636
        EmitGetNodeRecordPtrAndUpdateUsers(helper, CI, Index);
9633
636
        CI->eraseFromParent();
9634
636
      }
9635
12.9k
    }
9636
131k
  }
9637
20.3k
}
9638
} // namespace hlsl