Coverage Report

Created: 2026-04-09 02:38

/home/runner/work/DirectXShaderCompiler/DirectXShaderCompiler/lib/HLSL/HLOperationLower.cpp
Line
Count
Source (jump to first uncovered line)
1
///////////////////////////////////////////////////////////////////////////////
2
//                                                                           //
3
// HLOperationLower.cpp                                                      //
4
// Copyright (C) Microsoft Corporation. All rights reserved.                 //
5
// This file is distributed under the University of Illinois Open Source     //
6
// License. See LICENSE.TXT for details.                                     //
7
//                                                                           //
8
// Lower functions to lower HL operations to DXIL operations.                //
9
//                                                                           //
10
///////////////////////////////////////////////////////////////////////////////
11
12
#include "dxc/DXIL/DxilConstants.h"
13
#define _USE_MATH_DEFINES
14
#include <array>
15
#include <cmath>
16
#include <functional>
17
#include <unordered_set>
18
19
#include "dxc/DXIL/DxilConstants.h"
20
#include "dxc/DXIL/DxilInstructions.h"
21
#include "dxc/DXIL/DxilModule.h"
22
#include "dxc/DXIL/DxilOperations.h"
23
#include "dxc/DXIL/DxilResourceProperties.h"
24
#include "dxc/DXIL/DxilUtil.h"
25
#include "dxc/HLSL/DxilPoisonValues.h"
26
#include "dxc/HLSL/HLLowerUDT.h"
27
#include "dxc/HLSL/HLMatrixLowerHelper.h"
28
#include "dxc/HLSL/HLMatrixType.h"
29
#include "dxc/HLSL/HLModule.h"
30
#include "dxc/HLSL/HLOperationLower.h"
31
#include "dxc/HLSL/HLOperationLowerExtension.h"
32
#include "dxc/HLSL/HLOperations.h"
33
#include "dxc/HlslIntrinsicOp.h"
34
35
#include "llvm/ADT/APSInt.h"
36
#include "llvm/IR/GetElementPtrTypeIterator.h"
37
#include "llvm/IR/IRBuilder.h"
38
#include "llvm/IR/Instructions.h"
39
#include "llvm/IR/IntrinsicInst.h"
40
#include "llvm/IR/Module.h"
41
42
using namespace llvm;
43
using namespace hlsl;
44
45
struct HLOperationLowerHelper {
46
  HLModule &M;
47
  OP &hlslOP;
48
  Type *voidTy;
49
  Type *f32Ty;
50
  Type *i32Ty;
51
  Type *i16Ty;
52
  llvm::Type *i1Ty;
53
  Type *i8Ty;
54
  DxilTypeSystem &dxilTypeSys;
55
  DxilFunctionProps *functionProps;
56
  DataLayout dataLayout;
57
  SmallDenseMap<Type *, Type *, 4> loweredTypes;
58
  HLOperationLowerHelper(HLModule &HLM);
59
};
60
61
HLOperationLowerHelper::HLOperationLowerHelper(HLModule &HLM)
62
40.9k
    : M(HLM), hlslOP(*HLM.GetOP()), dxilTypeSys(HLM.GetTypeSystem()),
63
40.9k
      dataLayout(DataLayout(HLM.GetHLOptions().bUseMinPrecision
64
40.9k
                                ? 
hlsl::DXIL::kLegacyLayoutString38.6k
65
40.9k
                                : 
hlsl::DXIL::kNewLayoutString2.24k
)) {
66
40.9k
  llvm::LLVMContext &Ctx = HLM.GetCtx();
67
40.9k
  voidTy = Type::getVoidTy(Ctx);
68
40.9k
  f32Ty = Type::getFloatTy(Ctx);
69
40.9k
  i32Ty = Type::getInt32Ty(Ctx);
70
40.9k
  i16Ty = Type::getInt16Ty(Ctx);
71
40.9k
  i1Ty = Type::getInt1Ty(Ctx);
72
40.9k
  i8Ty = Type::getInt8Ty(Ctx);
73
40.9k
  Function *EntryFunc = HLM.GetEntryFunction();
74
40.9k
  functionProps = nullptr;
75
40.9k
  if (HLM.HasDxilFunctionProps(EntryFunc))
76
35.1k
    functionProps = &HLM.GetDxilFunctionProps(EntryFunc);
77
40.9k
}
78
79
struct HLObjectOperationLowerHelper {
80
private:
81
  // For object intrinsics.
82
  HLModule &HLM;
83
  struct ResAttribute {
84
    DXIL::ResourceClass RC;
85
    DXIL::ResourceKind RK;
86
    Type *ResourceType;
87
  };
88
  std::unordered_map<Value *, ResAttribute> HandleMetaMap;
89
  std::unordered_set<Instruction *> &UpdateCounterSet;
90
  // Map from pointer of cbuffer to pointer of resource.
91
  // For cbuffer like this:
92
  //   cbuffer A {
93
  //     Texture2D T;
94
  //   };
95
  // A global resource Texture2D T2 will be created for Texture2D T.
96
  // CBPtrToResourceMap[T] will return T2.
97
  std::unordered_map<Value *, Value *> CBPtrToResourceMap;
98
99
public:
100
  HLObjectOperationLowerHelper(HLModule &HLM,
101
                               std::unordered_set<Instruction *> &UpdateCounter)
102
20.4k
      : HLM(HLM), UpdateCounterSet(UpdateCounter) {}
103
18.6k
  DXIL::ResourceClass GetRC(Value *Handle) {
104
18.6k
    ResAttribute &Res = FindCreateHandleResourceBase(Handle);
105
18.6k
    return Res.RC;
106
18.6k
  }
107
46.0k
  DXIL::ResourceKind GetRK(Value *Handle) {
108
46.0k
    ResAttribute &Res = FindCreateHandleResourceBase(Handle);
109
46.0k
    return Res.RK;
110
46.0k
  }
111
20.4k
  Type *GetResourceType(Value *Handle) {
112
20.4k
    ResAttribute &Res = FindCreateHandleResourceBase(Handle);
113
20.4k
    return Res.ResourceType;
114
20.4k
  }
115
116
2.94k
  void MarkHasCounter(Value *handle, Type *i8Ty) {
117
2.94k
    CallInst *CIHandle = cast<CallInst>(handle);
118
2.94k
    DXASSERT(hlsl::GetHLOpcodeGroup(CIHandle->getCalledFunction()) ==
119
2.94k
                 HLOpcodeGroup::HLAnnotateHandle,
120
2.94k
             "else invalid handle");
121
    // Mark has counter for the input handle.
122
2.94k
    Value *counterHandle =
123
2.94k
        CIHandle->getArgOperand(HLOperandIndex::kHandleOpIdx);
124
    // Change kind into StructurBufferWithCounter.
125
2.94k
    Constant *Props = cast<Constant>(CIHandle->getArgOperand(
126
2.94k
        HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx));
127
2.94k
    DxilResourceProperties RP = resource_helper::loadPropsFromConstant(*Props);
128
2.94k
    RP.Basic.SamplerCmpOrHasCounter = true;
129
130
2.94k
    CIHandle->setArgOperand(
131
2.94k
        HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx,
132
2.94k
        resource_helper::getAsConstant(RP,
133
2.94k
                                       HLM.GetOP()->GetResourcePropertiesType(),
134
2.94k
                                       *HLM.GetShaderModel()));
135
136
2.94k
    DXIL::ResourceClass RC = GetRC(handle);
137
2.94k
    DXASSERT_LOCALVAR(RC, RC == DXIL::ResourceClass::UAV,
138
2.94k
                      "must UAV for counter");
139
2.94k
    std::unordered_set<Value *> resSet;
140
2.94k
    MarkHasCounterOnCreateHandle(counterHandle, resSet);
141
2.94k
  }
142
143
28
  DxilResourceBase *FindCBufferResourceFromHandle(Value *handle) {
144
28
    if (CallInst *CI = dyn_cast<CallInst>(handle)) {
145
28
      hlsl::HLOpcodeGroup group =
146
28
          hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
147
28
      if (group == HLOpcodeGroup::HLAnnotateHandle) {
148
28
        handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
149
28
      }
150
28
    }
151
152
28
    Constant *symbol = nullptr;
153
28
    if (CallInst *CI = dyn_cast<CallInst>(handle)) {
154
28
      hlsl::HLOpcodeGroup group =
155
28
          hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
156
28
      if (group == HLOpcodeGroup::HLCreateHandle) {
157
28
        symbol = dyn_cast<Constant>(
158
28
            CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx));
159
28
      }
160
28
    }
161
162
28
    if (!symbol)
163
0
      return nullptr;
164
165
28
    for (const std::unique_ptr<DxilCBuffer> &res : HLM.GetCBuffers()) {
166
28
      if (res->GetGlobalSymbol() == symbol)
167
28
        return res.get();
168
28
    }
169
0
    return nullptr;
170
28
  }
171
172
  Value *GetOrCreateResourceForCbPtr(GetElementPtrInst *CbPtr,
173
                                     GlobalVariable *CbGV,
174
314
                                     DxilResourceProperties &RP) {
175
    // Change array idx to 0 to make sure all array ptr share same key.
176
314
    Value *Key = UniformCbPtr(CbPtr, CbGV);
177
314
    if (CBPtrToResourceMap.count(Key))
178
24
      return CBPtrToResourceMap[Key];
179
290
    Value *Resource = CreateResourceForCbPtr(CbPtr, CbGV, RP);
180
290
    CBPtrToResourceMap[Key] = Resource;
181
290
    return Resource;
182
314
  }
183
184
314
  Value *LowerCbResourcePtr(GetElementPtrInst *CbPtr, Value *ResPtr) {
185
    // Simple case.
186
314
    if (ResPtr->getType() == CbPtr->getType())
187
314
      return ResPtr;
188
189
    // Array case.
190
0
    DXASSERT_NOMSG(ResPtr->getType()->getPointerElementType()->isArrayTy());
191
192
0
    IRBuilder<> Builder(CbPtr);
193
0
    gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
194
195
0
    Value *arrayIdx = GEPIt.getOperand();
196
197
    // Only calc array idx and size.
198
    // Ignore struct type part.
199
0
    for (; GEPIt != E; ++GEPIt) {
200
0
      if (GEPIt->isArrayTy()) {
201
0
        arrayIdx = Builder.CreateMul(
202
0
            arrayIdx, Builder.getInt32(GEPIt->getArrayNumElements()));
203
0
        arrayIdx = Builder.CreateAdd(arrayIdx, GEPIt.getOperand());
204
0
      }
205
0
    }
206
207
0
    return Builder.CreateGEP(ResPtr, {Builder.getInt32(0), arrayIdx});
208
314
  }
209
210
314
  DxilResourceProperties GetResPropsFromAnnotateHandle(CallInst *Anno) {
211
314
    Constant *Props = cast<Constant>(Anno->getArgOperand(
212
314
        HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx));
213
314
    DxilResourceProperties RP = resource_helper::loadPropsFromConstant(*Props);
214
314
    return RP;
215
314
  }
216
217
private:
218
85.1k
  ResAttribute &FindCreateHandleResourceBase(Value *Handle) {
219
85.1k
    if (HandleMetaMap.count(Handle))
220
48.2k
      return HandleMetaMap[Handle];
221
222
    // Add invalid first to avoid dead loop.
223
36.9k
    HandleMetaMap[Handle] = {
224
36.9k
        DXIL::ResourceClass::Invalid, DXIL::ResourceKind::Invalid,
225
36.9k
        StructType::get(Type::getVoidTy(HLM.GetCtx()), nullptr)};
226
36.9k
    if (CallInst *CI = dyn_cast<CallInst>(Handle)) {
227
36.9k
      hlsl::HLOpcodeGroup group =
228
36.9k
          hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
229
36.9k
      if (group == HLOpcodeGroup::HLAnnotateHandle) {
230
36.9k
        Constant *Props = cast<Constant>(CI->getArgOperand(
231
36.9k
            HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx));
232
36.9k
        DxilResourceProperties RP =
233
36.9k
            resource_helper::loadPropsFromConstant(*Props);
234
36.9k
        Type *ResTy =
235
36.9k
            CI->getArgOperand(HLOperandIndex::kAnnotateHandleResourceTypeOpIdx)
236
36.9k
                ->getType();
237
238
36.9k
        ResAttribute Attrib = {RP.getResourceClass(), RP.getResourceKind(),
239
36.9k
                               ResTy};
240
241
36.9k
        HandleMetaMap[Handle] = Attrib;
242
36.9k
        return HandleMetaMap[Handle];
243
36.9k
      }
244
36.9k
    }
245
6
    dxilutil::EmitErrorOnContext(Handle->getContext(),
246
6
                                 "cannot map resource to handle.");
247
248
6
    return HandleMetaMap[Handle];
249
36.9k
  }
250
  CallInst *FindCreateHandle(Value *handle,
251
0
                             std::unordered_set<Value *> &resSet) {
252
0
    // Already checked.
253
0
    if (resSet.count(handle))
254
0
      return nullptr;
255
0
    resSet.insert(handle);
256
0
257
0
    if (CallInst *CI = dyn_cast<CallInst>(handle))
258
0
      return CI;
259
0
    if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) {
260
0
      if (CallInst *CI = FindCreateHandle(Sel->getTrueValue(), resSet))
261
0
        return CI;
262
0
      if (CallInst *CI = FindCreateHandle(Sel->getFalseValue(), resSet))
263
0
        return CI;
264
0
      return nullptr;
265
0
    }
266
0
    if (PHINode *Phi = dyn_cast<PHINode>(handle)) {
267
0
      for (unsigned i = 0; i < Phi->getNumOperands(); i++) {
268
0
        if (CallInst *CI = FindCreateHandle(Phi->getOperand(i), resSet))
269
0
          return CI;
270
0
      }
271
0
      return nullptr;
272
0
    }
273
0
274
0
    return nullptr;
275
0
  }
276
  void MarkHasCounterOnCreateHandle(Value *handle,
277
2.94k
                                    std::unordered_set<Value *> &resSet) {
278
    // Already checked.
279
2.94k
    if (resSet.count(handle))
280
0
      return;
281
2.94k
    resSet.insert(handle);
282
283
2.94k
    if (CallInst *CI = dyn_cast<CallInst>(handle)) {
284
2.94k
      Value *Res =
285
2.94k
          CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx);
286
2.94k
      LoadInst *LdRes = dyn_cast<LoadInst>(Res);
287
2.94k
      if (LdRes) {
288
2.93k
        UpdateCounterSet.insert(LdRes);
289
2.93k
        return;
290
2.93k
      }
291
8
      if (CallInst *CallRes = dyn_cast<CallInst>(Res)) {
292
8
        hlsl::HLOpcodeGroup group =
293
8
            hlsl::GetHLOpcodeGroup(CallRes->getCalledFunction());
294
8
        if (group == HLOpcodeGroup::HLCast) {
295
8
          HLCastOpcode opcode =
296
8
              static_cast<HLCastOpcode>(hlsl::GetHLOpcode(CallRes));
297
8
          if (opcode == HLCastOpcode::HandleToResCast) {
298
8
            if (Instruction *Hdl = dyn_cast<Instruction>(
299
8
                    CallRes->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx)))
300
8
              UpdateCounterSet.insert(Hdl);
301
8
            return;
302
8
          }
303
8
        }
304
8
      }
305
0
      dxilutil::EmitErrorOnInstruction(CI, "cannot map resource to handle.");
306
0
      return;
307
8
    }
308
0
    if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) {
309
0
      MarkHasCounterOnCreateHandle(Sel->getTrueValue(), resSet);
310
0
      MarkHasCounterOnCreateHandle(Sel->getFalseValue(), resSet);
311
0
    }
312
0
    if (PHINode *Phi = dyn_cast<PHINode>(handle)) {
313
0
      for (unsigned i = 0; i < Phi->getNumOperands(); i++) {
314
0
        MarkHasCounterOnCreateHandle(Phi->getOperand(i), resSet);
315
0
      }
316
0
    }
317
0
  }
318
319
314
  Value *UniformCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV) {
320
314
    gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
321
314
    std::vector<Value *> idxList(CbPtr->idx_begin(), CbPtr->idx_end());
322
314
    unsigned i = 0;
323
314
    IRBuilder<> Builder(HLM.GetCtx());
324
314
    Value *zero = Builder.getInt32(0);
325
1.29k
    for (; GEPIt != E; 
++GEPIt, ++i982
) {
326
982
      ConstantInt *ImmIdx = dyn_cast<ConstantInt>(GEPIt.getOperand());
327
982
      if (!ImmIdx) {
328
        // Remove dynamic indexing to avoid crash.
329
8
        idxList[i] = zero;
330
8
      }
331
982
    }
332
333
314
    Value *Key = Builder.CreateInBoundsGEP(CbGV, idxList);
334
314
    return Key;
335
314
  }
336
337
  Value *CreateResourceForCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV,
338
290
                                DxilResourceProperties &RP) {
339
290
    Type *CbTy = CbPtr->getPointerOperandType();
340
290
    DXASSERT_LOCALVAR(CbTy, CbTy == CbGV->getType(),
341
290
                      "else arg not point to var");
342
343
290
    gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
344
290
    unsigned i = 0;
345
290
    IRBuilder<> Builder(HLM.GetCtx());
346
290
    unsigned arraySize = 1;
347
290
    DxilTypeSystem &typeSys = HLM.GetTypeSystem();
348
349
290
    std::string Name;
350
1.19k
    for (; GEPIt != E; 
++GEPIt, ++i902
) {
351
902
      if (GEPIt->isArrayTy()) {
352
72
        arraySize *= GEPIt->getArrayNumElements();
353
72
        if (!Name.empty())
354
72
          Name += ".";
355
72
        if (ConstantInt *ImmIdx = dyn_cast<ConstantInt>(GEPIt.getOperand())) {
356
64
          unsigned idx = ImmIdx->getLimitedValue();
357
64
          Name += std::to_string(idx);
358
64
        }
359
830
      } else if (GEPIt->isStructTy()) {
360
540
        DxilStructAnnotation *typeAnnot =
361
540
            typeSys.GetStructAnnotation(cast<StructType>(*GEPIt));
362
540
        DXASSERT_NOMSG(typeAnnot);
363
540
        unsigned idx = cast<ConstantInt>(GEPIt.getOperand())->getLimitedValue();
364
540
        DXASSERT_NOMSG(typeAnnot->GetNumFields() > idx);
365
540
        DxilFieldAnnotation &fieldAnnot = typeAnnot->GetFieldAnnotation(idx);
366
540
        if (!Name.empty())
367
250
          Name += ".";
368
540
        Name += fieldAnnot.GetFieldName();
369
540
      }
370
902
    }
371
372
290
    Type *Ty = CbPtr->getResultElementType();
373
    // Not support resource array in cbuffer.
374
290
    unsigned ResBinding =
375
290
        HLM.GetBindingForResourceInCB(CbPtr, CbGV, RP.getResourceClass());
376
290
    return CreateResourceGV(Ty, Name, RP, ResBinding);
377
290
  }
378
379
  Value *CreateResourceGV(Type *Ty, StringRef Name, DxilResourceProperties &RP,
380
290
                          unsigned ResBinding) {
381
290
    Module &M = *HLM.GetModule();
382
290
    Constant *GV = M.getOrInsertGlobal(Name, Ty);
383
    // Create resource and set GV as globalSym.
384
290
    DxilResourceBase *Res = HLM.AddResourceWithGlobalVariableAndProps(GV, RP);
385
290
    DXASSERT(Res, "fail to create resource for global variable in cbuffer");
386
290
    Res->SetLowerBound(ResBinding);
387
290
    return GV;
388
290
  }
389
};
390
391
// Helper for lowering resource extension methods.
392
struct HLObjectExtensionLowerHelper : public hlsl::HLResourceLookup {
393
  explicit HLObjectExtensionLowerHelper(HLObjectOperationLowerHelper &ObjHelper)
394
68
      : m_ObjHelper(ObjHelper) {}
395
396
6
  virtual bool GetResourceKindName(Value *HLHandle, const char **ppName) {
397
6
    DXIL::ResourceKind K = m_ObjHelper.GetRK(HLHandle);
398
6
    bool Success = K != DXIL::ResourceKind::Invalid;
399
6
    if (Success) {
400
6
      *ppName = hlsl::GetResourceKindName(K);
401
6
    }
402
6
    return Success;
403
6
  }
404
405
private:
406
  HLObjectOperationLowerHelper &m_ObjHelper;
407
};
408
409
using IntrinsicLowerFuncTy = Value *(CallInst *CI, IntrinsicOp IOP,
410
                                     DXIL::OpCode opcode,
411
                                     HLOperationLowerHelper &helper,
412
                                     HLObjectOperationLowerHelper *pObjHelper,
413
                                     bool &Translated);
414
415
struct IntrinsicLower {
416
  // Intrinsic opcode.
417
  IntrinsicOp IntriOpcode;
418
  // Lower function.
419
  IntrinsicLowerFuncTy &LowerFunc;
420
  // DXIL opcode if can direct map.
421
  DXIL::OpCode DxilOpcode;
422
};
423
424
// IOP intrinsics.
425
namespace {
426
427
// Creates the necessary scalar calls to for a "trivial" operation where only
428
// call instructions to a single function type are needed.
429
// The overload type `Ty` determines what scalarization might be required.
430
// Elements of any vectors in `refArgs` are extracted  into scalars for each
431
// call generated while the same scalar values are used unaltered in each call.
432
// Utility objects `HlslOp` and `Builder` are used to generate calls to the
433
// given `DxilFunc` for each set of scalar arguments.
434
// The results are reconstructed into the given `RetTy` as needed.
435
Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode,
436
                            ArrayRef<Value *> refArgs, Type *Ty, Type *RetTy,
437
32.3k
                            OP *hlslOP, IRBuilder<> &Builder) {
438
32.3k
  unsigned argNum = refArgs.size();
439
32.3k
  std::vector<Value *> args = refArgs;
440
441
32.3k
  if (Ty->isVectorTy()) {
442
8.37k
    Value *retVal = llvm::UndefValue::get(RetTy);
443
8.37k
    unsigned vecSize = Ty->getVectorNumElements();
444
35.0k
    for (unsigned i = 0; i < vecSize; 
i++26.6k
) {
445
      // Update vector args, skip known opcode arg.
446
67.1k
      for (unsigned argIdx = HLOperandIndex::kUnaryOpSrc0Idx; argIdx < argNum;
447
40.5k
           argIdx++) {
448
40.5k
        if (refArgs[argIdx]->getType()->isVectorTy()) {
449
36.9k
          Value *arg = refArgs[argIdx];
450
36.9k
          args[argIdx] = Builder.CreateExtractElement(arg, i);
451
36.9k
        }
452
40.5k
      }
453
26.6k
      Value *EltOP =
454
26.6k
          Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
455
26.6k
      retVal = Builder.CreateInsertElement(retVal, EltOP, i);
456
26.6k
    }
457
8.37k
    return retVal;
458
8.37k
  }
459
460
  // Cannot add name to void.
461
23.9k
  if (RetTy->isVoidTy())
462
306
    return Builder.CreateCall(dxilFunc, args);
463
464
23.6k
  return Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
465
23.9k
}
466
467
// Creates a native vector call to for a "trivial" operation where only a single
468
// call instruction is needed. The overload and return types are the same vector
469
// type `Ty`.
470
// Utility objects `HlslOp` and `Builder` are used to create a call to the given
471
// `DxilFunc` with `RefArgs` arguments.
472
Value *TrivialDxilVectorOperation(Function *Func, OP::OpCode Opcode,
473
                                  ArrayRef<Value *> Args, Type *Ty, OP *OP,
474
1.09k
                                  IRBuilder<> &Builder) {
475
1.09k
  if (!Ty->isVoidTy())
476
1.09k
    return Builder.CreateCall(Func, Args, OP->GetOpCodeName(Opcode));
477
0
  return Builder.CreateCall(Func, Args); // Cannot add name to void.
478
1.09k
}
479
480
// Generates a DXIL operation with the overloaded type based on `Ty` and return
481
// type `RetTy`. When Ty is a vector, it will either generate per-element calls
482
// for each vector element and reconstruct the vector type from those results or
483
// operate on and return native vectors depending on vector size and the
484
// legality of the vector overload.
485
Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs,
486
                            Type *Ty, Type *RetTy, OP *hlslOP,
487
32.7k
                            IRBuilder<> &Builder) {
488
489
  // If supported and the overload type is a vector with more than 1 element,
490
  // create a native vector operation.
491
32.7k
  if (Ty->isVectorTy() && 
Ty->getVectorNumElements() > 19.33k
&&
492
32.7k
      
hlslOP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus()8.51k
&&
493
32.7k
      
OP::IsOverloadLegal(opcode, Ty)1.08k
) {
494
1.04k
    Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty);
495
1.04k
    return TrivialDxilVectorOperation(dxilFunc, opcode, refArgs, Ty, hlslOP,
496
1.04k
                                      Builder);
497
1.04k
  }
498
499
  // Set overload type to the scalar type of `Ty` and generate call(s).
500
31.6k
  Type *EltTy = Ty->getScalarType();
501
31.6k
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, EltTy);
502
503
31.6k
  return TrivialDxilOperation(dxilFunc, opcode, refArgs, Ty, RetTy, hlslOP,
504
31.6k
                              Builder);
505
32.7k
}
506
507
Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs,
508
3.72k
                            Type *Ty, Instruction *Inst, OP *hlslOP) {
509
3.72k
  DXASSERT(refArgs.size() > 0, "else opcode isn't in signature");
510
3.72k
  DXASSERT(refArgs[0] == nullptr,
511
3.72k
           "else caller has already filled the value in");
512
3.72k
  IRBuilder<> B(Inst);
513
3.72k
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
514
3.72k
  const_cast<llvm::Value **>(refArgs.data())[0] =
515
3.72k
      opArg; // actually stack memory from caller
516
3.72k
  return TrivialDxilOperation(opcode, refArgs, Ty, Inst->getType(), hlslOP, B);
517
3.72k
}
518
519
// Translate call that converts to a dxil unary operation with a different
520
// return type from the overload by passing the argument, explicit return type,
521
// and helper objects to the scalarizing unary dxil operation creation.
522
Value *TrivialUnaryOperationRet(CallInst *CI, IntrinsicOp IOP,
523
                                OP::OpCode OpCode,
524
                                HLOperationLowerHelper &Helper,
525
                                HLObjectOperationLowerHelper *,
526
98
                                bool &Translated) {
527
98
  Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
528
98
  Type *Ty = Src->getType();
529
530
98
  IRBuilder<> Builder(CI);
531
98
  hlsl::OP *OP = &Helper.hlslOP;
532
98
  Type *RetTy = CI->getType();
533
98
  Constant *OpArg = OP->GetU32Const((unsigned)OpCode);
534
98
  Value *Args[] = {OpArg, Src};
535
536
98
  return TrivialDxilOperation(OpCode, Args, Ty, RetTy, OP, Builder);
537
98
}
538
539
Value *TrivialDxilUnaryOperation(OP::OpCode OpCode, Value *Src, hlsl::OP *Op,
540
7.60k
                                 IRBuilder<> &Builder) {
541
7.60k
  Type *Ty = Src->getType();
542
543
7.60k
  Constant *OpArg = Op->GetU32Const((unsigned)OpCode);
544
7.60k
  Value *Args[] = {OpArg, Src};
545
546
7.60k
  return TrivialDxilOperation(OpCode, Args, Ty, Ty, Op, Builder);
547
7.60k
}
548
549
Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1,
550
7.06k
                                  hlsl::OP *hlslOP, IRBuilder<> &Builder) {
551
7.06k
  Type *Ty = src0->getType();
552
553
7.06k
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
554
7.06k
  Value *args[] = {opArg, src0, src1};
555
556
7.06k
  return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
557
7.06k
}
558
559
Value *TrivialDxilTrinaryOperation(OP::OpCode opcode, Value *src0, Value *src1,
560
                                   Value *src2, hlsl::OP *hlslOP,
561
12.9k
                                   IRBuilder<> &Builder) {
562
12.9k
  Type *Ty = src0->getType();
563
564
12.9k
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
565
12.9k
  Value *args[] = {opArg, src0, src1, src2};
566
567
12.9k
  return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
568
12.9k
}
569
570
// Translate call that trivially converts to a dxil unary operation by passing
571
// argument, return type, and helper objects to either scalarizing or native
572
// vector dxil operation creation depending on version and vector size.
573
Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
574
                             HLOperationLowerHelper &helper,
575
                             HLObjectOperationLowerHelper *pObjHelper,
576
4.34k
                             bool &Translated) {
577
4.34k
  Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
578
4.34k
  IRBuilder<> Builder(CI);
579
4.34k
  hlsl::OP *hlslOP = &helper.hlslOP;
580
581
4.34k
  return TrivialDxilUnaryOperation(opcode, src0, hlslOP, Builder);
582
4.34k
}
583
584
// Translate call that trivially converts to a dxil binary operation by passing
585
// arguments, return type, and helper objects to either scalarizing or native
586
// vector dxil operation creation depending on version and vector size.
587
Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
588
                              HLOperationLowerHelper &helper,
589
                              HLObjectOperationLowerHelper *pObjHelper,
590
2.49k
                              bool &Translated) {
591
2.49k
  hlsl::OP *hlslOP = &helper.hlslOP;
592
2.49k
  Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
593
2.49k
  Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
594
2.49k
  IRBuilder<> Builder(CI);
595
596
2.49k
  Value *binOp =
597
2.49k
      TrivialDxilBinaryOperation(opcode, src0, src1, hlslOP, Builder);
598
2.49k
  return binOp;
599
2.49k
}
600
601
// Translate call that trivially converts to a dxil trinary (aka tertiary)
602
// operation by passing arguments, return type, and helper objects to either
603
// scalarizing or native vector dxil operation creation depending on version
604
// and vector size.
605
Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
606
                               HLOperationLowerHelper &helper,
607
                               HLObjectOperationLowerHelper *pObjHelper,
608
12.0k
                               bool &Translated) {
609
12.0k
  hlsl::OP *hlslOP = &helper.hlslOP;
610
12.0k
  Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
611
12.0k
  Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
612
12.0k
  Value *src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
613
12.0k
  IRBuilder<> Builder(CI);
614
615
12.0k
  Value *triOp =
616
12.0k
      TrivialDxilTrinaryOperation(opcode, src0, src1, src2, hlslOP, Builder);
617
12.0k
  return triOp;
618
12.0k
}
619
620
Value *TrivialIsSpecialFloat(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
621
                             HLOperationLowerHelper &helper,
622
                             HLObjectOperationLowerHelper *pObjHelper,
623
174
                             bool &Translated) {
624
174
  hlsl::OP *hlslOP = &helper.hlslOP;
625
174
  Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
626
174
  IRBuilder<> Builder(CI);
627
628
174
  Type *Ty = src->getType();
629
174
  Type *RetTy = Type::getInt1Ty(CI->getContext());
630
174
  if (Ty->isVectorTy())
631
142
    RetTy = VectorType::get(RetTy, Ty->getVectorNumElements());
632
633
174
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
634
174
  Value *args[] = {opArg, src};
635
636
174
  return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder);
637
174
}
638
639
120
bool IsResourceGEP(GetElementPtrInst *I) {
640
120
  Type *Ty = I->getType()->getPointerElementType();
641
120
  Ty = dxilutil::GetArrayEltTy(Ty);
642
  // Only mark on GEP which point to resource.
643
120
  return dxilutil::IsHLSLResourceType(Ty);
644
120
}
645
646
Value *TranslateNonUniformResourceIndex(
647
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
648
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
649
196
    bool &Translated) {
650
196
  Value *V = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
651
196
  Type *hdlTy = helper.hlslOP.GetHandleType();
652
212
  for (User *U : CI->users()) {
653
212
    if (GetElementPtrInst *I = dyn_cast<GetElementPtrInst>(U)) {
654
      // Only mark on GEP which point to resource.
655
108
      if (IsResourceGEP(I))
656
100
        DxilMDHelper::MarkNonUniform(I);
657
108
    } else 
if (CastInst *104
castI104
= dyn_cast<CastInst>(U)) {
658
40
      for (User *castU : castI->users()) {
659
40
        if (GetElementPtrInst *I = dyn_cast<GetElementPtrInst>(castU)) {
660
          // Only mark on GEP which point to resource.
661
12
          if (IsResourceGEP(I))
662
12
            DxilMDHelper::MarkNonUniform(I);
663
28
        } else if (CallInst *CI = dyn_cast<CallInst>(castU)) {
664
28
          if (CI->getType() == hdlTy)
665
28
            DxilMDHelper::MarkNonUniform(CI);
666
28
        }
667
40
      }
668
64
    } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
669
64
      if (CI->getType() == hdlTy)
670
44
        DxilMDHelper::MarkNonUniform(CI);
671
64
    }
672
212
  }
673
196
  CI->replaceAllUsesWith(V);
674
196
  return nullptr;
675
196
}
676
677
Value *TrivialBarrier(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
678
                      HLOperationLowerHelper &helper,
679
                      HLObjectOperationLowerHelper *pObjHelper,
680
1.55k
                      bool &Translated) {
681
1.55k
  hlsl::OP *OP = &helper.hlslOP;
682
1.55k
  Function *dxilFunc = OP->GetOpFunc(OP::OpCode::Barrier, CI->getType());
683
1.55k
  Constant *opArg = OP->GetU32Const((unsigned)OP::OpCode::Barrier);
684
685
1.55k
  unsigned uglobal = static_cast<unsigned>(DXIL::BarrierMode::UAVFenceGlobal);
686
1.55k
  unsigned g = static_cast<unsigned>(DXIL::BarrierMode::TGSMFence);
687
1.55k
  unsigned t = static_cast<unsigned>(DXIL::BarrierMode::SyncThreadGroup);
688
  // unsigned ut =
689
  // static_cast<unsigned>(DXIL::BarrierMode::UAVFenceThreadGroup);
690
691
1.55k
  unsigned barrierMode = 0;
692
1.55k
  switch (IOP) {
693
8
  case IntrinsicOp::IOP_AllMemoryBarrier:
694
8
    barrierMode = uglobal | g;
695
8
    break;
696
16
  case IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync:
697
16
    barrierMode = uglobal | g | t;
698
16
    break;
699
32
  case IntrinsicOp::IOP_GroupMemoryBarrier:
700
32
    barrierMode = g;
701
32
    break;
702
1.46k
  case IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync:
703
1.46k
    barrierMode = g | t;
704
1.46k
    break;
705
24
  case IntrinsicOp::IOP_DeviceMemoryBarrier:
706
24
    barrierMode = uglobal;
707
24
    break;
708
8
  case IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync:
709
8
    barrierMode = uglobal | t;
710
8
    break;
711
0
  default:
712
0
    DXASSERT(0, "invalid opcode for barrier");
713
0
    break;
714
1.55k
  }
715
1.55k
  Value *src0 = OP->GetU32Const(static_cast<unsigned>(barrierMode));
716
717
1.55k
  Value *args[] = {opArg, src0};
718
719
1.55k
  IRBuilder<> Builder(CI);
720
1.55k
  Builder.CreateCall(dxilFunc, args);
721
1.55k
  return nullptr;
722
1.55k
}
723
724
Value *TranslateD3DColorToUByte4(CallInst *CI, IntrinsicOp IOP,
725
                                 OP::OpCode opcode,
726
                                 HLOperationLowerHelper &helper,
727
                                 HLObjectOperationLowerHelper *pObjHelper,
728
32
                                 bool &Translated) {
729
32
  IRBuilder<> Builder(CI);
730
32
  Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
731
32
  Type *Ty = val->getType();
732
733
  // Use the same scaling factor used by FXC (i.e., 255.001953)
734
  // Excerpt from stackoverflow discussion:
735
  // "Built-in rounding, necessary because of truncation. 0.001953 * 256 = 0.5"
736
32
  Constant *toByteConst = ConstantFP::get(Ty->getScalarType(), 255.001953);
737
738
32
  if (Ty->isVectorTy()) {
739
32
    static constexpr int supportedVecElemCount = 4;
740
32
    if (Ty->getVectorNumElements() != supportedVecElemCount) {
741
0
      llvm_unreachable(
742
0
          "Unsupported input type for intrinsic D3DColorToUByte4.");
743
0
      return UndefValue::get(CI->getType());
744
0
    }
745
746
32
    toByteConst = ConstantVector::getSplat(supportedVecElemCount, toByteConst);
747
    // Swizzle the input val -> val.zyxw
748
32
    SmallVector<int, 4> mask{2, 1, 0, 3};
749
32
    val = Builder.CreateShuffleVector(val, val, mask);
750
32
  }
751
752
32
  Value *byte4 = Builder.CreateFMul(toByteConst, val);
753
32
  return Builder.CreateCast(Instruction::CastOps::FPToSI, byte4, CI->getType());
754
32
}
755
756
// Returns true if pow can be implemented using Fxc's mul-only code gen pattern.
757
// Fxc uses the below rules when choosing mul-only code gen pattern to implement
758
// pow function. Rule 1: Applicable only to power values in the range
759
// [INT32_MIN, INT32_MAX] Rule 2: The maximum number of mul ops needed shouldn't
760
// exceed (2n+1) or (n+1) based on whether the power
761
//         is a positive or a negative value. Here "n" is the number of scalar
762
//         elements in power.
763
// Rule 3: Power must be an exact value.
764
// +----------+---------------------+------------------+
765
// | BaseType | IsExponentPositive  | MaxMulOpsAllowed |
766
// +----------+---------------------+------------------+
767
// | float4x4 | True                |               33 |
768
// | float4x4 | False               |               17 |
769
// | float4x2 | True                |               17 |
770
// | float4x2 | False               |                9 |
771
// | float2x4 | True                |               17 |
772
// | float2x4 | False               |                9 |
773
// | float4   | True                |                9 |
774
// | float4   | False               |                5 |
775
// | float2   | True                |                5 |
776
// | float2   | False               |                3 |
777
// | float    | True                |                3 |
778
// | float    | False               |                2 |
779
// +----------+---------------------+------------------+
780
781
bool CanUseFxcMulOnlyPatternForPow(IRBuilder<> &Builder, Value *x, Value *pow,
782
1.45k
                                   int32_t &powI) {
783
  // Applicable only when power is a literal.
784
1.45k
  if (!isa<ConstantDataVector>(pow) && 
!isa<ConstantFP>(pow)262
) {
785
74
    return false;
786
74
  }
787
788
  // Only apply this code gen on splat values.
789
1.38k
  if (ConstantDataVector *cdv = dyn_cast<ConstantDataVector>(pow)) {
790
1.19k
    if (!hlsl::dxilutil::IsSplat(cdv)) {
791
8
      return false;
792
8
    }
793
1.19k
  }
794
795
  // Only apply on aggregates of 16 or fewer elements,
796
  // representing the max 4x4 matrix size.
797
1.37k
  Type *Ty = x->getType();
798
1.37k
  if (Ty->isVectorTy() && 
Ty->getVectorNumElements() > 161.18k
)
799
0
    return false;
800
801
1.37k
  APFloat powAPF = isa<ConstantDataVector>(pow)
802
1.37k
                       ? 
cast<ConstantDataVector>(pow)->getElementAsAPFloat(0)1.18k
803
1.37k
                       : // should be a splat value
804
1.37k
                       
cast<ConstantFP>(pow)->getValueAPF()188
;
805
1.37k
  APSInt powAPS(32, false);
806
1.37k
  bool isExact = false;
807
  // Try converting float value of power to integer and also check if the float
808
  // value is exact.
809
1.37k
  APFloat::opStatus status =
810
1.37k
      powAPF.convertToInteger(powAPS, APFloat::rmTowardZero, &isExact);
811
1.37k
  if (status == APFloat::opStatus::opOK && 
isExact348
) {
812
340
    powI = powAPS.getExtValue();
813
340
    uint32_t powU = abs(powI);
814
340
    int setBitCount = 0;
815
340
    int maxBitSetPos = -1;
816
11.2k
    for (int i = 0; i < 32; 
i++10.8k
) {
817
10.8k
      if ((powU >> i) & 1) {
818
548
        setBitCount++;
819
548
        maxBitSetPos = i;
820
548
      }
821
10.8k
    }
822
823
340
    DXASSERT(maxBitSetPos <= 30, "msb should always be zero.");
824
340
    unsigned numElem =
825
340
        isa<ConstantDataVector>(pow) ? 
x->getType()->getVectorNumElements()152
:
1188
;
826
340
    int mulOpThreshold = powI < 0 ? 
numElem + 132
:
2 * numElem + 1308
;
827
340
    int mulOpNeeded = maxBitSetPos + setBitCount - 1;
828
340
    return mulOpNeeded <= mulOpThreshold;
829
340
  }
830
831
1.03k
  return false;
832
1.37k
}
833
834
Value *TranslatePowUsingFxcMulOnlyPattern(IRBuilder<> &Builder, Value *x,
835
184
                                          const int32_t y) {
836
184
  uint32_t absY = abs(y);
837
  // If y is zero then always return 1.
838
184
  if (absY == 0) {
839
8
    return ConstantFP::get(x->getType(), 1);
840
8
  }
841
842
176
  int lastSetPos = -1;
843
176
  Value *result = nullptr;
844
176
  Value *mul = nullptr;
845
5.80k
  for (int i = 0; i < 32; 
i++5.63k
) {
846
5.63k
    if ((absY >> i) & 1) {
847
1.31k
      for (int j = i; j > lastSetPos; 
j--1.00k
) {
848
1.00k
        if (!mul) {
849
176
          mul = x;
850
832
        } else {
851
832
          mul = Builder.CreateFMul(mul, mul);
852
832
        }
853
1.00k
      }
854
855
304
      result = (result == nullptr) ? 
mul176
:
Builder.CreateFMul(result, mul)128
;
856
304
      lastSetPos = i;
857
304
    }
858
5.63k
  }
859
860
  // Compute reciprocal for negative power values.
861
176
  if (y < 0) {
862
32
    Value *constOne = ConstantFP::get(x->getType(), 1);
863
32
    result = Builder.CreateFDiv(constOne, result);
864
32
  }
865
866
176
  return result;
867
184
}
868
869
Value *TranslatePowImpl(hlsl::OP *hlslOP, IRBuilder<> &Builder, Value *x,
870
1.45k
                        Value *y, bool isFXCCompatMode = false) {
871
  // As applicable implement pow using only mul ops as done by Fxc.
872
1.45k
  int32_t p = 0;
873
1.45k
  if (CanUseFxcMulOnlyPatternForPow(Builder, x, y, p)) {
874
304
    if (isFXCCompatMode)
875
184
      return TranslatePowUsingFxcMulOnlyPattern(Builder, x, p);
876
    // Only take care 2 for it will not affect register pressure.
877
120
    if (p == 2)
878
56
      return Builder.CreateFMul(x, x);
879
120
  }
880
881
  // Default to log-mul-exp pattern if previous scenarios don't apply.
882
  // t = log(x);
883
1.21k
  Value *logX =
884
1.21k
      TrivialDxilUnaryOperation(DXIL::OpCode::Log, x, hlslOP, Builder);
885
  // t = y * t;
886
1.21k
  Value *mulY = Builder.CreateFMul(logX, y);
887
  // pow = exp(t);
888
1.21k
  return TrivialDxilUnaryOperation(DXIL::OpCode::Exp, mulY, hlslOP, Builder);
889
1.45k
}
890
891
Value *TranslateAddUint64(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
892
                          HLOperationLowerHelper &helper,
893
                          HLObjectOperationLowerHelper *pObjHelper,
894
32
                          bool &Translated) {
895
32
  hlsl::OP *hlslOP = &helper.hlslOP;
896
32
  IRBuilder<> Builder(CI);
897
32
  Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
898
32
  Type *Ty = val->getType();
899
32
  VectorType *VT = dyn_cast<VectorType>(Ty);
900
32
  if (!VT) {
901
0
    dxilutil::EmitErrorOnInstruction(
902
0
        CI, "AddUint64 can only be applied to uint2 and uint4 operands.");
903
0
    return UndefValue::get(Ty);
904
0
  }
905
906
32
  unsigned size = VT->getNumElements();
907
32
  if (size != 2 && 
size != 424
) {
908
16
    dxilutil::EmitErrorOnInstruction(
909
16
        CI, "AddUint64 can only be applied to uint2 and uint4 operands.");
910
16
    return UndefValue::get(Ty);
911
16
  }
912
16
  Value *op0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
913
16
  Value *op1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
914
915
16
  Value *RetVal = UndefValue::get(Ty);
916
917
16
  Function *AddC = hlslOP->GetOpFunc(DXIL::OpCode::UAddc, helper.i32Ty);
918
16
  Value *opArg = Builder.getInt32(static_cast<unsigned>(DXIL::OpCode::UAddc));
919
40
  for (unsigned i = 0; i < size; 
i += 224
) {
920
24
    Value *low0 = Builder.CreateExtractElement(op0, i);
921
24
    Value *low1 = Builder.CreateExtractElement(op1, i);
922
24
    Value *lowWithC = Builder.CreateCall(AddC, {opArg, low0, low1});
923
24
    Value *low = Builder.CreateExtractValue(lowWithC, 0);
924
24
    RetVal = Builder.CreateInsertElement(RetVal, low, i);
925
926
24
    Value *carry = Builder.CreateExtractValue(lowWithC, 1);
927
    // Ext i1 to i32
928
24
    carry = Builder.CreateZExt(carry, helper.i32Ty);
929
930
24
    Value *hi0 = Builder.CreateExtractElement(op0, i + 1);
931
24
    Value *hi1 = Builder.CreateExtractElement(op1, i + 1);
932
24
    Value *hi = Builder.CreateAdd(hi0, hi1);
933
24
    hi = Builder.CreateAdd(hi, carry);
934
24
    RetVal = Builder.CreateInsertElement(RetVal, hi, i + 1);
935
24
  }
936
16
  return RetVal;
937
32
}
938
939
936
bool IsValidLoadInput(Value *V) {
940
  // Must be load input.
941
  // TODO: report this error on front-end
942
936
  if (!V || !isa<CallInst>(V)) {
943
12
    return false;
944
12
  }
945
924
  CallInst *CI = cast<CallInst>(V);
946
  // Must be immediate.
947
924
  ConstantInt *opArg =
948
924
      cast<ConstantInt>(CI->getArgOperand(DXIL::OperandIndex::kOpcodeIdx));
949
924
  DXIL::OpCode op = static_cast<DXIL::OpCode>(opArg->getLimitedValue());
950
924
  if (op != DXIL::OpCode::LoadInput) {
951
0
    return false;
952
0
  }
953
924
  return true;
954
924
}
955
956
// Tunnel through insert/extract element and shuffle to find original source
957
// of scalar value, or specified element (vecIdx) of vector value.
958
936
Value *FindScalarSource(Value *src, unsigned vecIdx = 0) {
959
936
  Type *srcTy = src->getType()->getScalarType();
960
6.16k
  while (src && !isa<UndefValue>(src)) {
961
6.16k
    if (src->getType()->isVectorTy()) {
962
5.10k
      if (InsertElementInst *IE = dyn_cast<InsertElementInst>(src)) {
963
4.18k
        unsigned curIdx = (unsigned)cast<ConstantInt>(IE->getOperand(2))
964
4.18k
                              ->getUniqueInteger()
965
4.18k
                              .getLimitedValue();
966
4.18k
        src = IE->getOperand((curIdx == vecIdx) ? 
1938
:
03.25k
);
967
4.18k
      } else 
if (ShuffleVectorInst *916
SV916
= dyn_cast<ShuffleVectorInst>(src)) {
968
904
        int newIdx = SV->getMaskValue(vecIdx);
969
904
        if (newIdx < 0)
970
0
          return UndefValue::get(srcTy);
971
904
        vecIdx = (unsigned)newIdx;
972
904
        src = SV->getOperand(0);
973
904
        unsigned numElt = src->getType()->getVectorNumElements();
974
904
        if (numElt <= vecIdx) {
975
0
          vecIdx -= numElt;
976
0
          src = SV->getOperand(1);
977
0
        }
978
904
      } else {
979
12
        return UndefValue::get(srcTy); // Didn't find it.
980
12
      }
981
5.10k
    } else {
982
1.06k
      if (ExtractElementInst *EE = dyn_cast<ExtractElementInst>(src)) {
983
56
        vecIdx = (unsigned)cast<ConstantInt>(EE->getIndexOperand())
984
56
                     ->getUniqueInteger()
985
56
                     .getLimitedValue();
986
56
        src = EE->getVectorOperand();
987
1.00k
      } else if (hlsl::dxilutil::IsConvergentMarker(src)) {
988
80
        src = hlsl::dxilutil::GetConvergentSource(src);
989
924
      } else {
990
924
        break; // Found it.
991
924
      }
992
1.06k
    }
993
6.16k
  }
994
924
  return src;
995
936
}
996
997
// Finds corresponding inputs, calls translation for each, and returns
998
// resulting vector or scalar.
999
// Uses functor that takes (inputElemID, rowIdx, colIdx), and returns
1000
// translation for one input scalar.
1001
Value *TranslateEvalHelper(
1002
    CallInst *CI, Value *val, IRBuilder<> &Builder,
1003
266
    std::function<Value *(Value *, Value *, Value *)> fnTranslateScalarInput) {
1004
266
  Type *Ty = CI->getType();
1005
266
  Value *result = UndefValue::get(Ty);
1006
266
  if (Ty->isVectorTy()) {
1007
1.10k
    for (unsigned i = 0; i < Ty->getVectorNumElements(); 
++i882
) {
1008
894
      Value *InputEl = FindScalarSource(val, i);
1009
894
      if (!IsValidLoadInput(InputEl)) {
1010
12
        dxilutil::EmitErrorOnInstruction(
1011
12
            CI, "attribute evaluation can only be done "
1012
12
                "on values taken directly from inputs.");
1013
12
        return result;
1014
12
      }
1015
882
      CallInst *loadInput = cast<CallInst>(InputEl);
1016
882
      Value *inputElemID =
1017
882
          loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
1018
882
      Value *rowIdx =
1019
882
          loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
1020
882
      Value *colIdx =
1021
882
          loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
1022
882
      Value *Elt = fnTranslateScalarInput(inputElemID, rowIdx, colIdx);
1023
882
      result = Builder.CreateInsertElement(result, Elt, i);
1024
882
    }
1025
224
  } else {
1026
42
    Value *InputEl = FindScalarSource(val);
1027
42
    if (!IsValidLoadInput(InputEl)) {
1028
0
      dxilutil::EmitErrorOnInstruction(CI,
1029
0
                                       "attribute evaluation can only be done "
1030
0
                                       "on values taken directly from inputs.");
1031
0
      return result;
1032
0
    }
1033
42
    CallInst *loadInput = cast<CallInst>(InputEl);
1034
42
    Value *inputElemID =
1035
42
        loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx);
1036
42
    Value *rowIdx =
1037
42
        loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx);
1038
42
    Value *colIdx =
1039
42
        loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx);
1040
42
    result = fnTranslateScalarInput(inputElemID, rowIdx, colIdx);
1041
42
  }
1042
254
  return result;
1043
266
}
1044
1045
Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
1046
                           HLOperationLowerHelper &helper,
1047
                           HLObjectOperationLowerHelper *pObjHelper,
1048
80
                           bool &Translated) {
1049
80
  hlsl::OP *hlslOP = &helper.hlslOP;
1050
80
  Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
1051
80
  Value *sampleIdx = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
1052
80
  IRBuilder<> Builder(CI);
1053
80
  OP::OpCode opcode = OP::OpCode::EvalSampleIndex;
1054
80
  Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
1055
80
  Function *evalFunc =
1056
80
      hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType());
1057
1058
80
  return TranslateEvalHelper(
1059
80
      CI, val, Builder,
1060
160
      [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value * {
1061
160
        return Builder.CreateCall(
1062
160
            evalFunc, {opArg, inputElemID, rowIdx, colIdx, sampleIdx});
1063
160
      });
1064
80
}
1065
1066
Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
1067
                            HLOperationLowerHelper &helper,
1068
                            HLObjectOperationLowerHelper *pObjHelper,
1069
16
                            bool &Translated) {
1070
16
  hlsl::OP *hlslOP = &helper.hlslOP;
1071
16
  Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
1072
16
  Value *offset = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
1073
16
  IRBuilder<> Builder(CI);
1074
16
  Value *offsetX = Builder.CreateExtractElement(offset, (uint64_t)0);
1075
16
  Value *offsetY = Builder.CreateExtractElement(offset, 1);
1076
16
  OP::OpCode opcode = OP::OpCode::EvalSnapped;
1077
16
  Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
1078
16
  Function *evalFunc =
1079
16
      hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType());
1080
1081
16
  return TranslateEvalHelper(
1082
16
      CI, val, Builder,
1083
64
      [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value * {
1084
64
        return Builder.CreateCall(
1085
64
            evalFunc, {opArg, inputElemID, rowIdx, colIdx, offsetX, offsetY});
1086
64
      });
1087
16
}
1088
1089
Value *TranslateEvalCentroid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
1090
                             HLOperationLowerHelper &helper,
1091
                             HLObjectOperationLowerHelper *pObjHelper,
1092
88
                             bool &Translated) {
1093
88
  hlsl::OP *hlslOP = &helper.hlslOP;
1094
88
  Value *val = CI->getArgOperand(DXIL::OperandIndex::kUnarySrc0OpIdx);
1095
88
  IRBuilder<> Builder(CI);
1096
88
  OP::OpCode opcode = OP::OpCode::EvalCentroid;
1097
88
  Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
1098
88
  Function *evalFunc =
1099
88
      hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType());
1100
1101
88
  return TranslateEvalHelper(
1102
88
      CI, val, Builder,
1103
410
      [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value * {
1104
410
        return Builder.CreateCall(evalFunc,
1105
410
                                  {opArg, inputElemID, rowIdx, colIdx});
1106
410
      });
1107
88
}
1108
1109
/*
1110
HLSL: bool RWDispatchNodeInputRecord<recordType>::FinishedCrossGroupSharing()
1111
DXIL: i1 @dx.op.finishedCrossGroupSharing(i32 %Opcode,
1112
%dx.types.NodeRecordHandle %NodeInputRecordHandle)
1113
*/
1114
Value *TranslateNodeFinishedCrossGroupSharing(
1115
    CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
1116
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
1117
8
    bool &Translated) {
1118
8
  hlsl::OP *OP = &helper.hlslOP;
1119
1120
8
  Function *dxilFunc = OP->GetOpFunc(op, Type::getVoidTy(CI->getContext()));
1121
8
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
1122
8
  DXASSERT_NOMSG(handle->getType() == OP->GetNodeRecordHandleType());
1123
8
  Value *opArg = OP->GetU32Const((unsigned)op);
1124
1125
8
  IRBuilder<> Builder(CI);
1126
8
  return Builder.CreateCall(dxilFunc, {opArg, handle});
1127
8
}
1128
1129
/*
1130
HLSL:
1131
    bool NodeOutput<recordType>::IsValid()
1132
    bool EmptyNodeOutput::IsValid()
1133
DXIL:
1134
  i1 @dx.op.nodeOutputIsValid(i32 %Opcode, %dx.types.NodeHandle
1135
%NodeOutputHandle)
1136
*/
1137
Value *TranslateNodeOutputIsValid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
1138
                                  HLOperationLowerHelper &helper,
1139
                                  HLObjectOperationLowerHelper *pObjHelper,
1140
48
                                  bool &Translated) {
1141
48
  hlsl::OP *OP = &helper.hlslOP;
1142
48
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
1143
48
  Function *dxilFunc = OP->GetOpFunc(op, Type::getVoidTy(CI->getContext()));
1144
48
  Value *opArg = OP->GetU32Const((unsigned)op);
1145
1146
48
  IRBuilder<> Builder(CI);
1147
48
  return Builder.CreateCall(dxilFunc, {opArg, handle});
1148
48
}
1149
1150
Value *TranslateGetAttributeAtVertex(CallInst *CI, IntrinsicOp IOP,
1151
                                     OP::OpCode op,
1152
                                     HLOperationLowerHelper &helper,
1153
                                     HLObjectOperationLowerHelper *pObjHelper,
1154
82
                                     bool &Translated) {
1155
82
  DXASSERT(op == OP::OpCode::AttributeAtVertex, "Wrong opcode to translate");
1156
82
  hlsl::OP *hlslOP = &helper.hlslOP;
1157
82
  IRBuilder<> Builder(CI);
1158
82
  Value *val = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc0OpIdx);
1159
82
  Value *vertexIdx = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc1OpIdx);
1160
82
  Value *vertexI8Idx =
1161
82
      Builder.CreateTrunc(vertexIdx, Type::getInt8Ty(CI->getContext()));
1162
82
  Value *opArg = hlslOP->GetU32Const((unsigned)op);
1163
82
  Function *evalFunc = hlslOP->GetOpFunc(op, val->getType()->getScalarType());
1164
1165
82
  return TranslateEvalHelper(
1166
82
      CI, val, Builder,
1167
290
      [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value * {
1168
290
        return Builder.CreateCall(
1169
290
            evalFunc, {opArg, inputElemID, rowIdx, colIdx, vertexI8Idx});
1170
290
      });
1171
82
}
1172
/*
1173
1174
HLSL:
1175
void Barrier(uint MemoryTypeFlags, uint SemanticFlags)
1176
void Barrier(Object o, uint SemanticFlags)
1177
1178
All UAVs and/or Node Records by types:
1179
void @dx.op.barrierByMemoryType(i32 %Opcode,
1180
  i32 %MemoryTypeFlags, i32 %SemanticFlags)
1181
1182
UAV by handle:
1183
void @dx.op.barrierByMemoryHandle(i32 %Opcode,
1184
  %dx.types.Handle %Object, i32 %SemanticFlags)
1185
1186
Node Record by handle:
1187
void @dx.op.barrierByMemoryHandle(i32 %Opcode,
1188
  %dx.types.NodeRecordHandle %Object, i32 %SemanticFlags)
1189
*/
1190
1191
Value *TranslateBarrier(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
1192
                        HLOperationLowerHelper &helper,
1193
                        HLObjectOperationLowerHelper *pObjHelper,
1194
242
                        bool &Translated) {
1195
242
  hlsl::OP *OP = &helper.hlslOP;
1196
242
  Value *HandleOrMemoryFlags =
1197
242
      CI->getArgOperand(HLOperandIndex::kBarrierMemoryTypeFlagsOpIdx);
1198
242
  Value *SemanticFlags =
1199
242
      CI->getArgOperand(HLOperandIndex::kBarrierSemanticFlagsOpIdx);
1200
242
  IRBuilder<> Builder(CI);
1201
1202
242
  if (HandleOrMemoryFlags->getType()->isIntegerTy()) {
1203
86
    op = OP::OpCode::BarrierByMemoryType;
1204
156
  } else if (HandleOrMemoryFlags->getType() == OP->GetHandleType()) {
1205
80
    op = OP::OpCode::BarrierByMemoryHandle;
1206
80
  } else 
if (76
HandleOrMemoryFlags->getType() == OP->GetNodeRecordHandleType()76
) {
1207
76
    op = OP::OpCode::BarrierByNodeRecordHandle;
1208
76
  } else {
1209
0
    DXASSERT(false, "Shouldn't get here");
1210
0
  }
1211
1212
242
  Function *dxilFunc = OP->GetOpFunc(op, CI->getType());
1213
242
  Constant *opArg = OP->GetU32Const((unsigned)op);
1214
1215
242
  Value *args[] = {opArg, HandleOrMemoryFlags, SemanticFlags};
1216
1217
242
  Builder.CreateCall(dxilFunc, args);
1218
242
  return nullptr;
1219
242
}
1220
1221
Value *TranslateGetGroupOrThreadNodeOutputRecords(
1222
    CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
1223
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
1224
276
    bool isPerThreadRecord, bool &Translated) {
1225
276
  IRBuilder<> Builder(CI);
1226
276
  hlsl::OP *OP = &helper.hlslOP;
1227
276
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
1228
276
  Function *dxilFunc = OP->GetOpFunc(op, Builder.getVoidTy());
1229
276
  Value *opArg = OP->GetU32Const((unsigned)op);
1230
276
  Value *count =
1231
276
      CI->getArgOperand(HLOperandIndex::kAllocateRecordNumRecordsIdx);
1232
276
  Value *perThread = OP->GetI1Const(isPerThreadRecord);
1233
1234
276
  Value *args[] = {opArg, handle, count, perThread};
1235
1236
276
  return Builder.CreateCall(dxilFunc, args);
1237
276
}
1238
1239
/*
1240
HLSL:
1241
GroupNodeOutputRecords<recordType>
1242
NodeOutput<recordType>::GetGroupNodeOutputRecords(uint numRecords); DXIL:
1243
%dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 %Opcode,
1244
%dx.types.NodeHandle %NodeOutputHandle, i32 %NumRecords, i1 %PerThread)
1245
*/
1246
Value *
1247
TranslateGetGroupNodeOutputRecords(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
1248
                                   HLOperationLowerHelper &helper,
1249
                                   HLObjectOperationLowerHelper *pObjHelper,
1250
146
                                   bool &Translated) {
1251
146
  return TranslateGetGroupOrThreadNodeOutputRecords(
1252
146
      CI, IOP, op, helper, pObjHelper, /* isPerThreadRecord */ false,
1253
146
      Translated);
1254
146
}
1255
1256
/*
1257
HLSL:
1258
ThreadNodeOutputRecords<recordType>
1259
NodeOutput<recordType>::GetThreadNodeOutputRecords(uint numRecords) DXIL:
1260
%dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 %Opcode,
1261
%dx.types.NodeHandle %NodeOutputHandle, i32 %NumRecords, i1 %PerThread)
1262
*/
1263
Value *TranslateGetThreadNodeOutputRecords(
1264
    CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
1265
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
1266
130
    bool &Translated) {
1267
130
  return TranslateGetGroupOrThreadNodeOutputRecords(
1268
130
      CI, IOP, op, helper, pObjHelper, /* isPerThreadRecord */ true,
1269
130
      Translated);
1270
130
}
1271
1272
/*
1273
HLSL:
1274
uint EmptyNodeInput::Count()
1275
uint GroupNodeInputRecords<recordType>::Count()
1276
uint RWGroupNodeInputRecords<recordType>::Count()
1277
1278
DXIL:
1279
i32 @dx.op.getInputRecordCount(i32 %Opcode, %dx.types.NodeRecordHandle
1280
%NodeInputHandle)
1281
*/
1282
Value *
1283
TranslateNodeGetInputRecordCount(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
1284
                                 HLOperationLowerHelper &helper,
1285
                                 HLObjectOperationLowerHelper *pObjHelper,
1286
30
                                 bool &Translated) {
1287
30
  hlsl::OP *OP = &helper.hlslOP;
1288
1289
30
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
1290
30
  DXASSERT_NOMSG(handle->getType() == OP->GetNodeRecordHandleType());
1291
30
  Function *dxilFunc = OP->GetOpFunc(op, Type::getVoidTy(CI->getContext()));
1292
30
  Value *opArg = OP->GetU32Const((unsigned)op);
1293
30
  Value *args[] = {opArg, handle};
1294
1295
30
  IRBuilder<> Builder(CI);
1296
30
  return Builder.CreateCall(dxilFunc, args);
1297
30
}
1298
1299
Value *TrivialNoArgOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1300
                             HLOperationLowerHelper &helper,
1301
                             HLObjectOperationLowerHelper *pObjHelper,
1302
176
                             bool &Translated) {
1303
176
  hlsl::OP *hlslOP = &helper.hlslOP;
1304
176
  Type *Ty = Type::getVoidTy(CI->getContext());
1305
1306
176
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
1307
176
  Value *args[] = {opArg};
1308
176
  IRBuilder<> Builder(CI);
1309
176
  Value *dxilOp = TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
1310
1311
176
  return dxilOp;
1312
176
}
1313
1314
Value *TrivialNoArgWithRetOperation(CallInst *CI, IntrinsicOp IOP,
1315
                                    OP::OpCode opcode,
1316
                                    HLOperationLowerHelper &helper,
1317
                                    HLObjectOperationLowerHelper *pObjHelper,
1318
360
                                    bool &Translated) {
1319
360
  hlsl::OP *hlslOP = &helper.hlslOP;
1320
360
  Type *Ty = CI->getType();
1321
1322
360
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
1323
360
  Value *args[] = {opArg};
1324
360
  IRBuilder<> Builder(CI);
1325
360
  Value *dxilOp = TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
1326
1327
360
  return dxilOp;
1328
360
}
1329
1330
Value *TrivialNoArgWithRetNoOverloadOperation(
1331
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1332
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
1333
20
    bool &Translated) {
1334
20
  hlsl::OP *hlslOP = &helper.hlslOP;
1335
20
  Type *Ty = CI->getType();
1336
1337
20
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
1338
20
  Value *args[] = {opArg};
1339
20
  IRBuilder<> Builder(CI);
1340
20
  return TrivialDxilOperation(opcode, args, Builder.getVoidTy(), Ty, hlslOP,
1341
20
                              Builder);
1342
20
}
1343
1344
Value *TranslateGetRTSamplePos(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
1345
                               HLOperationLowerHelper &helper,
1346
                               HLObjectOperationLowerHelper *pObjHelper,
1347
16
                               bool &Translated) {
1348
16
  hlsl::OP *hlslOP = &helper.hlslOP;
1349
16
  OP::OpCode opcode = OP::OpCode::RenderTargetGetSamplePosition;
1350
16
  IRBuilder<> Builder(CI);
1351
1352
16
  Type *Ty = Type::getVoidTy(CI->getContext());
1353
16
  Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
1354
1355
16
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
1356
16
  Value *args[] = {opArg, val};
1357
1358
16
  Value *samplePos =
1359
16
      TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
1360
1361
16
  Value *result = UndefValue::get(CI->getType());
1362
16
  Value *samplePosX = Builder.CreateExtractValue(samplePos, 0);
1363
16
  Value *samplePosY = Builder.CreateExtractValue(samplePos, 1);
1364
16
  result = Builder.CreateInsertElement(result, samplePosX, (uint64_t)0);
1365
16
  result = Builder.CreateInsertElement(result, samplePosY, 1);
1366
16
  return result;
1367
16
}
1368
1369
// val QuadReadLaneAt(val, uint);
1370
Value *TranslateQuadReadLaneAt(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1371
                               HLOperationLowerHelper &helper,
1372
                               HLObjectOperationLowerHelper *pObjHelper,
1373
66
                               bool &Translated) {
1374
66
  hlsl::OP *hlslOP = &helper.hlslOP;
1375
66
  Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)};
1376
66
  return TrivialDxilOperation(DXIL::OpCode::QuadReadLaneAt, refArgs,
1377
66
                              CI->getOperand(1)->getType(), CI, hlslOP);
1378
66
}
1379
1380
// Quad intrinsics of the form fn(val,QuadOpKind)->val
1381
Value *TranslateQuadAnyAll(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1382
                           HLOperationLowerHelper &helper,
1383
                           HLObjectOperationLowerHelper *pObjHelper,
1384
22
                           bool &Translated) {
1385
22
  hlsl::OP *hlslOP = &helper.hlslOP;
1386
22
  DXIL::QuadVoteOpKind opKind;
1387
22
  switch (IOP) {
1388
10
  case IntrinsicOp::IOP_QuadAll:
1389
10
    opKind = DXIL::QuadVoteOpKind::All;
1390
10
    break;
1391
12
  case IntrinsicOp::IOP_QuadAny:
1392
12
    opKind = DXIL::QuadVoteOpKind::Any;
1393
12
    break;
1394
0
  default:
1395
0
    llvm_unreachable(
1396
22
        "QuadAny/QuadAll translation called with wrong isntruction");
1397
22
  }
1398
22
  Constant *OpArg = hlslOP->GetI8Const((unsigned)opKind);
1399
22
  Value *refArgs[] = {nullptr, CI->getOperand(1), OpArg};
1400
22
  return TrivialDxilOperation(DXIL::OpCode::QuadVote, refArgs,
1401
22
                              CI->getOperand(1)->getType(), CI, hlslOP);
1402
22
}
1403
1404
// Wave intrinsics of the form fn(val,QuadOpKind)->val
1405
Value *TranslateQuadReadAcross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1406
                               HLOperationLowerHelper &helper,
1407
                               HLObjectOperationLowerHelper *pObjHelper,
1408
102
                               bool &Translated) {
1409
102
  hlsl::OP *hlslOP = &helper.hlslOP;
1410
102
  DXIL::QuadOpKind opKind;
1411
102
  switch (IOP) {
1412
34
  case IntrinsicOp::IOP_QuadReadAcrossX:
1413
34
    opKind = DXIL::QuadOpKind::ReadAcrossX;
1414
34
    break;
1415
32
  case IntrinsicOp::IOP_QuadReadAcrossY:
1416
32
    opKind = DXIL::QuadOpKind::ReadAcrossY;
1417
32
    break;
1418
0
  default:
1419
0
    DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_QuadReadAcrossDiagonal);
1420
0
    LLVM_FALLTHROUGH;
1421
36
  case IntrinsicOp::IOP_QuadReadAcrossDiagonal:
1422
36
    opKind = DXIL::QuadOpKind::ReadAcrossDiagonal;
1423
36
    break;
1424
102
  }
1425
102
  Constant *OpArg = hlslOP->GetI8Const((unsigned)opKind);
1426
102
  Value *refArgs[] = {nullptr, CI->getOperand(1), OpArg};
1427
102
  return TrivialDxilOperation(DXIL::OpCode::QuadOp, refArgs,
1428
102
                              CI->getOperand(1)->getType(), CI, hlslOP);
1429
102
}
1430
1431
// WaveAllEqual(val<n>)->bool<n>
1432
Value *TranslateWaveAllEqual(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1433
                             HLOperationLowerHelper &helper,
1434
                             HLObjectOperationLowerHelper *pObjHelper,
1435
80
                             bool &Translated) {
1436
80
  hlsl::OP *hlslOP = &helper.hlslOP;
1437
80
  Value *src = CI->getArgOperand(HLOperandIndex::kWaveAllEqualValueOpIdx);
1438
80
  IRBuilder<> Builder(CI);
1439
1440
80
  Type *Ty = src->getType();
1441
80
  Type *RetTy = Type::getInt1Ty(CI->getContext());
1442
80
  if (Ty->isVectorTy())
1443
4
    RetTy = VectorType::get(RetTy, Ty->getVectorNumElements());
1444
1445
80
  Constant *opArg =
1446
80
      hlslOP->GetU32Const((unsigned)DXIL::OpCode::WaveActiveAllEqual);
1447
80
  Value *args[] = {opArg, src};
1448
1449
80
  return TrivialDxilOperation(DXIL::OpCode::WaveActiveAllEqual, args, Ty, RetTy,
1450
80
                              hlslOP, Builder);
1451
80
}
1452
1453
static Value *TranslateWaveMatchFixReturn(IRBuilder<> &Builder, Type *TargetTy,
1454
46
                                          Value *RetVal) {
1455
46
  Value *ResVec = UndefValue::get(TargetTy);
1456
230
  for (unsigned i = 0; i != 4; 
++i184
) {
1457
184
    Value *Elt = Builder.CreateExtractValue(RetVal, i);
1458
184
    ResVec = Builder.CreateInsertElement(ResVec, Elt, i);
1459
184
  }
1460
1461
46
  return ResVec;
1462
46
}
1463
1464
// WaveMatch(val<n>)->uint4
1465
Value *TranslateWaveMatch(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opc,
1466
                          HLOperationLowerHelper &Helper,
1467
                          HLObjectOperationLowerHelper *ObjHelper,
1468
46
                          bool &Translated) {
1469
46
  hlsl::OP *Op = &Helper.hlslOP;
1470
46
  IRBuilder<> Builder(CI);
1471
1472
46
  Value *Val = CI->getArgOperand(1);
1473
46
  Type *ValTy = Val->getType();
1474
46
  Type *EltTy = ValTy->getScalarType();
1475
46
  Constant *OpcArg = Op->GetU32Const((unsigned)DXIL::OpCode::WaveMatch);
1476
1477
  // If we don't need to scalarize, just emit the call and exit
1478
46
  const bool Scalarize =
1479
46
      ValTy->isVectorTy() &&
1480
46
      
!Op->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus()14
;
1481
46
  if (!Scalarize) {
1482
36
    Value *Fn = Op->GetOpFunc(OP::OpCode::WaveMatch, ValTy);
1483
36
    Value *Args[] = {OpcArg, Val};
1484
36
    Value *Ret = Builder.CreateCall(Fn, Args);
1485
36
    return TranslateWaveMatchFixReturn(Builder, CI->getType(), Ret);
1486
36
  }
1487
1488
  // Generate a dx.op.waveMatch call for each scalar in the input, and perform
1489
  // a bitwise AND between each result to derive the final bitmask
1490
1491
  // (1) Collect the list of all scalar inputs (e.g. decompose vectors)
1492
10
  SmallVector<Value *, 4> ScalarInputs;
1493
1494
50
  for (uint64_t I = 0, E = ValTy->getVectorNumElements(); I != E; 
++I40
) {
1495
40
    Value *Elt = Builder.CreateExtractElement(Val, I);
1496
40
    ScalarInputs.push_back(Elt);
1497
40
  }
1498
1499
  // (2) For each scalar, emit a call to dx.op.waveMatch. If this is not the
1500
  // first scalar, then AND the result with the accumulator.
1501
10
  Value *Fn = Op->GetOpFunc(OP::OpCode::WaveMatch, EltTy);
1502
10
  Value *Args[] = {OpcArg, ScalarInputs[0]};
1503
10
  Value *Res = Builder.CreateCall(Fn, Args);
1504
1505
40
  for (unsigned I = 1, E = ScalarInputs.size(); I != E; 
++I30
) {
1506
30
    Value *Args[] = {OpcArg, ScalarInputs[I]};
1507
30
    Value *Call = Builder.CreateCall(Fn, Args);
1508
1509
    // Generate bitwise AND of the components
1510
150
    for (unsigned J = 0; J != 4; 
++J120
) {
1511
120
      Value *ResVal = Builder.CreateExtractValue(Res, J);
1512
120
      Value *CallVal = Builder.CreateExtractValue(Call, J);
1513
120
      Value *And = Builder.CreateAnd(ResVal, CallVal);
1514
120
      Res = Builder.CreateInsertValue(Res, And, J);
1515
120
    }
1516
30
  }
1517
1518
  // (3) Convert the final aggregate into a vector to make the types match
1519
10
  return TranslateWaveMatchFixReturn(Builder, CI->getType(), Res);
1520
46
}
1521
1522
// Wave intrinsics of the form fn(valA)->valB, where no overloading takes place
1523
Value *TranslateWaveA2B(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1524
                        HLOperationLowerHelper &helper,
1525
                        HLObjectOperationLowerHelper *pObjHelper,
1526
162
                        bool &Translated) {
1527
162
  hlsl::OP *hlslOP = &helper.hlslOP;
1528
162
  Value *refArgs[] = {nullptr, CI->getOperand(1)};
1529
162
  return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
1530
162
}
1531
// Wave ballot intrinsic.
1532
Value *TranslateWaveBallot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1533
                           HLOperationLowerHelper &helper,
1534
                           HLObjectOperationLowerHelper *pObjHelper,
1535
32
                           bool &Translated) {
1536
  // The high-level operation is uint4 ballot(i1).
1537
  // The DXIL operation is struct.u4 ballot(i1).
1538
  // To avoid updating users with more than a simple replace, we translate into
1539
  // a call into struct.u4, then reassemble the vector.
1540
  // Scalarization and constant propagation take care of cleanup.
1541
32
  IRBuilder<> B(CI);
1542
1543
  // Make the DXIL call itself.
1544
32
  hlsl::OP *hlslOP = &helper.hlslOP;
1545
32
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
1546
32
  Value *refArgs[] = {opArg, CI->getOperand(1)};
1547
32
  Function *dxilFunc =
1548
32
      hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
1549
32
  Value *dxilVal =
1550
32
      B.CreateCall(dxilFunc, refArgs, hlslOP->GetOpCodeName(opcode));
1551
1552
  // Assign from the call results into a vector.
1553
32
  Type *ResTy = CI->getType();
1554
32
  DXASSERT_NOMSG(ResTy->isVectorTy() && ResTy->getVectorNumElements() == 4);
1555
32
  DXASSERT_NOMSG(dxilVal->getType()->isStructTy() &&
1556
32
                 dxilVal->getType()->getNumContainedTypes() == 4);
1557
1558
  // 'x' component is the first vector element, highest bits.
1559
32
  Value *ResVal = llvm::UndefValue::get(ResTy);
1560
160
  for (unsigned Idx = 0; Idx < 4; 
++Idx128
) {
1561
128
    ResVal = B.CreateInsertElement(
1562
128
        ResVal, B.CreateExtractValue(dxilVal, ArrayRef<unsigned>(Idx)), Idx);
1563
128
  }
1564
1565
32
  return ResVal;
1566
32
}
1567
1568
670
static bool WaveIntrinsicNeedsSign(OP::OpCode opcode) {
1569
670
  return opcode == OP::OpCode::WaveActiveOp ||
1570
670
         
opcode == OP::OpCode::WavePrefixOp288
;
1571
670
}
1572
1573
946
static unsigned WaveIntrinsicToSignedOpKind(IntrinsicOp IOP) {
1574
946
  if (IOP == IntrinsicOp::IOP_WaveActiveUMax ||
1575
946
      
IOP == IntrinsicOp::IOP_WaveActiveUMin908
||
1576
946
      
IOP == IntrinsicOp::IOP_WaveActiveUSum870
||
1577
946
      
IOP == IntrinsicOp::IOP_WaveActiveUProduct840
||
1578
946
      
IOP == IntrinsicOp::IOP_WaveMultiPrefixUProduct834
||
1579
946
      
IOP == IntrinsicOp::IOP_WaveMultiPrefixUSum820
||
1580
946
      
IOP == IntrinsicOp::IOP_WavePrefixUSum806
||
1581
946
      
IOP == IntrinsicOp::IOP_WavePrefixUProduct776
)
1582
176
    return (unsigned)DXIL::SignedOpKind::Unsigned;
1583
770
  return (unsigned)DXIL::SignedOpKind::Signed;
1584
946
}
1585
1586
946
static unsigned WaveIntrinsicToOpKind(IntrinsicOp IOP) {
1587
946
  switch (IOP) {
1588
  // Bit operations.
1589
28
  case IntrinsicOp::IOP_WaveActiveBitOr:
1590
28
    return (unsigned)DXIL::WaveBitOpKind::Or;
1591
62
  case IntrinsicOp::IOP_WaveActiveBitAnd:
1592
62
    return (unsigned)DXIL::WaveBitOpKind::And;
1593
44
  case IntrinsicOp::IOP_WaveActiveBitXor:
1594
44
    return (unsigned)DXIL::WaveBitOpKind::Xor;
1595
  // Prefix operations.
1596
44
  case IntrinsicOp::IOP_WavePrefixSum:
1597
74
  case IntrinsicOp::IOP_WavePrefixUSum:
1598
74
    return (unsigned)DXIL::WaveOpKind::Sum;
1599
74
  case IntrinsicOp::IOP_WavePrefixProduct:
1600
80
  case IntrinsicOp::IOP_WavePrefixUProduct:
1601
80
    return (unsigned)DXIL::WaveOpKind::Product;
1602
    // Numeric operations.
1603
46
  case IntrinsicOp::IOP_WaveActiveMax:
1604
84
  case IntrinsicOp::IOP_WaveActiveUMax:
1605
84
    return (unsigned)DXIL::WaveOpKind::Max;
1606
60
  case IntrinsicOp::IOP_WaveActiveMin:
1607
98
  case IntrinsicOp::IOP_WaveActiveUMin:
1608
98
    return (unsigned)DXIL::WaveOpKind::Min;
1609
90
  case IntrinsicOp::IOP_WaveActiveSum:
1610
120
  case IntrinsicOp::IOP_WaveActiveUSum:
1611
120
    return (unsigned)DXIL::WaveOpKind::Sum;
1612
74
  case IntrinsicOp::IOP_WaveActiveProduct:
1613
80
  case IntrinsicOp::IOP_WaveActiveUProduct:
1614
  // MultiPrefix operations
1615
124
  case IntrinsicOp::IOP_WaveMultiPrefixBitAnd:
1616
124
    return (unsigned)DXIL::WaveMultiPrefixOpKind::And;
1617
44
  case IntrinsicOp::IOP_WaveMultiPrefixBitOr:
1618
44
    return (unsigned)DXIL::WaveMultiPrefixOpKind::Or;
1619
44
  case IntrinsicOp::IOP_WaveMultiPrefixBitXor:
1620
44
    return (unsigned)DXIL::WaveMultiPrefixOpKind::Xor;
1621
58
  case IntrinsicOp::IOP_WaveMultiPrefixProduct:
1622
72
  case IntrinsicOp::IOP_WaveMultiPrefixUProduct:
1623
72
    return (unsigned)DXIL::WaveMultiPrefixOpKind::Product;
1624
58
  case IntrinsicOp::IOP_WaveMultiPrefixSum:
1625
72
  case IntrinsicOp::IOP_WaveMultiPrefixUSum:
1626
72
    return (unsigned)DXIL::WaveMultiPrefixOpKind::Sum;
1627
0
  default:
1628
0
    DXASSERT(IOP == IntrinsicOp::IOP_WaveActiveProduct ||
1629
0
                 IOP == IntrinsicOp::IOP_WaveActiveUProduct,
1630
0
             "else caller passed incorrect value");
1631
0
    return (unsigned)DXIL::WaveOpKind::Product;
1632
946
  }
1633
946
}
1634
1635
// Wave intrinsics of the form fn(valA)->valA
1636
Value *TranslateWaveA2A(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1637
                        HLOperationLowerHelper &helper,
1638
                        HLObjectOperationLowerHelper *pObjHelper,
1639
670
                        bool &Translated) {
1640
670
  hlsl::OP *hlslOP = &helper.hlslOP;
1641
1642
670
  Constant *kindValInt = hlslOP->GetI8Const(WaveIntrinsicToOpKind(IOP));
1643
670
  Constant *signValInt = hlslOP->GetI8Const(WaveIntrinsicToSignedOpKind(IOP));
1644
670
  Value *refArgs[] = {nullptr, CI->getOperand(1), kindValInt, signValInt};
1645
670
  unsigned refArgCount = _countof(refArgs);
1646
670
  if (!WaveIntrinsicNeedsSign(opcode))
1647
134
    refArgCount--;
1648
670
  return TrivialDxilOperation(opcode,
1649
670
                              llvm::ArrayRef<Value *>(refArgs, refArgCount),
1650
670
                              CI->getOperand(1)->getType(), CI, hlslOP);
1651
670
}
1652
1653
// WaveMultiPrefixOP(val<n>, mask) -> val<n>
1654
Value *TranslateWaveMultiPrefix(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opc,
1655
                                HLOperationLowerHelper &Helper,
1656
                                HLObjectOperationLowerHelper *ObjHelper,
1657
276
                                bool &Translated) {
1658
276
  hlsl::OP *Op = &Helper.hlslOP;
1659
1660
276
  Constant *KindValInt = Op->GetI8Const(WaveIntrinsicToOpKind(IOP));
1661
276
  Constant *SignValInt = Op->GetI8Const(WaveIntrinsicToSignedOpKind(IOP));
1662
1663
  // Decompose mask into scalars
1664
276
  IRBuilder<> Builder(CI);
1665
276
  Value *Mask = CI->getArgOperand(2);
1666
276
  Value *Mask0 = Builder.CreateExtractElement(Mask, (uint64_t)0);
1667
276
  Value *Mask1 = Builder.CreateExtractElement(Mask, (uint64_t)1);
1668
276
  Value *Mask2 = Builder.CreateExtractElement(Mask, (uint64_t)2);
1669
276
  Value *Mask3 = Builder.CreateExtractElement(Mask, (uint64_t)3);
1670
1671
276
  Value *Args[] = {nullptr, CI->getOperand(1), Mask0,     Mask1, Mask2,
1672
276
                   Mask3,   KindValInt,        SignValInt};
1673
1674
276
  return TrivialDxilOperation(Opc, Args, CI->getOperand(1)->getType(), CI, Op);
1675
276
}
1676
1677
// WaveMultiPrefixBitCount(i1, mask) -> i32
1678
Value *TranslateWaveMultiPrefixBitCount(CallInst *CI, IntrinsicOp IOP,
1679
                                        OP::OpCode Opc,
1680
                                        HLOperationLowerHelper &Helper,
1681
                                        HLObjectOperationLowerHelper *ObjHelper,
1682
40
                                        bool &Translated) {
1683
40
  hlsl::OP *Op = &Helper.hlslOP;
1684
1685
  // Decompose mask into scalars
1686
40
  IRBuilder<> Builder(CI);
1687
40
  Value *Mask = CI->getArgOperand(2);
1688
40
  Value *Mask0 = Builder.CreateExtractElement(Mask, (uint64_t)0);
1689
40
  Value *Mask1 = Builder.CreateExtractElement(Mask, (uint64_t)1);
1690
40
  Value *Mask2 = Builder.CreateExtractElement(Mask, (uint64_t)2);
1691
40
  Value *Mask3 = Builder.CreateExtractElement(Mask, (uint64_t)3);
1692
1693
40
  Value *Args[] = {nullptr, CI->getOperand(1), Mask0, Mask1, Mask2, Mask3};
1694
1695
40
  return TrivialDxilOperation(Opc, Args, Helper.voidTy, CI, Op);
1696
40
}
1697
1698
// Wave intrinsics of the form fn()->val
1699
Value *TranslateWaveToVal(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1700
                          HLOperationLowerHelper &helper,
1701
                          HLObjectOperationLowerHelper *pObjHelper,
1702
164
                          bool &Translated) {
1703
164
  hlsl::OP *hlslOP = &helper.hlslOP;
1704
164
  Value *refArgs[] = {nullptr};
1705
164
  return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
1706
164
}
1707
1708
// Wave intrinsics of the form fn(val,lane)->val
1709
Value *TranslateWaveReadLaneAt(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1710
                               HLOperationLowerHelper &helper,
1711
                               HLObjectOperationLowerHelper *pObjHelper,
1712
98
                               bool &Translated) {
1713
98
  hlsl::OP *hlslOP = &helper.hlslOP;
1714
98
  Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)};
1715
98
  return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneAt, refArgs,
1716
98
                              CI->getOperand(1)->getType(), CI, hlslOP);
1717
98
}
1718
1719
// Wave intrinsics of the form fn(val)->val
1720
Value *TranslateWaveReadLaneFirst(CallInst *CI, IntrinsicOp IOP,
1721
                                  OP::OpCode opcode,
1722
                                  HLOperationLowerHelper &helper,
1723
                                  HLObjectOperationLowerHelper *pObjHelper,
1724
274
                                  bool &Translated) {
1725
274
  hlsl::OP *hlslOP = &helper.hlslOP;
1726
274
  Value *refArgs[] = {nullptr, CI->getOperand(1)};
1727
274
  return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneFirst, refArgs,
1728
274
                              CI->getOperand(1)->getType(), CI, hlslOP);
1729
274
}
1730
1731
Value *TranslateAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1732
                    HLOperationLowerHelper &helper,
1733
                    HLObjectOperationLowerHelper *pObjHelper,
1734
950
                    bool &Translated) {
1735
950
  hlsl::OP *hlslOP = &helper.hlslOP;
1736
950
  Type *pOverloadTy = CI->getType()->getScalarType();
1737
950
  if (pOverloadTy->isFloatingPointTy()) {
1738
804
    Value *refArgs[] = {nullptr, CI->getOperand(1)};
1739
804
    return TrivialDxilOperation(DXIL::OpCode::FAbs, refArgs, CI->getType(), CI,
1740
804
                                hlslOP);
1741
804
  }
1742
1743
146
  Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
1744
146
  IRBuilder<> Builder(CI);
1745
146
  Value *neg = Builder.CreateNeg(src);
1746
146
  return TrivialDxilBinaryOperation(DXIL::OpCode::IMax, src, neg, hlslOP,
1747
146
                                    Builder);
1748
950
}
1749
1750
Value *TranslateUAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1751
                     HLOperationLowerHelper &helper,
1752
                     HLObjectOperationLowerHelper *pObjHelper,
1753
24
                     bool &Translated) {
1754
24
  return CI->getOperand(HLOperandIndex::kUnaryOpSrc0Idx); // No-op
1755
24
}
1756
1757
28
Value *GenerateVectorCmpNEZero(Value *Val, IRBuilder<> Builder) {
1758
28
  Type *Ty = Val->getType();
1759
28
  Type *EltTy = Ty->getScalarType();
1760
1761
28
  Value *ZeroInit = ConstantAggregateZero::get(Ty);
1762
1763
28
  if (EltTy->isFloatingPointTy())
1764
4
    return Builder.CreateFCmpUNE(Val, ZeroInit);
1765
1766
24
  return Builder.CreateICmpNE(Val, ZeroInit);
1767
28
}
1768
1769
1.32k
Value *GenerateCmpNEZero(Value *val, IRBuilder<> Builder) {
1770
1.32k
  Type *Ty = val->getType();
1771
1.32k
  Type *EltTy = Ty->getScalarType();
1772
1773
1.32k
  Constant *zero = nullptr;
1774
1.32k
  if (EltTy->isFloatingPointTy())
1775
104
    zero = ConstantFP::get(EltTy, 0);
1776
1.21k
  else
1777
1.21k
    zero = ConstantInt::get(EltTy, 0);
1778
1779
1.32k
  if (Ty != EltTy)
1780
0
    zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
1781
1782
1.32k
  if (EltTy->isFloatingPointTy())
1783
104
    return Builder.CreateFCmpUNE(val, zero);
1784
1785
1.21k
  return Builder.CreateICmpNE(val, zero);
1786
1.32k
}
1787
1788
Value *TranslateBitwisePredicate(CallInst *CI, IntrinsicOp IOP,
1789
352
                                 hlsl::OP *HlslOP) {
1790
352
  Value *Arg = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
1791
352
  IRBuilder<> Builder(CI);
1792
1793
352
  Type *Ty = Arg->getType();
1794
352
  Type *EltTy = Ty->getScalarType();
1795
1796
352
  if (Ty == EltTy)
1797
42
    return GenerateCmpNEZero(Arg, Builder);
1798
1799
310
  if (HlslOP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus()) {
1800
28
    DXIL::OpCode ReduceOp = DXIL::OpCode::VectorReduceAnd;
1801
28
    switch (IOP) {
1802
14
    case IntrinsicOp::IOP_all:
1803
14
      ReduceOp = DXIL::OpCode::VectorReduceAnd;
1804
14
      break;
1805
14
    case IntrinsicOp::IOP_any:
1806
14
      ReduceOp = DXIL::OpCode::VectorReduceOr;
1807
14
      break;
1808
0
    default:
1809
0
      assert(false && "Unexpected reduction IOP");
1810
0
      break;
1811
28
    }
1812
1813
    // Compare each element to zero
1814
28
    Value *VecCmpZero = GenerateVectorCmpNEZero(Arg, Builder);
1815
28
    Type *VecCmpTy = VecCmpZero->getType();
1816
1817
    // Reduce the vector with the appropiate op
1818
28
    Constant *OpArg = HlslOP->GetU32Const((unsigned)ReduceOp);
1819
28
    Value *Args[] = {OpArg, VecCmpZero};
1820
28
    Function *DxilFunc = HlslOP->GetOpFunc(ReduceOp, VecCmpTy);
1821
28
    return TrivialDxilVectorOperation(DxilFunc, ReduceOp, Args, VecCmpTy,
1822
28
                                      HlslOP, Builder);
1823
28
  }
1824
1825
282
  SmallVector<Value *, 4> EltIsNEZero;
1826
1.56k
  for (unsigned I = 0; I < Ty->getVectorNumElements(); 
I++1.28k
) {
1827
1.28k
    Value *Elt = Builder.CreateExtractElement(Arg, I);
1828
1.28k
    Elt = GenerateCmpNEZero(Elt, Builder);
1829
1.28k
    EltIsNEZero.push_back(Elt);
1830
1.28k
  }
1831
1832
  // and/or the components together
1833
282
  Value *Reduce = EltIsNEZero[0];
1834
1.28k
  for (unsigned I = 1; I < EltIsNEZero.size(); 
I++998
) {
1835
998
    Value *Elt = EltIsNEZero[I];
1836
998
    switch (IOP) {
1837
426
    case IntrinsicOp::IOP_all:
1838
426
      Reduce = Builder.CreateAnd(Reduce, Elt);
1839
426
      break;
1840
572
    case IntrinsicOp::IOP_any:
1841
572
      Reduce = Builder.CreateOr(Reduce, Elt);
1842
572
      break;
1843
0
    default:
1844
0
      assert(false && "Unexpected reduction IOP");
1845
0
      break;
1846
998
    }
1847
998
  }
1848
1849
282
  return Reduce;
1850
282
}
1851
1852
Value *TranslateAll(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
1853
                    HLOperationLowerHelper &Helper,
1854
                    HLObjectOperationLowerHelper *PObjHelper,
1855
156
                    bool &Translated) {
1856
156
  return TranslateBitwisePredicate(CI, IOP, &Helper.hlslOP);
1857
156
}
1858
1859
Value *TranslateAny(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
1860
                    HLOperationLowerHelper &Helper,
1861
                    HLObjectOperationLowerHelper *PObjHelper,
1862
196
                    bool &Translated) {
1863
196
  return TranslateBitwisePredicate(CI, IOP, &Helper.hlslOP);
1864
196
}
1865
1866
Value *TranslateBitcast(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1867
                        HLOperationLowerHelper &helper,
1868
                        HLObjectOperationLowerHelper *pObjHelper,
1869
1.84k
                        bool &Translated) {
1870
1.84k
  Type *Ty = CI->getType();
1871
1.84k
  Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
1872
1.84k
  IRBuilder<> Builder(CI);
1873
1.84k
  return Builder.CreateBitCast(op, Ty);
1874
1.84k
}
1875
1876
Value *TranslateDoubleAsUint(Value *x, Value *lo, Value *hi,
1877
32
                             IRBuilder<> &Builder, hlsl::OP *hlslOP) {
1878
32
  Type *Ty = x->getType();
1879
32
  Type *outTy = lo->getType()->getPointerElementType();
1880
32
  DXIL::OpCode opcode = DXIL::OpCode::SplitDouble;
1881
1882
32
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
1883
32
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
1884
1885
32
  if (Ty->isVectorTy()) {
1886
8
    Value *retValLo = llvm::UndefValue::get(outTy);
1887
8
    Value *retValHi = llvm::UndefValue::get(outTy);
1888
8
    unsigned vecSize = Ty->getVectorNumElements();
1889
1890
24
    for (unsigned i = 0; i < vecSize; 
i++16
) {
1891
16
      Value *Elt = Builder.CreateExtractElement(x, i);
1892
16
      Value *EltOP = Builder.CreateCall(dxilFunc, {opArg, Elt},
1893
16
                                        hlslOP->GetOpCodeName(opcode));
1894
16
      Value *EltLo = Builder.CreateExtractValue(EltOP, 0);
1895
16
      retValLo = Builder.CreateInsertElement(retValLo, EltLo, i);
1896
16
      Value *EltHi = Builder.CreateExtractValue(EltOP, 1);
1897
16
      retValHi = Builder.CreateInsertElement(retValHi, EltHi, i);
1898
16
    }
1899
8
    Builder.CreateStore(retValLo, lo);
1900
8
    Builder.CreateStore(retValHi, hi);
1901
24
  } else {
1902
24
    Value *retVal =
1903
24
        Builder.CreateCall(dxilFunc, {opArg, x}, hlslOP->GetOpCodeName(opcode));
1904
24
    Value *retValLo = Builder.CreateExtractValue(retVal, 0);
1905
24
    Value *retValHi = Builder.CreateExtractValue(retVal, 1);
1906
24
    Builder.CreateStore(retValLo, lo);
1907
24
    Builder.CreateStore(retValHi, hi);
1908
24
  }
1909
1910
32
  return nullptr;
1911
32
}
1912
1913
Value *TranslateAsUint(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1914
                       HLOperationLowerHelper &helper,
1915
                       HLObjectOperationLowerHelper *pObjHelper,
1916
608
                       bool &Translated) {
1917
608
  if (CI->getNumArgOperands() == 2)
1918
576
    return TranslateBitcast(CI, IOP, opcode, helper, pObjHelper, Translated);
1919
1920
32
  DXASSERT_NOMSG(CI->getNumArgOperands() == 4);
1921
32
  hlsl::OP *hlslOP = &helper.hlslOP;
1922
32
  Value *x = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
1923
32
  DXASSERT_NOMSG(x->getType()->getScalarType()->isDoubleTy());
1924
32
  Value *lo = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
1925
32
  Value *hi = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
1926
32
  IRBuilder<> Builder(CI);
1927
32
  return TranslateDoubleAsUint(x, lo, hi, Builder, hlslOP);
1928
608
}
1929
1930
Value *TranslateAsDouble(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1931
                         HLOperationLowerHelper &helper,
1932
                         HLObjectOperationLowerHelper *pObjHelper,
1933
66
                         bool &Translated) {
1934
66
  hlsl::OP *hlslOP = &helper.hlslOP;
1935
66
  Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
1936
66
  Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
1937
1938
66
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
1939
66
  IRBuilder<> Builder(CI);
1940
66
  return TrivialDxilOperation(opcode, {opArg, x, y}, CI->getType(),
1941
66
                              CI->getType(), hlslOP, Builder);
1942
66
}
1943
1944
Value *TranslateAtan2(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1945
                      HLOperationLowerHelper &helper,
1946
                      HLObjectOperationLowerHelper *pObjHelper,
1947
56
                      bool &Translated) {
1948
56
  hlsl::OP *hlslOP = &helper.hlslOP;
1949
56
  Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
1950
56
  Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
1951
1952
56
  IRBuilder<> Builder(CI);
1953
56
  Value *tan = Builder.CreateFDiv(y, x);
1954
1955
56
  Value *atan =
1956
56
      TrivialDxilUnaryOperation(OP::OpCode::Atan, tan, hlslOP, Builder);
1957
  // Modify atan result based on https://en.wikipedia.org/wiki/Atan2.
1958
56
  Type *Ty = x->getType();
1959
56
  Constant *pi = ConstantFP::get(Ty->getScalarType(), M_PI);
1960
56
  Constant *halfPi = ConstantFP::get(Ty->getScalarType(), M_PI / 2);
1961
56
  Constant *negHalfPi = ConstantFP::get(Ty->getScalarType(), -M_PI / 2);
1962
56
  Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
1963
56
  if (Ty->isVectorTy()) {
1964
22
    unsigned vecSize = Ty->getVectorNumElements();
1965
22
    pi = ConstantVector::getSplat(vecSize, pi);
1966
22
    halfPi = ConstantVector::getSplat(vecSize, halfPi);
1967
22
    negHalfPi = ConstantVector::getSplat(vecSize, negHalfPi);
1968
22
    zero = ConstantVector::getSplat(vecSize, zero);
1969
22
  }
1970
56
  Value *atanAddPi = Builder.CreateFAdd(atan, pi);
1971
56
  Value *atanSubPi = Builder.CreateFSub(atan, pi);
1972
1973
  // x > 0 -> atan.
1974
56
  Value *result = atan;
1975
56
  Value *xLt0 = Builder.CreateFCmpOLT(x, zero);
1976
56
  Value *xEq0 = Builder.CreateFCmpOEQ(x, zero);
1977
1978
56
  Value *yGe0 = Builder.CreateFCmpOGE(y, zero);
1979
56
  Value *yLt0 = Builder.CreateFCmpOLT(y, zero);
1980
  // x < 0, y >= 0 -> atan + pi.
1981
56
  Value *xLt0AndyGe0 = Builder.CreateAnd(xLt0, yGe0);
1982
56
  result = Builder.CreateSelect(xLt0AndyGe0, atanAddPi, result);
1983
1984
  // x < 0, y < 0 -> atan - pi.
1985
56
  Value *xLt0AndYLt0 = Builder.CreateAnd(xLt0, yLt0);
1986
56
  result = Builder.CreateSelect(xLt0AndYLt0, atanSubPi, result);
1987
1988
  // x == 0, y < 0 -> -pi/2
1989
56
  Value *xEq0AndYLt0 = Builder.CreateAnd(xEq0, yLt0);
1990
56
  result = Builder.CreateSelect(xEq0AndYLt0, negHalfPi, result);
1991
  // x == 0, y > 0 -> pi/2
1992
56
  Value *xEq0AndYGe0 = Builder.CreateAnd(xEq0, yGe0);
1993
56
  result = Builder.CreateSelect(xEq0AndYGe0, halfPi, result);
1994
1995
56
  return result;
1996
56
}
1997
1998
Value *TranslateClamp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
1999
                      HLOperationLowerHelper &helper,
2000
                      HLObjectOperationLowerHelper *pObjHelper,
2001
764
                      bool &Translated) {
2002
764
  hlsl::OP *hlslOP = &helper.hlslOP;
2003
764
  Type *Ty = CI->getType();
2004
764
  Type *EltTy = Ty->getScalarType();
2005
764
  DXIL::OpCode maxOp = DXIL::OpCode::FMax;
2006
764
  DXIL::OpCode minOp = DXIL::OpCode::FMin;
2007
764
  if (IOP == IntrinsicOp::IOP_uclamp) {
2008
56
    maxOp = DXIL::OpCode::UMax;
2009
56
    minOp = DXIL::OpCode::UMin;
2010
708
  } else if (EltTy->isIntegerTy()) {
2011
48
    maxOp = DXIL::OpCode::IMax;
2012
48
    minOp = DXIL::OpCode::IMin;
2013
48
  }
2014
2015
764
  Value *x = CI->getArgOperand(HLOperandIndex::kClampOpXIdx);
2016
764
  Value *maxVal = CI->getArgOperand(HLOperandIndex::kClampOpMaxIdx);
2017
764
  Value *minVal = CI->getArgOperand(HLOperandIndex::kClampOpMinIdx);
2018
2019
764
  IRBuilder<> Builder(CI);
2020
  // min(max(x, minVal), maxVal).
2021
764
  Value *maxXMinVal =
2022
764
      TrivialDxilBinaryOperation(maxOp, x, minVal, hlslOP, Builder);
2023
764
  return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder);
2024
764
}
2025
2026
Value *TranslateClip(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2027
                     HLOperationLowerHelper &helper,
2028
                     HLObjectOperationLowerHelper *pObjHelper,
2029
110
                     bool &Translated) {
2030
110
  hlsl::OP *hlslOP = &helper.hlslOP;
2031
110
  Function *discard =
2032
110
      hlslOP->GetOpFunc(OP::OpCode::Discard, Type::getVoidTy(CI->getContext()));
2033
110
  IRBuilder<> Builder(CI);
2034
110
  Value *cond = nullptr;
2035
110
  Value *arg = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2036
110
  if (VectorType *VT = dyn_cast<VectorType>(arg->getType())) {
2037
14
    Value *elt = Builder.CreateExtractElement(arg, (uint64_t)0);
2038
14
    cond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0));
2039
50
    for (unsigned i = 1; i < VT->getNumElements(); 
i++36
) {
2040
36
      Value *elt = Builder.CreateExtractElement(arg, i);
2041
36
      Value *eltCond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0));
2042
36
      cond = Builder.CreateOr(cond, eltCond);
2043
36
    }
2044
14
  } else
2045
96
    cond = Builder.CreateFCmpOLT(arg, hlslOP->GetFloatConst(0));
2046
2047
  /*If discard condition evaluates to false at compile-time, then
2048
  don't emit the discard instruction.*/
2049
110
  if (ConstantInt *constCond = dyn_cast<ConstantInt>(cond))
2050
78
    if (!constCond->getLimitedValue())
2051
10
      return nullptr;
2052
2053
100
  Constant *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::Discard);
2054
100
  Builder.CreateCall(discard, {opArg, cond});
2055
100
  return nullptr;
2056
110
}
2057
2058
Value *TranslateCross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2059
                      HLOperationLowerHelper &helper,
2060
                      HLObjectOperationLowerHelper *pObjHelper,
2061
104
                      bool &Translated) {
2062
104
  VectorType *VT = cast<VectorType>(CI->getType());
2063
104
  DXASSERT_NOMSG(VT->getNumElements() == 3);
2064
2065
104
  Value *op0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
2066
104
  Value *op1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
2067
2068
104
  IRBuilder<> Builder(CI);
2069
104
  Value *op0_x = Builder.CreateExtractElement(op0, (uint64_t)0);
2070
104
  Value *op0_y = Builder.CreateExtractElement(op0, 1);
2071
104
  Value *op0_z = Builder.CreateExtractElement(op0, 2);
2072
2073
104
  Value *op1_x = Builder.CreateExtractElement(op1, (uint64_t)0);
2074
104
  Value *op1_y = Builder.CreateExtractElement(op1, 1);
2075
104
  Value *op1_z = Builder.CreateExtractElement(op1, 2);
2076
2077
312
  auto MulSub = [&](Value *x0, Value *y0, Value *x1, Value *y1) -> Value * {
2078
312
    Value *xy = Builder.CreateFMul(x0, y1);
2079
312
    Value *yx = Builder.CreateFMul(y0, x1);
2080
312
    return Builder.CreateFSub(xy, yx);
2081
312
  };
2082
2083
104
  Value *yz_zy = MulSub(op0_y, op0_z, op1_y, op1_z);
2084
104
  Value *zx_xz = MulSub(op0_z, op0_x, op1_z, op1_x);
2085
104
  Value *xy_yx = MulSub(op0_x, op0_y, op1_x, op1_y);
2086
2087
104
  Value *cross = UndefValue::get(VT);
2088
104
  cross = Builder.CreateInsertElement(cross, yz_zy, (uint64_t)0);
2089
104
  cross = Builder.CreateInsertElement(cross, zx_xz, 1);
2090
104
  cross = Builder.CreateInsertElement(cross, xy_yx, 2);
2091
104
  return cross;
2092
104
}
2093
2094
Value *TranslateDegrees(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2095
                        HLOperationLowerHelper &helper,
2096
                        HLObjectOperationLowerHelper *pObjHelper,
2097
32
                        bool &Translated) {
2098
32
  IRBuilder<> Builder(CI);
2099
32
  Type *Ty = CI->getType();
2100
32
  Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2101
  // 180/pi.
2102
32
  Constant *toDegreeConst = ConstantFP::get(Ty->getScalarType(), 180 / M_PI);
2103
32
  if (Ty != Ty->getScalarType()) {
2104
16
    toDegreeConst =
2105
16
        ConstantVector::getSplat(Ty->getVectorNumElements(), toDegreeConst);
2106
16
  }
2107
32
  return Builder.CreateFMul(toDegreeConst, val);
2108
32
}
2109
2110
Value *TranslateDst(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2111
                    HLOperationLowerHelper &helper,
2112
                    HLObjectOperationLowerHelper *pObjHelper,
2113
16
                    bool &Translated) {
2114
16
  Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
2115
16
  Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
2116
16
  Type *Ty = src1->getType();
2117
16
  IRBuilder<> Builder(CI);
2118
16
  Value *Result = UndefValue::get(Ty);
2119
16
  Constant *oneConst = ConstantFP::get(Ty->getScalarType(), 1);
2120
  // dest.x = 1;
2121
16
  Result = Builder.CreateInsertElement(Result, oneConst, (uint64_t)0);
2122
  // dest.y = src0.y * src1.y;
2123
16
  Value *src0_y = Builder.CreateExtractElement(src0, 1);
2124
16
  Value *src1_y = Builder.CreateExtractElement(src1, 1);
2125
16
  Value *yMuly = Builder.CreateFMul(src0_y, src1_y);
2126
16
  Result = Builder.CreateInsertElement(Result, yMuly, 1);
2127
  // dest.z = src0.z;
2128
16
  Value *src0_z = Builder.CreateExtractElement(src0, 2);
2129
16
  Result = Builder.CreateInsertElement(Result, src0_z, 2);
2130
  // dest.w = src1.w;
2131
16
  Value *src1_w = Builder.CreateExtractElement(src1, 3);
2132
16
  Result = Builder.CreateInsertElement(Result, src1_w, 3);
2133
16
  return Result;
2134
16
}
2135
2136
Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2137
                           HLOperationLowerHelper &helper,
2138
                           HLObjectOperationLowerHelper *pObjHelper,
2139
204
                           bool &Translated) {
2140
204
  hlsl::OP *OP = &helper.hlslOP;
2141
204
  IRBuilder<> Builder(CI);
2142
204
  Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2143
2144
204
  Type *Ty = Src->getType();
2145
204
  Type *RetTy = Type::getInt32Ty(CI->getContext());
2146
204
  unsigned NumElements = 0;
2147
204
  if (Ty->isVectorTy()) {
2148
38
    NumElements = Ty->getVectorNumElements();
2149
38
    RetTy = VectorType::get(RetTy, NumElements);
2150
38
  }
2151
2152
204
  Constant *OpArg = OP->GetU32Const((unsigned)opcode);
2153
204
  Value *Args[] = {OpArg, Src};
2154
2155
204
  Value *FirstbitHi =
2156
204
      TrivialDxilOperation(opcode, Args, Ty, RetTy, OP, Builder);
2157
2158
204
  IntegerType *EltTy = cast<IntegerType>(Ty->getScalarType());
2159
204
  Constant *Neg1 = Builder.getInt32(-1);
2160
204
  Constant *BitWidth = Builder.getInt32(EltTy->getBitWidth() - 1);
2161
2162
204
  if (NumElements > 0) {
2163
38
    Neg1 = ConstantVector::getSplat(NumElements, Neg1);
2164
38
    BitWidth = ConstantVector::getSplat(NumElements, BitWidth);
2165
38
  }
2166
2167
204
  Value *Sub = Builder.CreateSub(BitWidth, FirstbitHi);
2168
204
  Value *Cond = Builder.CreateICmpEQ(Neg1, FirstbitHi);
2169
204
  return Builder.CreateSelect(Cond, Neg1, Sub);
2170
204
}
2171
2172
Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2173
                           HLOperationLowerHelper &helper,
2174
                           HLObjectOperationLowerHelper *pObjHelper,
2175
178
                           bool &Translated) {
2176
178
  hlsl::OP *OP = &helper.hlslOP;
2177
178
  IRBuilder<> Builder(CI);
2178
178
  Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2179
2180
178
  Type *Ty = Src->getType();
2181
178
  Type *RetTy = Type::getInt32Ty(CI->getContext());
2182
178
  if (Ty->isVectorTy())
2183
40
    RetTy = VectorType::get(RetTy, Ty->getVectorNumElements());
2184
2185
178
  Constant *OpArg = OP->GetU32Const((unsigned)opcode);
2186
178
  Value *Args[] = {OpArg, Src};
2187
2188
178
  Value *FirstbitLo =
2189
178
      TrivialDxilOperation(opcode, Args, Ty, RetTy, OP, Builder);
2190
2191
178
  return FirstbitLo;
2192
178
}
2193
2194
Value *TranslateLit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2195
                    HLOperationLowerHelper &helper,
2196
                    HLObjectOperationLowerHelper *pObjHelper,
2197
24
                    bool &Translated) {
2198
24
  Value *n_dot_l = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
2199
24
  Value *n_dot_h = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
2200
24
  Value *m = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
2201
24
  IRBuilder<> Builder(CI);
2202
2203
24
  Type *Ty = m->getType();
2204
24
  Value *Result = UndefValue::get(VectorType::get(Ty, 4));
2205
  // Result = (ambient, diffuse, specular, 1)
2206
  // ambient = 1.
2207
24
  Constant *oneConst = ConstantFP::get(Ty, 1);
2208
24
  Result = Builder.CreateInsertElement(Result, oneConst, (uint64_t)0);
2209
  // Result.w = 1.
2210
24
  Result = Builder.CreateInsertElement(Result, oneConst, 3);
2211
  // diffuse = (n_dot_l < 0) ? 0 : n_dot_l.
2212
24
  Constant *zeroConst = ConstantFP::get(Ty, 0);
2213
24
  Value *nlCmp = Builder.CreateFCmpOLT(n_dot_l, zeroConst);
2214
24
  Value *diffuse = Builder.CreateSelect(nlCmp, zeroConst, n_dot_l);
2215
24
  Result = Builder.CreateInsertElement(Result, diffuse, 1);
2216
  // specular = ((n_dot_l < 0) || (n_dot_h < 0)) ? 0: (n_dot_h ^ m).
2217
24
  Value *nhCmp = Builder.CreateFCmpOLT(n_dot_h, zeroConst);
2218
24
  Value *specCond = Builder.CreateOr(nlCmp, nhCmp);
2219
24
  bool isFXCCompatMode =
2220
24
      CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode;
2221
24
  Value *nhPowM =
2222
24
      TranslatePowImpl(&helper.hlslOP, Builder, n_dot_h, m, isFXCCompatMode);
2223
24
  Value *spec = Builder.CreateSelect(specCond, zeroConst, nhPowM);
2224
24
  Result = Builder.CreateInsertElement(Result, spec, 2);
2225
24
  return Result;
2226
24
}
2227
2228
Value *TranslateRadians(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2229
                        HLOperationLowerHelper &helper,
2230
                        HLObjectOperationLowerHelper *pObjHelper,
2231
36
                        bool &Translated) {
2232
36
  IRBuilder<> Builder(CI);
2233
36
  Type *Ty = CI->getType();
2234
36
  Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2235
  // pi/180.
2236
36
  Constant *toRadianConst = ConstantFP::get(Ty->getScalarType(), M_PI / 180);
2237
36
  if (Ty != Ty->getScalarType()) {
2238
20
    toRadianConst =
2239
20
        ConstantVector::getSplat(Ty->getVectorNumElements(), toRadianConst);
2240
20
  }
2241
36
  return Builder.CreateFMul(toRadianConst, val);
2242
36
}
2243
2244
Value *TranslateF16ToF32(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2245
                         HLOperationLowerHelper &helper,
2246
                         HLObjectOperationLowerHelper *pObjHelper,
2247
384
                         bool &Translated) {
2248
384
  IRBuilder<> Builder(CI);
2249
2250
384
  Value *x = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2251
384
  Type *Ty = CI->getType();
2252
2253
384
  Function *f16tof32 = helper.hlslOP.GetOpFunc(opcode, helper.voidTy);
2254
384
  return TrivialDxilOperation(
2255
384
      f16tof32, opcode, {Builder.getInt32(static_cast<unsigned>(opcode)), x},
2256
384
      x->getType(), Ty, &helper.hlslOP, Builder);
2257
384
}
2258
2259
Value *TranslateF32ToF16(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2260
                         HLOperationLowerHelper &helper,
2261
                         HLObjectOperationLowerHelper *pObjHelper,
2262
304
                         bool &Translated) {
2263
304
  IRBuilder<> Builder(CI);
2264
2265
304
  Value *x = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2266
304
  Type *Ty = CI->getType();
2267
2268
304
  Function *f32tof16 = helper.hlslOP.GetOpFunc(opcode, helper.voidTy);
2269
304
  return TrivialDxilOperation(
2270
304
      f32tof16, opcode, {Builder.getInt32(static_cast<unsigned>(opcode)), x},
2271
304
      x->getType(), Ty, &helper.hlslOP, Builder);
2272
304
}
2273
2274
282
Value *TranslateLength(CallInst *CI, Value *val, hlsl::OP *hlslOP) {
2275
282
  IRBuilder<> Builder(CI);
2276
282
  if (VectorType *VT = dyn_cast<VectorType>(val->getType())) {
2277
282
    Value *Elt = Builder.CreateExtractElement(val, (uint64_t)0);
2278
282
    unsigned size = VT->getNumElements();
2279
282
    if (size > 1) {
2280
282
      Value *Sum = Builder.CreateFMul(Elt, Elt);
2281
710
      for (unsigned i = 1; i < size; 
i++428
) {
2282
428
        Elt = Builder.CreateExtractElement(val, i);
2283
428
        Value *Mul = Builder.CreateFMul(Elt, Elt);
2284
428
        Sum = Builder.CreateFAdd(Sum, Mul);
2285
428
      }
2286
282
      DXIL::OpCode sqrt = DXIL::OpCode::Sqrt;
2287
282
      Function *dxilSqrt = hlslOP->GetOpFunc(sqrt, VT->getElementType());
2288
282
      Value *opArg = hlslOP->GetI32Const((unsigned)sqrt);
2289
282
      return Builder.CreateCall(dxilSqrt, {opArg, Sum},
2290
282
                                hlslOP->GetOpCodeName(sqrt));
2291
282
    } else {
2292
0
      val = Elt;
2293
0
    }
2294
282
  }
2295
0
  DXIL::OpCode fabs = DXIL::OpCode::FAbs;
2296
0
  Function *dxilFAbs = hlslOP->GetOpFunc(fabs, val->getType());
2297
0
  Value *opArg = hlslOP->GetI32Const((unsigned)fabs);
2298
0
  return Builder.CreateCall(dxilFAbs, {opArg, val},
2299
0
                            hlslOP->GetOpCodeName(fabs));
2300
282
}
2301
2302
Value *TranslateLength(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2303
                       HLOperationLowerHelper &helper,
2304
                       HLObjectOperationLowerHelper *pObjHelper,
2305
226
                       bool &Translated) {
2306
226
  hlsl::OP *hlslOP = &helper.hlslOP;
2307
226
  Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2308
226
  return TranslateLength(CI, val, hlslOP);
2309
226
}
2310
2311
Value *TranslateModF(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2312
                     HLOperationLowerHelper &helper,
2313
                     HLObjectOperationLowerHelper *pObjHelper,
2314
64
                     bool &Translated) {
2315
64
  hlsl::OP *hlslOP = &helper.hlslOP;
2316
64
  Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
2317
64
  Value *outIntPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
2318
64
  IRBuilder<> Builder(CI);
2319
64
  Value *intP =
2320
64
      TrivialDxilUnaryOperation(OP::OpCode::Round_z, val, hlslOP, Builder);
2321
64
  Value *fracP = Builder.CreateFSub(val, intP);
2322
64
  Builder.CreateStore(intP, outIntPtr);
2323
64
  return fracP;
2324
64
}
2325
2326
Value *TranslateDistance(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2327
                         HLOperationLowerHelper &helper,
2328
                         HLObjectOperationLowerHelper *pObjHelper,
2329
56
                         bool &Translated) {
2330
56
  hlsl::OP *hlslOP = &helper.hlslOP;
2331
56
  Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
2332
56
  Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
2333
56
  IRBuilder<> Builder(CI);
2334
56
  Value *sub = Builder.CreateFSub(src0, src1);
2335
56
  return TranslateLength(CI, sub, hlslOP);
2336
56
}
2337
2338
Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2339
                    HLOperationLowerHelper &helper,
2340
                    HLObjectOperationLowerHelper *pObjHelper,
2341
44
                    bool &Translated) {
2342
44
  hlsl::OP *hlslOP = &helper.hlslOP;
2343
44
  IRBuilder<> Builder(CI);
2344
44
  Type *Ty = CI->getType();
2345
44
  Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2346
44
  Constant *log2eConst = ConstantFP::get(Ty->getScalarType(), M_LOG2E);
2347
44
  if (Ty != Ty->getScalarType()) {
2348
20
    log2eConst =
2349
20
        ConstantVector::getSplat(Ty->getVectorNumElements(), log2eConst);
2350
20
  }
2351
44
  val = Builder.CreateFMul(log2eConst, val);
2352
44
  Value *exp = TrivialDxilUnaryOperation(OP::OpCode::Exp, val, hlslOP, Builder);
2353
44
  return exp;
2354
44
}
2355
2356
Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2357
                    HLOperationLowerHelper &helper,
2358
                    HLObjectOperationLowerHelper *pObjHelper,
2359
56
                    bool &Translated) {
2360
56
  hlsl::OP *hlslOP = &helper.hlslOP;
2361
56
  IRBuilder<> Builder(CI);
2362
56
  Type *Ty = CI->getType();
2363
56
  Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2364
56
  Constant *ln2Const = ConstantFP::get(Ty->getScalarType(), M_LN2);
2365
56
  if (Ty != Ty->getScalarType()) {
2366
20
    ln2Const = ConstantVector::getSplat(Ty->getVectorNumElements(), ln2Const);
2367
20
  }
2368
56
  Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder);
2369
2370
56
  return Builder.CreateFMul(ln2Const, log);
2371
56
}
2372
2373
Value *TranslateLog10(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2374
                      HLOperationLowerHelper &helper,
2375
                      HLObjectOperationLowerHelper *pObjHelper,
2376
24
                      bool &Translated) {
2377
24
  hlsl::OP *hlslOP = &helper.hlslOP;
2378
24
  IRBuilder<> Builder(CI);
2379
24
  Type *Ty = CI->getType();
2380
24
  Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2381
24
  Constant *log2_10Const = ConstantFP::get(Ty->getScalarType(), M_LN2 / M_LN10);
2382
24
  if (Ty != Ty->getScalarType()) {
2383
8
    log2_10Const =
2384
8
        ConstantVector::getSplat(Ty->getVectorNumElements(), log2_10Const);
2385
8
  }
2386
24
  Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder);
2387
2388
24
  return Builder.CreateFMul(log2_10Const, log);
2389
24
}
2390
2391
Value *TranslateFMod(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2392
                     HLOperationLowerHelper &helper,
2393
                     HLObjectOperationLowerHelper *pObjHelper,
2394
72
                     bool &Translated) {
2395
72
  hlsl::OP *hlslOP = &helper.hlslOP;
2396
72
  Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
2397
72
  Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
2398
72
  IRBuilder<> Builder(CI);
2399
72
  Value *div = Builder.CreateFDiv(src0, src1);
2400
72
  Value *negDiv = Builder.CreateFNeg(div);
2401
72
  Value *ge = Builder.CreateFCmpOGE(div, negDiv);
2402
72
  Value *absDiv =
2403
72
      TrivialDxilUnaryOperation(OP::OpCode::FAbs, div, hlslOP, Builder);
2404
72
  Value *frc =
2405
72
      TrivialDxilUnaryOperation(OP::OpCode::Frc, absDiv, hlslOP, Builder);
2406
72
  Value *negFrc = Builder.CreateFNeg(frc);
2407
72
  Value *realFrc = Builder.CreateSelect(ge, frc, negFrc);
2408
72
  return Builder.CreateFMul(realFrc, src1);
2409
72
}
2410
2411
Value *TranslateFUIBinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2412
                          HLOperationLowerHelper &helper,
2413
                          HLObjectOperationLowerHelper *pObjHelper,
2414
2.49k
                          bool &Translated) {
2415
2.49k
  bool isFloat = CI->getType()->getScalarType()->isFloatingPointTy();
2416
2.49k
  if (isFloat) {
2417
1.33k
    switch (IOP) {
2418
832
    case IntrinsicOp::IOP_max:
2419
832
      opcode = OP::OpCode::FMax;
2420
832
      break;
2421
498
    case IntrinsicOp::IOP_min:
2422
498
    default:
2423
498
      DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_min);
2424
498
      opcode = OP::OpCode::FMin;
2425
498
      break;
2426
1.33k
    }
2427
1.33k
  }
2428
2.49k
  return TrivialBinaryOperation(CI, IOP, opcode, helper, pObjHelper,
2429
2.49k
                                Translated);
2430
2.49k
}
2431
2432
Value *TranslateFUITrinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2433
                           HLOperationLowerHelper &helper,
2434
                           HLObjectOperationLowerHelper *pObjHelper,
2435
11.9k
                           bool &Translated) {
2436
11.9k
  bool isFloat = CI->getType()->getScalarType()->isFloatingPointTy();
2437
11.9k
  if (isFloat) {
2438
11.3k
    switch (IOP) {
2439
11.3k
    case IntrinsicOp::IOP_mad:
2440
11.3k
    default:
2441
11.3k
      DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_mad);
2442
11.3k
      opcode = OP::OpCode::FMad;
2443
11.3k
      break;
2444
11.3k
    }
2445
11.3k
  }
2446
11.9k
  return TrivialTrinaryOperation(CI, IOP, opcode, helper, pObjHelper,
2447
11.9k
                                 Translated);
2448
11.9k
}
2449
2450
Value *TranslateFrexp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2451
                      HLOperationLowerHelper &helper,
2452
                      HLObjectOperationLowerHelper *pObjHelper,
2453
60
                      bool &Translated) {
2454
60
  hlsl::OP *hlslOP = &helper.hlslOP;
2455
60
  Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
2456
60
  Value *expPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
2457
60
  IRBuilder<> Builder(CI);
2458
60
  Type *i32Ty = Type::getInt32Ty(CI->getContext());
2459
60
  Constant *exponentMaskConst = ConstantInt::get(i32Ty, 0x7f800000);
2460
60
  Constant *mantisaMaskConst = ConstantInt::get(i32Ty, 0x007fffff);
2461
60
  Constant *exponentShiftConst = ConstantInt::get(i32Ty, 23);
2462
60
  Constant *mantisaOrConst = ConstantInt::get(i32Ty, 0x3f000000);
2463
60
  Constant *exponentBiasConst = ConstantInt::get(i32Ty, -(int)0x3f000000);
2464
60
  Constant *zeroVal = hlslOP->GetFloatConst(0);
2465
  // int iVal = asint(val);
2466
60
  Type *dstTy = i32Ty;
2467
60
  Type *Ty = val->getType();
2468
60
  if (Ty->isVectorTy()) {
2469
28
    unsigned vecSize = Ty->getVectorNumElements();
2470
28
    dstTy = VectorType::get(i32Ty, vecSize);
2471
28
    exponentMaskConst = ConstantVector::getSplat(vecSize, exponentMaskConst);
2472
28
    mantisaMaskConst = ConstantVector::getSplat(vecSize, mantisaMaskConst);
2473
28
    exponentShiftConst = ConstantVector::getSplat(vecSize, exponentShiftConst);
2474
28
    mantisaOrConst = ConstantVector::getSplat(vecSize, mantisaOrConst);
2475
28
    exponentBiasConst = ConstantVector::getSplat(vecSize, exponentBiasConst);
2476
28
    zeroVal = ConstantVector::getSplat(vecSize, zeroVal);
2477
28
  }
2478
2479
  // bool ne = val != 0;
2480
60
  Value *notZero = Builder.CreateFCmpUNE(val, zeroVal);
2481
60
  notZero = Builder.CreateSExt(notZero, dstTy);
2482
2483
60
  Value *intVal = Builder.CreateBitCast(val, dstTy);
2484
  // temp = intVal & exponentMask;
2485
60
  Value *temp = Builder.CreateAnd(intVal, exponentMaskConst);
2486
  // temp = temp + exponentBias;
2487
60
  temp = Builder.CreateAdd(temp, exponentBiasConst);
2488
  // temp = temp & ne;
2489
60
  temp = Builder.CreateAnd(temp, notZero);
2490
  // temp = temp >> exponentShift;
2491
60
  temp = Builder.CreateAShr(temp, exponentShiftConst);
2492
  // exp = float(temp);
2493
60
  Value *exp = Builder.CreateSIToFP(temp, Ty);
2494
60
  Builder.CreateStore(exp, expPtr);
2495
  // temp = iVal & mantisaMask;
2496
60
  temp = Builder.CreateAnd(intVal, mantisaMaskConst);
2497
  // temp = temp | mantisaOr;
2498
60
  temp = Builder.CreateOr(temp, mantisaOrConst);
2499
  // mantisa = temp & ne;
2500
60
  Value *mantisa = Builder.CreateAnd(temp, notZero);
2501
60
  return Builder.CreateBitCast(mantisa, Ty);
2502
60
}
2503
2504
Value *TranslateLdExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2505
                      HLOperationLowerHelper &helper,
2506
                      HLObjectOperationLowerHelper *pObjHelper,
2507
38
                      bool &Translated) {
2508
38
  hlsl::OP *hlslOP = &helper.hlslOP;
2509
38
  Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
2510
38
  Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
2511
38
  IRBuilder<> Builder(CI);
2512
38
  Value *exp =
2513
38
      TrivialDxilUnaryOperation(OP::OpCode::Exp, src1, hlslOP, Builder);
2514
38
  return Builder.CreateFMul(exp, src0);
2515
38
}
2516
2517
Value *TranslateFWidth(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2518
                       HLOperationLowerHelper &helper,
2519
                       HLObjectOperationLowerHelper *pObjHelper,
2520
36
                       bool &Translated) {
2521
36
  hlsl::OP *hlslOP = &helper.hlslOP;
2522
36
  Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2523
36
  IRBuilder<> Builder(CI);
2524
36
  Value *ddx =
2525
36
      TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseX, src, hlslOP, Builder);
2526
36
  Value *absDdx =
2527
36
      TrivialDxilUnaryOperation(OP::OpCode::FAbs, ddx, hlslOP, Builder);
2528
36
  Value *ddy =
2529
36
      TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseY, src, hlslOP, Builder);
2530
36
  Value *absDdy =
2531
36
      TrivialDxilUnaryOperation(OP::OpCode::FAbs, ddy, hlslOP, Builder);
2532
36
  return Builder.CreateFAdd(absDdx, absDdy);
2533
36
}
2534
2535
Value *TranslateLerp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2536
                     HLOperationLowerHelper &helper,
2537
                     HLObjectOperationLowerHelper *pObjHelper,
2538
348
                     bool &Translated) {
2539
  // x + s(y-x)
2540
348
  Value *x = CI->getArgOperand(HLOperandIndex::kLerpOpXIdx);
2541
348
  Value *y = CI->getArgOperand(HLOperandIndex::kLerpOpYIdx);
2542
348
  IRBuilder<> Builder(CI);
2543
348
  Value *ySubx = Builder.CreateFSub(y, x);
2544
348
  Value *s = CI->getArgOperand(HLOperandIndex::kLerpOpSIdx);
2545
348
  Value *sMulSub = Builder.CreateFMul(s, ySubx);
2546
348
  return Builder.CreateFAdd(x, sMulSub);
2547
348
}
2548
2549
Value *TrivialDotOperation(OP::OpCode opcode, Value *src0, Value *src1,
2550
2.38k
                           hlsl::OP *hlslOP, IRBuilder<> &Builder) {
2551
2.38k
  Type *Ty = src0->getType()->getScalarType();
2552
2.38k
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty);
2553
2.38k
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
2554
2555
2.38k
  SmallVector<Value *, 9> args;
2556
2.38k
  args.emplace_back(opArg);
2557
2558
2.38k
  unsigned vecSize = src0->getType()->getVectorNumElements();
2559
9.76k
  for (unsigned i = 0; i < vecSize; 
i++7.38k
)
2560
7.38k
    args.emplace_back(Builder.CreateExtractElement(src0, i));
2561
2562
9.76k
  for (unsigned i = 0; i < vecSize; 
i++7.38k
)
2563
7.38k
    args.emplace_back(Builder.CreateExtractElement(src1, i));
2564
2.38k
  Value *dotOP = Builder.CreateCall(dxilFunc, args);
2565
2566
2.38k
  return dotOP;
2567
2.38k
}
2568
2569
// Instead of using a DXIL intrinsic, implement a dot product operation using
2570
// multiply and add operations. Used for integer dots and long vectors.
2571
Value *ExpandDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP,
2572
                 IRBuilder<> &Builder,
2573
366
                 DXIL::OpCode MadOpCode = DXIL::OpCode::IMad) {
2574
366
  Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0);
2575
366
  Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0);
2576
366
  Value *Result;
2577
366
  if (Elt0->getType()->isFloatingPointTy())
2578
0
    Result = Builder.CreateFMul(Elt0, Elt1);
2579
366
  else
2580
366
    Result = Builder.CreateMul(Elt0, Elt1);
2581
1.24k
  for (unsigned Elt = 1; Elt < vecSize; 
++Elt876
) {
2582
876
    Elt0 = Builder.CreateExtractElement(arg0, Elt);
2583
876
    Elt1 = Builder.CreateExtractElement(arg1, Elt);
2584
876
    Result = TrivialDxilTrinaryOperation(MadOpCode, Elt0, Elt1, Result, hlslOP,
2585
876
                                         Builder);
2586
876
  }
2587
2588
366
  return Result;
2589
366
}
2590
2591
Value *TranslateFDot(Value *arg0, Value *arg1, unsigned vecSize,
2592
2.41k
                     hlsl::OP *hlslOP, IRBuilder<> &Builder) {
2593
2.41k
  switch (vecSize) {
2594
198
  case 2:
2595
198
    return TrivialDotOperation(OP::OpCode::Dot2, arg0, arg1, hlslOP, Builder);
2596
0
    break;
2597
1.76k
  case 3:
2598
1.76k
    return TrivialDotOperation(OP::OpCode::Dot3, arg0, arg1, hlslOP, Builder);
2599
0
    break;
2600
426
  case 4:
2601
426
    return TrivialDotOperation(OP::OpCode::Dot4, arg0, arg1, hlslOP, Builder);
2602
0
    break;
2603
28
  default:
2604
28
    DXASSERT(vecSize == 1, "wrong vector size");
2605
28
    {
2606
28
      Value *vecMul = Builder.CreateFMul(arg0, arg1);
2607
28
      return Builder.CreateExtractElement(vecMul, (uint64_t)0);
2608
0
    }
2609
0
    break;
2610
2.41k
  }
2611
2.41k
}
2612
2613
Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2614
                    HLOperationLowerHelper &helper,
2615
                    HLObjectOperationLowerHelper *pObjHelper,
2616
1.98k
                    bool &Translated) {
2617
1.98k
  hlsl::OP *hlslOP = &helper.hlslOP;
2618
1.98k
  Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
2619
1.98k
  Type *Ty = arg0->getType();
2620
1.98k
  Type *EltTy = Ty->getScalarType();
2621
2622
  // SM6.9 introduced a DXIL operation for vectorized dot product
2623
  // The operation is only advantageous for vect size>1, vec1s will be
2624
  // lowered to a single Mul.
2625
1.98k
  if (hlslOP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus() &&
2626
1.98k
      
EltTy->isFloatingPointTy()20
&&
Ty->getVectorNumElements() > 120
) {
2627
18
    Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
2628
18
    IRBuilder<> Builder(CI);
2629
18
    Constant *opArg = hlslOP->GetU32Const((unsigned)DXIL::OpCode::FDot);
2630
18
    Value *args[] = {opArg, arg0, arg1};
2631
18
    Function *dxilFunc = hlslOP->GetOpFunc(DXIL::OpCode::FDot, Ty);
2632
18
    return TrivialDxilVectorOperation(dxilFunc, DXIL::OpCode::FDot, args, Ty,
2633
18
                                      hlslOP, Builder);
2634
18
  }
2635
2636
1.96k
  unsigned vecSize = Ty->getVectorNumElements();
2637
1.96k
  Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
2638
1.96k
  IRBuilder<> Builder(CI);
2639
1.96k
  if (EltTy->isFloatingPointTy() && 
Ty->getVectorNumElements() <= 41.64k
)
2640
1.64k
    return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder);
2641
2642
320
  DXIL::OpCode MadOpCode = DXIL::OpCode::IMad;
2643
320
  if (IOP == IntrinsicOp::IOP_udot)
2644
224
    MadOpCode = DXIL::OpCode::UMad;
2645
96
  else if (EltTy->isFloatingPointTy())
2646
0
    MadOpCode = DXIL::OpCode::FMad;
2647
320
  return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode);
2648
1.96k
}
2649
2650
Value *TranslateNormalize(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2651
                          HLOperationLowerHelper &helper,
2652
                          HLObjectOperationLowerHelper *pObjHelper,
2653
648
                          bool &Translated) {
2654
648
  hlsl::OP *hlslOP = &helper.hlslOP;
2655
648
  Type *Ty = CI->getType();
2656
648
  Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2657
648
  VectorType *VT = cast<VectorType>(Ty);
2658
648
  unsigned vecSize = VT->getNumElements();
2659
2660
648
  IRBuilder<> Builder(CI);
2661
648
  Value *dot = TranslateFDot(op, op, vecSize, hlslOP, Builder);
2662
648
  DXIL::OpCode rsqrtOp = DXIL::OpCode::Rsqrt;
2663
648
  Function *dxilRsqrt = hlslOP->GetOpFunc(rsqrtOp, VT->getElementType());
2664
648
  Value *rsqrt = Builder.CreateCall(
2665
648
      dxilRsqrt, {hlslOP->GetI32Const((unsigned)rsqrtOp), dot},
2666
648
      hlslOP->GetOpCodeName(rsqrtOp));
2667
648
  Value *vecRsqrt = UndefValue::get(VT);
2668
2.60k
  for (unsigned i = 0; i < VT->getNumElements(); 
i++1.95k
)
2669
1.95k
    vecRsqrt = Builder.CreateInsertElement(vecRsqrt, rsqrt, i);
2670
2671
648
  return Builder.CreateFMul(op, vecRsqrt);
2672
648
}
2673
2674
Value *TranslateReflect(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
2675
                        HLOperationLowerHelper &helper,
2676
                        HLObjectOperationLowerHelper *pObjHelper,
2677
16
                        bool &Translated) {
2678
16
  hlsl::OP *hlslOP = &helper.hlslOP;
2679
  //  v = i - 2 * n * dot(i, n).
2680
16
  IRBuilder<> Builder(CI);
2681
16
  Value *i = CI->getArgOperand(HLOperandIndex::kReflectOpIIdx);
2682
16
  Value *n = CI->getArgOperand(HLOperandIndex::kReflectOpNIdx);
2683
2684
16
  VectorType *VT = cast<VectorType>(i->getType());
2685
16
  unsigned vecSize = VT->getNumElements();
2686
16
  Value *dot = TranslateFDot(i, n, vecSize, hlslOP, Builder);
2687
  // 2 * dot (i, n).
2688
16
  dot = Builder.CreateFMul(ConstantFP::get(dot->getType(), 2.0), dot);
2689
  // 2 * n * dot(i, n).
2690
16
  Value *vecDot = Builder.CreateVectorSplat(vecSize, dot);
2691
16
  Value *nMulDot = Builder.CreateFMul(vecDot, n);
2692
  // i - 2 * n * dot(i, n).
2693
16
  return Builder.CreateFSub(i, nMulDot);
2694
16
}
2695
2696
Value *TranslateRefract(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
2697
                        HLOperationLowerHelper &helper,
2698
                        HLObjectOperationLowerHelper *pObjHelper,
2699
46
                        bool &Translated) {
2700
46
  hlsl::OP *hlslOP = &helper.hlslOP;
2701
  //  d = dot(i, n);
2702
  //  t = 1 - eta * eta * ( 1 - d*d);
2703
  //  cond = t >= 1;
2704
  //  r = eta * i - (eta * d + sqrt(t)) * n;
2705
  //  return cond ? r : 0;
2706
46
  IRBuilder<> Builder(CI);
2707
46
  Value *i = CI->getArgOperand(HLOperandIndex::kRefractOpIIdx);
2708
46
  Value *n = CI->getArgOperand(HLOperandIndex::kRefractOpNIdx);
2709
46
  Value *eta = CI->getArgOperand(HLOperandIndex::kRefractOpEtaIdx);
2710
2711
46
  VectorType *VT = cast<VectorType>(i->getType());
2712
46
  unsigned vecSize = VT->getNumElements();
2713
46
  Value *dot = TranslateFDot(i, n, vecSize, hlslOP, Builder);
2714
  // eta * eta;
2715
46
  Value *eta2 = Builder.CreateFMul(eta, eta);
2716
  // d*d;
2717
46
  Value *dot2 = Builder.CreateFMul(dot, dot);
2718
46
  Constant *one = ConstantFP::get(eta->getType(), 1);
2719
46
  Constant *zero = ConstantFP::get(eta->getType(), 0);
2720
  // 1- d*d;
2721
46
  dot2 = Builder.CreateFSub(one, dot2);
2722
  // eta * eta * (1-d*d);
2723
46
  eta2 = Builder.CreateFMul(dot2, eta2);
2724
  // t = 1 - eta * eta * ( 1 - d*d);
2725
46
  Value *t = Builder.CreateFSub(one, eta2);
2726
  // cond = t >= 0;
2727
46
  Value *cond = Builder.CreateFCmpOGE(t, zero);
2728
  // eta * i;
2729
46
  Value *vecEta = UndefValue::get(VT);
2730
176
  for (unsigned i = 0; i < vecSize; 
i++130
)
2731
130
    vecEta = Builder.CreateInsertElement(vecEta, eta, i);
2732
46
  Value *etaMulI = Builder.CreateFMul(i, vecEta);
2733
  // sqrt(t);
2734
46
  Value *sqrt = TrivialDxilUnaryOperation(OP::OpCode::Sqrt, t, hlslOP, Builder);
2735
  // eta * d;
2736
46
  Value *etaMulD = Builder.CreateFMul(eta, dot);
2737
  // eta * d + sqrt(t);
2738
46
  Value *etaSqrt = Builder.CreateFAdd(etaMulD, sqrt);
2739
  // (eta * d + sqrt(t)) * n;
2740
46
  Value *vecEtaSqrt = Builder.CreateVectorSplat(vecSize, etaSqrt);
2741
46
  Value *r = Builder.CreateFMul(vecEtaSqrt, n);
2742
  // r = eta * i - (eta * d + sqrt(t)) * n;
2743
46
  r = Builder.CreateFSub(etaMulI, r);
2744
46
  Value *refract =
2745
46
      Builder.CreateSelect(cond, r, ConstantVector::getSplat(vecSize, zero));
2746
46
  return refract;
2747
46
}
2748
2749
Value *TranslateSmoothStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2750
                           HLOperationLowerHelper &helper,
2751
                           HLObjectOperationLowerHelper *pObjHelper,
2752
60
                           bool &Translated) {
2753
60
  hlsl::OP *hlslOP = &helper.hlslOP;
2754
  // s = saturate((x-min)/(max-min)).
2755
60
  IRBuilder<> Builder(CI);
2756
60
  Value *minVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMinIdx);
2757
60
  Value *maxVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMaxIdx);
2758
60
  Value *maxSubMin = Builder.CreateFSub(maxVal, minVal);
2759
60
  Value *x = CI->getArgOperand(HLOperandIndex::kSmoothStepOpXIdx);
2760
60
  Value *xSubMin = Builder.CreateFSub(x, minVal);
2761
60
  Value *satVal = Builder.CreateFDiv(xSubMin, maxSubMin);
2762
2763
60
  Value *s = TrivialDxilUnaryOperation(DXIL::OpCode::Saturate, satVal, hlslOP,
2764
60
                                       Builder);
2765
  // return s * s *(3-2*s).
2766
60
  Constant *c2 = ConstantFP::get(CI->getType(), 2);
2767
60
  Constant *c3 = ConstantFP::get(CI->getType(), 3);
2768
2769
60
  Value *sMul2 = Builder.CreateFMul(s, c2);
2770
60
  Value *result = Builder.CreateFSub(c3, sMul2);
2771
60
  result = Builder.CreateFMul(s, result);
2772
60
  result = Builder.CreateFMul(s, result);
2773
60
  return result;
2774
60
}
2775
2776
Value *TranslateMSad4(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2777
                      HLOperationLowerHelper &helper,
2778
                      HLObjectOperationLowerHelper *pObjHelper,
2779
16
                      bool &Translated) {
2780
16
  hlsl::OP *hlslOP = &helper.hlslOP;
2781
16
  Value *ref = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
2782
16
  Value *src = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
2783
16
  Value *accum = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
2784
16
  Type *Ty = CI->getType();
2785
16
  IRBuilder<> Builder(CI);
2786
16
  Value *vecRef = UndefValue::get(Ty);
2787
80
  for (unsigned i = 0; i < 4; 
i++64
)
2788
64
    vecRef = Builder.CreateInsertElement(vecRef, ref, i);
2789
2790
16
  Value *srcX = Builder.CreateExtractElement(src, (uint64_t)0);
2791
16
  Value *srcY = Builder.CreateExtractElement(src, 1);
2792
2793
16
  Value *byteSrc = UndefValue::get(Ty);
2794
16
  byteSrc = Builder.CreateInsertElement(byteSrc, srcX, (uint64_t)0);
2795
2796
  // ushr r0.yzw, srcX, l(0, 8, 16, 24)
2797
  // bfi r1.yzw, l(0, 8, 16, 24), l(0, 24, 16, 8), srcX, r0.yyzw
2798
16
  Value *bfiOpArg =
2799
16
      hlslOP->GetU32Const(static_cast<unsigned>(DXIL::OpCode::Bfi));
2800
2801
16
  Value *imm8 = hlslOP->GetU32Const(8);
2802
16
  Value *imm16 = hlslOP->GetU32Const(16);
2803
16
  Value *imm24 = hlslOP->GetU32Const(24);
2804
2805
16
  Ty = ref->getType();
2806
  // Get x[31:8].
2807
16
  Value *srcXShift = Builder.CreateLShr(srcX, imm8);
2808
  // y[0~7] x[31:8].
2809
16
  Value *byteSrcElt = TrivialDxilOperation(
2810
16
      DXIL::OpCode::Bfi, {bfiOpArg, imm8, imm24, srcY, srcXShift}, Ty, Ty,
2811
16
      hlslOP, Builder);
2812
16
  byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 1);
2813
  // Get x[31:16].
2814
16
  srcXShift = Builder.CreateLShr(srcXShift, imm8);
2815
  // y[0~15] x[31:16].
2816
16
  byteSrcElt = TrivialDxilOperation(DXIL::OpCode::Bfi,
2817
16
                                    {bfiOpArg, imm16, imm16, srcY, srcXShift},
2818
16
                                    Ty, Ty, hlslOP, Builder);
2819
16
  byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 2);
2820
  // Get x[31:24].
2821
16
  srcXShift = Builder.CreateLShr(srcXShift, imm8);
2822
  // y[0~23] x[31:24].
2823
16
  byteSrcElt = TrivialDxilOperation(DXIL::OpCode::Bfi,
2824
16
                                    {bfiOpArg, imm24, imm8, srcY, srcXShift},
2825
16
                                    Ty, Ty, hlslOP, Builder);
2826
16
  byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 3);
2827
2828
  // Msad on vecref and byteSrc.
2829
16
  return TrivialDxilTrinaryOperation(DXIL::OpCode::Msad, vecRef, byteSrc, accum,
2830
16
                                     hlslOP, Builder);
2831
16
}
2832
2833
Value *TranslateRCP(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2834
                    HLOperationLowerHelper &helper,
2835
                    HLObjectOperationLowerHelper *pObjHelper,
2836
76
                    bool &Translated) {
2837
76
  Type *Ty = CI->getType();
2838
76
  Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2839
76
  IRBuilder<> Builder(CI);
2840
76
  Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0);
2841
76
  if (Ty != Ty->getScalarType()) {
2842
56
    one = ConstantVector::getSplat(Ty->getVectorNumElements(), one);
2843
56
  }
2844
76
  return Builder.CreateFDiv(one, op);
2845
76
}
2846
2847
Value *TranslateSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2848
                     HLOperationLowerHelper &helper,
2849
                     HLObjectOperationLowerHelper *pObjHelper,
2850
180
                     bool &Translated) {
2851
180
  Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2852
180
  Type *Ty = val->getType();
2853
180
  bool IsInt = Ty->getScalarType()->isIntegerTy();
2854
2855
180
  IRBuilder<> Builder(CI);
2856
180
  Constant *zero = Constant::getNullValue(Ty);
2857
180
  Value *zeroLtVal = IsInt ? 
Builder.CreateICmpSLT(zero, val)44
2858
180
                           : 
Builder.CreateFCmpOLT(zero, val)136
;
2859
180
  Value *valLtZero = IsInt ? 
Builder.CreateICmpSLT(val, zero)44
2860
180
                           : 
Builder.CreateFCmpOLT(val, zero)136
;
2861
180
  zeroLtVal = Builder.CreateZExt(zeroLtVal, CI->getType());
2862
180
  valLtZero = Builder.CreateZExt(valLtZero, CI->getType());
2863
180
  return Builder.CreateSub(zeroLtVal, valLtZero);
2864
180
}
2865
2866
Value *TranslateUSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2867
                      HLOperationLowerHelper &helper,
2868
                      HLObjectOperationLowerHelper *pObjHelper,
2869
36
                      bool &Translated) {
2870
36
  Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2871
36
  Type *Ty = val->getType();
2872
2873
36
  IRBuilder<> Builder(CI);
2874
36
  Constant *zero = Constant::getNullValue(Ty);
2875
36
  Value *nonZero = Builder.CreateICmpNE(val, zero);
2876
36
  return Builder.CreateZExt(nonZero, CI->getType());
2877
36
}
2878
2879
Value *TranslateStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2880
                     HLOperationLowerHelper &helper,
2881
                     HLObjectOperationLowerHelper *pObjHelper,
2882
36
                     bool &Translated) {
2883
36
  Value *edge = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
2884
36
  Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
2885
36
  Type *Ty = CI->getType();
2886
36
  IRBuilder<> Builder(CI);
2887
2888
36
  Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0);
2889
36
  Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
2890
36
  Value *cond = Builder.CreateFCmpOLT(x, edge);
2891
2892
36
  if (Ty != Ty->getScalarType()) {
2893
20
    one = ConstantVector::getSplat(Ty->getVectorNumElements(), one);
2894
20
    zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero);
2895
20
  }
2896
2897
36
  return Builder.CreateSelect(cond, zero, one);
2898
36
}
2899
2900
Value *TranslatePow(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
2901
                    HLOperationLowerHelper &helper,
2902
                    HLObjectOperationLowerHelper *pObjHelper,
2903
1.43k
                    bool &Translated) {
2904
1.43k
  hlsl::OP *hlslOP = &helper.hlslOP;
2905
1.43k
  Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
2906
1.43k
  Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
2907
1.43k
  bool isFXCCompatMode =
2908
1.43k
      CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode;
2909
1.43k
  IRBuilder<> Builder(CI);
2910
1.43k
  return TranslatePowImpl(hlslOP, Builder, x, y, isFXCCompatMode);
2911
1.43k
}
2912
2913
Value *TranslatePrintf(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
2914
                       HLOperationLowerHelper &helper,
2915
                       HLObjectOperationLowerHelper *pObjHelper,
2916
2
                       bool &Translated) {
2917
2
  Translated = false;
2918
2
  dxilutil::EmitErrorOnInstruction(CI,
2919
2
                                   "use of unsupported identifier 'printf'");
2920
2
  return nullptr;
2921
2
}
2922
2923
Value *TranslateFaceforward(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
2924
                            HLOperationLowerHelper &helper,
2925
                            HLObjectOperationLowerHelper *pObjHelper,
2926
16
                            bool &Translated) {
2927
16
  hlsl::OP *hlslOP = &helper.hlslOP;
2928
16
  Type *Ty = CI->getType();
2929
2930
16
  Value *n = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
2931
16
  Value *i = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
2932
16
  Value *ng = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
2933
16
  IRBuilder<> Builder(CI);
2934
2935
16
  unsigned vecSize = Ty->getVectorNumElements();
2936
  // -n x sign(dot(i, ng)).
2937
16
  Value *dotOp = TranslateFDot(i, ng, vecSize, hlslOP, Builder);
2938
2939
16
  Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
2940
16
  Value *dotLtZero = Builder.CreateFCmpOLT(dotOp, zero);
2941
2942
16
  Value *negN = Builder.CreateFNeg(n);
2943
16
  Value *faceforward = Builder.CreateSelect(dotLtZero, n, negN);
2944
16
  return faceforward;
2945
16
}
2946
2947
Value *TrivialSetMeshOutputCounts(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
2948
                                  HLOperationLowerHelper &helper,
2949
                                  HLObjectOperationLowerHelper *pObjHelper,
2950
258
                                  bool &Translated) {
2951
258
  hlsl::OP *hlslOP = &helper.hlslOP;
2952
258
  Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
2953
258
  Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
2954
258
  IRBuilder<> Builder(CI);
2955
258
  Constant *opArg = hlslOP->GetU32Const((unsigned)op);
2956
258
  Value *args[] = {opArg, src0, src1};
2957
258
  Function *dxilFunc = hlslOP->GetOpFunc(op, Type::getVoidTy(CI->getContext()));
2958
2959
258
  Builder.CreateCall(dxilFunc, args);
2960
258
  return nullptr;
2961
258
}
2962
2963
Value *TrivialDispatchMesh(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
2964
                           HLOperationLowerHelper &helper,
2965
                           HLObjectOperationLowerHelper *pObjHelper,
2966
260
                           bool &Translated) {
2967
260
  hlsl::OP *hlslOP = &helper.hlslOP;
2968
260
  Value *src0 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadX);
2969
260
  Value *src1 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadY);
2970
260
  Value *src2 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadZ);
2971
260
  Value *src3 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpPayload);
2972
260
  IRBuilder<> Builder(CI);
2973
260
  Constant *opArg = hlslOP->GetU32Const((unsigned)op);
2974
260
  Value *args[] = {opArg, src0, src1, src2, src3};
2975
260
  Function *dxilFunc = hlslOP->GetOpFunc(op, src3->getType());
2976
2977
260
  Builder.CreateCall(dxilFunc, args);
2978
260
  return nullptr;
2979
260
}
2980
} // namespace
2981
2982
// MOP intrinsics
2983
namespace {
2984
2985
Value *TranslateGetSamplePosition(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
2986
                                  HLOperationLowerHelper &helper,
2987
                                  HLObjectOperationLowerHelper *pObjHelper,
2988
48
                                  bool &Translated) {
2989
48
  hlsl::OP *hlslOP = &helper.hlslOP;
2990
48
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
2991
2992
48
  IRBuilder<> Builder(CI);
2993
48
  Value *sampleIdx =
2994
48
      CI->getArgOperand(HLOperandIndex::kGetSamplePositionSampleIdxOpIndex);
2995
2996
48
  OP::OpCode opcode = OP::OpCode::Texture2DMSGetSamplePosition;
2997
48
  llvm::Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
2998
48
  Function *dxilFunc =
2999
48
      hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
3000
3001
48
  Value *args[] = {opArg, handle, sampleIdx};
3002
48
  Value *samplePos = Builder.CreateCall(dxilFunc, args);
3003
3004
48
  Value *result = UndefValue::get(CI->getType());
3005
48
  Value *samplePosX = Builder.CreateExtractValue(samplePos, 0);
3006
48
  Value *samplePosY = Builder.CreateExtractValue(samplePos, 1);
3007
48
  result = Builder.CreateInsertElement(result, samplePosX, (uint64_t)0);
3008
48
  result = Builder.CreateInsertElement(result, samplePosY, 1);
3009
48
  return result;
3010
48
}
3011
3012
Value *TranslateGetDimensions(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
3013
                              HLOperationLowerHelper &helper,
3014
                              HLObjectOperationLowerHelper *pObjHelper,
3015
226
                              bool &Translated) {
3016
226
  hlsl::OP *hlslOP = &helper.hlslOP;
3017
3018
226
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
3019
226
  DxilResource::Kind RK = pObjHelper->GetRK(handle);
3020
3021
226
  IRBuilder<> Builder(CI);
3022
226
  OP::OpCode opcode = OP::OpCode::GetDimensions;
3023
226
  llvm::Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
3024
226
  Function *dxilFunc =
3025
226
      hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext()));
3026
3027
226
  Type *i32Ty = Type::getInt32Ty(CI->getContext());
3028
226
  Value *mipLevel = UndefValue::get(i32Ty);
3029
226
  unsigned widthOpIdx = HLOperandIndex::kGetDimensionsMipWidthOpIndex;
3030
226
  switch (RK) {
3031
0
  case DxilResource::Kind::Texture1D:
3032
0
  case DxilResource::Kind::Texture1DArray:
3033
56
  case DxilResource::Kind::Texture2D:
3034
56
  case DxilResource::Kind::Texture2DArray:
3035
66
  case DxilResource::Kind::TextureCube:
3036
66
  case DxilResource::Kind::TextureCubeArray:
3037
66
  case DxilResource::Kind::Texture3D: {
3038
66
    Value *opMipLevel =
3039
66
        CI->getArgOperand(HLOperandIndex::kGetDimensionsMipLevelOpIndex);
3040
    // mipLevel is in parameter, should not be pointer.
3041
66
    if (!opMipLevel->getType()->isPointerTy())
3042
24
      mipLevel = opMipLevel;
3043
42
    else {
3044
      // No mip level.
3045
42
      widthOpIdx = HLOperandIndex::kGetDimensionsNoMipWidthOpIndex;
3046
42
      mipLevel = ConstantInt::get(i32Ty, 0);
3047
42
    }
3048
66
  } break;
3049
160
  default:
3050
160
    widthOpIdx = HLOperandIndex::kGetDimensionsNoMipWidthOpIndex;
3051
160
    break;
3052
226
  }
3053
226
  Value *args[] = {opArg, handle, mipLevel};
3054
226
  Value *dims = Builder.CreateCall(dxilFunc, args);
3055
3056
226
  unsigned dimensionIdx = 0;
3057
3058
226
  Value *width = Builder.CreateExtractValue(dims, dimensionIdx++);
3059
226
  Value *widthPtr = CI->getArgOperand(widthOpIdx);
3060
226
  if (widthPtr->getType()->getPointerElementType()->isFloatingPointTy())
3061
8
    width = Builder.CreateSIToFP(width,
3062
8
                                 widthPtr->getType()->getPointerElementType());
3063
3064
226
  Builder.CreateStore(width, widthPtr);
3065
3066
226
  if (DXIL::IsStructuredBuffer(RK)) {
3067
    // Set stride.
3068
52
    Value *stridePtr = CI->getArgOperand(widthOpIdx + 1);
3069
52
    const DataLayout &DL = helper.dataLayout;
3070
52
    Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
3071
52
    Type *bufTy = pObjHelper->GetResourceType(handle);
3072
52
    Type *bufRetTy = bufTy->getStructElementType(0);
3073
52
    unsigned stride = DL.getTypeAllocSize(bufRetTy);
3074
52
    Builder.CreateStore(hlslOP->GetU32Const(stride), stridePtr);
3075
174
  } else {
3076
174
    if (widthOpIdx == HLOperandIndex::kGetDimensionsMipWidthOpIndex ||
3077
        // Samples is in w channel too.
3078
174
        
RK == DXIL::ResourceKind::Texture2DMS150
) {
3079
      // Has mip.
3080
68
      for (unsigned argIdx = widthOpIdx + 1;
3081
136
           argIdx < CI->getNumArgOperands() - 1; 
argIdx++68
) {
3082
68
        Value *dim = Builder.CreateExtractValue(dims, dimensionIdx++);
3083
68
        Value *ptr = CI->getArgOperand(argIdx);
3084
68
        if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
3085
0
          dim = Builder.CreateSIToFP(dim,
3086
0
                                     ptr->getType()->getPointerElementType());
3087
68
        Builder.CreateStore(dim, ptr);
3088
68
      }
3089
      // NumOfLevel is in w channel.
3090
68
      dimensionIdx = 3;
3091
68
      Value *dim = Builder.CreateExtractValue(dims, dimensionIdx);
3092
68
      Value *ptr = CI->getArgOperand(CI->getNumArgOperands() - 1);
3093
68
      if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
3094
0
        dim =
3095
0
            Builder.CreateSIToFP(dim, ptr->getType()->getPointerElementType());
3096
68
      Builder.CreateStore(dim, ptr);
3097
106
    } else {
3098
292
      for (unsigned argIdx = widthOpIdx + 1; argIdx < CI->getNumArgOperands();
3099
186
           argIdx++) {
3100
186
        Value *dim = Builder.CreateExtractValue(dims, dimensionIdx++);
3101
186
        Value *ptr = CI->getArgOperand(argIdx);
3102
186
        if (ptr->getType()->getPointerElementType()->isFloatingPointTy())
3103
8
          dim = Builder.CreateSIToFP(dim,
3104
8
                                     ptr->getType()->getPointerElementType());
3105
186
        Builder.CreateStore(dim, ptr);
3106
186
      }
3107
106
    }
3108
174
  }
3109
226
  return nullptr;
3110
226
}
3111
3112
Value *GenerateUpdateCounter(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
3113
                             HLOperationLowerHelper &helper,
3114
                             HLObjectOperationLowerHelper *pObjHelper,
3115
2.94k
                             bool &Translated) {
3116
2.94k
  hlsl::OP *hlslOP = &helper.hlslOP;
3117
2.94k
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
3118
3119
2.94k
  pObjHelper->MarkHasCounter(handle, helper.i8Ty);
3120
3121
2.94k
  bool bInc = IOP == IntrinsicOp::MOP_IncrementCounter;
3122
2.94k
  IRBuilder<> Builder(CI);
3123
3124
2.94k
  OP::OpCode OpCode = OP::OpCode::BufferUpdateCounter;
3125
2.94k
  Value *OpCodeArg = hlslOP->GetU32Const((unsigned)OpCode);
3126
2.94k
  Value *IncVal = hlslOP->GetI8Const(bInc ? 
12.65k
:
-1286
);
3127
  // Create BufferUpdateCounter call.
3128
2.94k
  Value *Args[] = {OpCodeArg, handle, IncVal};
3129
3130
2.94k
  Function *F =
3131
2.94k
      hlslOP->GetOpFunc(OpCode, Type::getVoidTy(handle->getContext()));
3132
2.94k
  return Builder.CreateCall(F, Args);
3133
2.94k
}
3134
3135
static Value *ScalarizeResRet(Type *RetTy, Value *ResRet,
3136
5.78k
                              IRBuilder<> &Builder) {
3137
  // Extract value part.
3138
5.78k
  Value *retVal = llvm::UndefValue::get(RetTy);
3139
5.78k
  if (RetTy->isVectorTy()) {
3140
24.1k
    for (unsigned i = 0; i < RetTy->getVectorNumElements(); 
i++19.2k
) {
3141
19.2k
      Value *retComp = Builder.CreateExtractValue(ResRet, i);
3142
19.2k
      retVal = Builder.CreateInsertElement(retVal, retComp, i);
3143
19.2k
    }
3144
4.90k
  } else {
3145
886
    retVal = Builder.CreateExtractValue(ResRet, 0);
3146
886
  }
3147
5.78k
  return retVal;
3148
5.78k
}
3149
3150
void UpdateStatus(Value *ResRet, Value *status, IRBuilder<> &Builder,
3151
                  hlsl::OP *hlslOp,
3152
19.8k
                  unsigned StatusIndex = DXIL::kResRetStatusIndex) {
3153
19.8k
  if (status && 
!isa<UndefValue>(status)2.25k
) {
3154
2.25k
    Value *statusVal = Builder.CreateExtractValue(ResRet, StatusIndex);
3155
2.25k
    Value *checkAccessOp = hlslOp->GetI32Const(
3156
2.25k
        static_cast<unsigned>(DXIL::OpCode::CheckAccessFullyMapped));
3157
2.25k
    Function *checkAccessFn = hlslOp->GetOpFunc(
3158
2.25k
        DXIL::OpCode::CheckAccessFullyMapped, statusVal->getType());
3159
    // CheckAccess on status.
3160
2.25k
    Value *bStatus =
3161
2.25k
        Builder.CreateCall(checkAccessFn, {checkAccessOp, statusVal});
3162
2.25k
    Value *extStatus =
3163
2.25k
        Builder.CreateZExt(bStatus, Type::getInt32Ty(status->getContext()));
3164
2.25k
    Builder.CreateStore(extStatus, status);
3165
2.25k
  }
3166
19.8k
}
3167
3168
3.15k
Value *SplatToVector(Value *Elt, Type *DstTy, IRBuilder<> &Builder) {
3169
3.15k
  Value *Result = UndefValue::get(DstTy);
3170
10.1k
  for (unsigned i = 0; i < DstTy->getVectorNumElements(); 
i++6.97k
)
3171
6.97k
    Result = Builder.CreateInsertElement(Result, Elt, i);
3172
3.15k
  return Result;
3173
3.15k
}
3174
3175
Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
3176
                    HLOperationLowerHelper &helper,
3177
                    HLObjectOperationLowerHelper *pObjHelper,
3178
140
                    bool &Translated) {
3179
3180
140
  hlsl::OP *hlslOP = &helper.hlslOP;
3181
140
  Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
3182
140
  Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
3183
140
  Type *arg0Ty = arg0->getType();
3184
140
  Type *arg1Ty = arg1->getType();
3185
140
  IRBuilder<> Builder(CI);
3186
3187
140
  if (arg0Ty->isVectorTy()) {
3188
104
    if (arg1Ty->isVectorTy()) {
3189
      // mul(vector, vector) == dot(vector, vector)
3190
84
      unsigned vecSize = arg0Ty->getVectorNumElements();
3191
84
      if (arg0Ty->getScalarType()->isFloatingPointTy()) {
3192
38
        return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder);
3193
38
      }
3194
3195
46
      DXIL::OpCode MadOpCode = DXIL::OpCode::IMad;
3196
46
      if (IOP == IntrinsicOp::IOP_umul)
3197
20
        MadOpCode = DXIL::OpCode::UMad;
3198
46
      return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode);
3199
84
    } else {
3200
      // mul(vector, scalar) == vector * scalar-splat
3201
20
      arg1 = SplatToVector(arg1, arg0Ty, Builder);
3202
20
    }
3203
104
  } else {
3204
36
    if (arg1Ty->isVectorTy()) {
3205
      // mul(scalar, vector) == scalar-splat * vector
3206
24
      arg0 = SplatToVector(arg0, arg1Ty, Builder);
3207
24
    }
3208
    // else mul(scalar, scalar) == scalar * scalar;
3209
36
  }
3210
3211
  // create fmul/mul for the pair of vectors or scalars
3212
56
  if (arg0Ty->getScalarType()->isFloatingPointTy()) {
3213
26
    return Builder.CreateFMul(arg0, arg1);
3214
26
  }
3215
30
  return Builder.CreateMul(arg0, arg1);
3216
56
}
3217
3218
// Sample intrinsics.
3219
struct SampleHelper {
3220
  SampleHelper(CallInst *CI, OP::OpCode op,
3221
               HLObjectOperationLowerHelper *pObjHelper);
3222
3223
  OP::OpCode opcode = OP::OpCode::NumOpCodes;
3224
  DXIL::ResourceKind resourceKind = DXIL::ResourceKind::Invalid;
3225
  Value *sampledTexHandle = nullptr;
3226
  Value *texHandle = nullptr;
3227
  Value *samplerHandle = nullptr;
3228
  static const unsigned kMaxCoordDimensions = 4;
3229
  unsigned coordDimensions = 0;
3230
  Value *coord[kMaxCoordDimensions];
3231
  Value *compareValue = nullptr;
3232
  Value *bias = nullptr;
3233
  Value *lod = nullptr;
3234
  // SampleGrad only.
3235
  static const unsigned kMaxDDXYDimensions = 3;
3236
  Value *ddx[kMaxDDXYDimensions];
3237
  Value *ddy[kMaxDDXYDimensions];
3238
  // Optional.
3239
  static const unsigned kMaxOffsetDimensions = 3;
3240
  unsigned offsetDimensions = 0;
3241
  Value *offset[kMaxOffsetDimensions];
3242
  Value *clamp = nullptr;
3243
  Value *status = nullptr;
3244
  unsigned maxHLOperandRead = 0;
3245
19.5k
  Value *ReadHLOperand(CallInst *CI, unsigned opIdx) {
3246
19.5k
    if (CI->getNumArgOperands() > opIdx) {
3247
9.01k
      maxHLOperandRead = std::max(maxHLOperandRead, opIdx);
3248
9.01k
      return CI->getArgOperand(opIdx);
3249
9.01k
    }
3250
10.5k
    return nullptr;
3251
19.5k
  }
3252
4.86k
  void TranslateCoord(CallInst *CI, unsigned coordIdx) {
3253
4.86k
    Value *coordArg = ReadHLOperand(CI, coordIdx);
3254
4.86k
    DXASSERT_NOMSG(coordArg);
3255
4.86k
    DXASSERT(coordArg->getType()->getVectorNumElements() == coordDimensions,
3256
4.86k
             "otherwise, HL coordinate dimensions mismatch");
3257
4.86k
    IRBuilder<> Builder(CI);
3258
15.5k
    for (unsigned i = 0; i < coordDimensions; 
i++10.6k
)
3259
10.6k
      coord[i] = Builder.CreateExtractElement(coordArg, i);
3260
4.86k
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
3261
13.6k
    for (unsigned i = coordDimensions; i < kMaxCoordDimensions; 
i++8.80k
)
3262
8.80k
      coord[i] = undefF;
3263
4.86k
  }
3264
4.39k
  void TranslateOffset(CallInst *CI, unsigned offsetIdx) {
3265
4.39k
    IntegerType *i32Ty = Type::getInt32Ty(CI->getContext());
3266
4.39k
    if (Value *offsetArg = ReadHLOperand(CI, offsetIdx)) {
3267
706
      DXASSERT(offsetArg->getType()->getVectorNumElements() == offsetDimensions,
3268
706
               "otherwise, HL coordinate dimensions mismatch");
3269
706
      IRBuilder<> Builder(CI);
3270
2.07k
      for (unsigned i = 0; i < offsetDimensions; 
i++1.36k
)
3271
1.36k
        offset[i] = Builder.CreateExtractElement(offsetArg, i);
3272
3.68k
    } else {
3273
      // Use zeros for offsets when not specified, not undef.
3274
3.68k
      Value *zero = ConstantInt::get(i32Ty, (uint64_t)0);
3275
10.1k
      for (unsigned i = 0; i < offsetDimensions; 
i++6.50k
)
3276
6.50k
        offset[i] = zero;
3277
3.68k
    }
3278
    // Use undef for components that should not be used for this resource dim.
3279
4.39k
    Value *undefI = UndefValue::get(i32Ty);
3280
9.69k
    for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; 
i++5.30k
)
3281
5.30k
      offset[i] = undefI;
3282
4.39k
  }
3283
308
  void SetBias(CallInst *CI, unsigned biasIdx) {
3284
    // Clamp bias for immediate.
3285
308
    bias = ReadHLOperand(CI, biasIdx);
3286
308
    DXASSERT_NOMSG(bias);
3287
308
    if (ConstantFP *FP = dyn_cast<ConstantFP>(bias)) {
3288
224
      float v = FP->getValueAPF().convertToFloat();
3289
224
      if (v > DXIL::kMaxMipLodBias)
3290
16
        bias = ConstantFP::get(FP->getType(), DXIL::kMaxMipLodBias);
3291
224
      if (v < DXIL::kMinMipLodBias)
3292
24
        bias = ConstantFP::get(FP->getType(), DXIL::kMinMipLodBias);
3293
224
    }
3294
308
  }
3295
1.21k
  void SetLOD(CallInst *CI, unsigned lodIdx) {
3296
1.21k
    lod = ReadHLOperand(CI, lodIdx);
3297
1.21k
    DXASSERT_NOMSG(lod);
3298
1.21k
  }
3299
650
  void SetCompareValue(CallInst *CI, unsigned cmpIdx) {
3300
650
    compareValue = ReadHLOperand(CI, cmpIdx);
3301
650
    DXASSERT_NOMSG(compareValue);
3302
650
  }
3303
3.33k
  void SetClamp(CallInst *CI, unsigned clampIdx) {
3304
3.33k
    if ((clamp = ReadHLOperand(CI, clampIdx))) {
3305
516
      if (clamp->getType()->isVectorTy()) {
3306
0
        IRBuilder<> Builder(CI);
3307
0
        clamp = Builder.CreateExtractElement(clamp, (uint64_t)0);
3308
0
      }
3309
516
    } else
3310
2.82k
      clamp = UndefValue::get(Type::getFloatTy(CI->getContext()));
3311
3.33k
  }
3312
4.39k
  void SetStatus(CallInst *CI, unsigned statusIdx) {
3313
4.39k
    status = ReadHLOperand(CI, statusIdx);
3314
4.39k
  }
3315
200
  void SetDDX(CallInst *CI, unsigned ddxIdx) {
3316
200
    SetDDXY(CI, ddx, ReadHLOperand(CI, ddxIdx));
3317
200
  }
3318
200
  void SetDDY(CallInst *CI, unsigned ddyIdx) {
3319
200
    SetDDXY(CI, ddy, ReadHLOperand(CI, ddyIdx));
3320
200
  }
3321
400
  void SetDDXY(CallInst *CI, MutableArrayRef<Value *> ddxy, Value *ddxyArg) {
3322
400
    DXASSERT_NOMSG(ddxyArg);
3323
400
    IRBuilder<> Builder(CI);
3324
400
    unsigned ddxySize = ddxyArg->getType()->getVectorNumElements();
3325
1.32k
    for (unsigned i = 0; i < ddxySize; 
i++928
)
3326
928
      ddxy[i] = Builder.CreateExtractElement(ddxyArg, i);
3327
400
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
3328
672
    for (unsigned i = ddxySize; i < kMaxDDXYDimensions; 
i++272
)
3329
272
      ddxy[i] = undefF;
3330
400
  }
3331
};
3332
3333
SampleHelper::SampleHelper(CallInst *CI, OP::OpCode op,
3334
                           HLObjectOperationLowerHelper *pObjHelper)
3335
4.86k
    : opcode(op) {
3336
3337
4.86k
  texHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
3338
4.86k
  resourceKind = pObjHelper->GetRK(texHandle);
3339
4.86k
  if (resourceKind == DXIL::ResourceKind::Invalid) {
3340
0
    opcode = DXIL::OpCode::NumOpCodes;
3341
0
    return;
3342
0
  }
3343
3344
4.86k
  coordDimensions = opcode == DXIL::OpCode::CalculateLOD
3345
4.86k
                        ? 
DxilResource::GetNumDimensionsForCalcLOD(resourceKind)172
3346
4.86k
                        : 
DxilResource::GetNumCoords(resourceKind)4.69k
;
3347
4.86k
  offsetDimensions = DxilResource::GetNumOffsets(resourceKind);
3348
3349
4.86k
  const bool bFeedbackOp = hlsl::OP::IsDxilOpFeedback(op);
3350
4.86k
  sampledTexHandle =
3351
4.86k
      bFeedbackOp ? CI->getArgOperand(
3352
300
                        HLOperandIndex::kWriteSamplerFeedbackSampledArgIndex)
3353
4.86k
                  : 
nullptr4.56k
;
3354
4.86k
  const unsigned kSamplerArgIndex =
3355
4.86k
      bFeedbackOp ? 
HLOperandIndex::kWriteSamplerFeedbackSamplerArgIndex300
3356
4.86k
                  : 
HLOperandIndex::kSampleSamplerArgIndex4.56k
;
3357
4.86k
  samplerHandle = CI->getArgOperand(kSamplerArgIndex);
3358
3359
4.86k
  const unsigned kCoordArgIdx =
3360
4.86k
      bFeedbackOp ? 
HLOperandIndex::kWriteSamplerFeedbackCoordArgIndex300
3361
4.86k
                  : 
HLOperandIndex::kSampleCoordArgIndex4.56k
;
3362
4.86k
  TranslateCoord(CI, kCoordArgIdx);
3363
3364
  // TextureCube does not support offsets, shifting each subsequent arg index
3365
  // down by 1
3366
4.86k
  unsigned cube = (resourceKind == DXIL::ResourceKind::TextureCube ||
3367
4.86k
                   
resourceKind == DXIL::ResourceKind::TextureCubeArray4.66k
)
3368
4.86k
                      ? 
1402
3369
4.86k
                      : 
04.46k
;
3370
3371
4.86k
  switch (op) {
3372
2.42k
  case OP::OpCode::Sample:
3373
2.42k
    TranslateOffset(CI, cube ? 
HLOperandIndex::kInvalidIdx66
3374
2.42k
                             : 
HLOperandIndex::kSampleOffsetArgIndex2.36k
);
3375
2.42k
    SetClamp(CI, HLOperandIndex::kSampleClampArgIndex - cube);
3376
2.42k
    SetStatus(CI, HLOperandIndex::kSampleStatusArgIndex - cube);
3377
2.42k
    break;
3378
1.00k
  case OP::OpCode::SampleLevel:
3379
1.00k
    SetLOD(CI, HLOperandIndex::kSampleLLevelArgIndex);
3380
1.00k
    TranslateOffset(CI, cube ? 
HLOperandIndex::kInvalidIdx44
3381
1.00k
                             : 
HLOperandIndex::kSampleLOffsetArgIndex956
);
3382
1.00k
    SetStatus(CI, HLOperandIndex::kSampleLStatusArgIndex - cube);
3383
1.00k
    break;
3384
196
  case OP::OpCode::SampleBias:
3385
196
    SetBias(CI, HLOperandIndex::kSampleBBiasArgIndex);
3386
196
    TranslateOffset(CI, cube ? 
HLOperandIndex::kInvalidIdx48
3387
196
                             : 
HLOperandIndex::kSampleBOffsetArgIndex148
);
3388
196
    SetClamp(CI, HLOperandIndex::kSampleBClampArgIndex - cube);
3389
196
    SetStatus(CI, HLOperandIndex::kSampleBStatusArgIndex - cube);
3390
196
    break;
3391
222
  case OP::OpCode::SampleCmp:
3392
222
    SetCompareValue(CI, HLOperandIndex::kSampleCmpCmpValArgIndex);
3393
222
    TranslateOffset(CI, cube ? 
HLOperandIndex::kInvalidIdx66
3394
222
                             : 
HLOperandIndex::kSampleCmpOffsetArgIndex156
);
3395
222
    SetClamp(CI, HLOperandIndex::kSampleCmpClampArgIndex - cube);
3396
222
    SetStatus(CI, HLOperandIndex::kSampleCmpStatusArgIndex - cube);
3397
222
    break;
3398
48
  case OP::OpCode::SampleCmpBias:
3399
48
    SetBias(CI, HLOperandIndex::kSampleCmpBBiasArgIndex);
3400
48
    SetCompareValue(CI, HLOperandIndex::kSampleCmpBCmpValArgIndex);
3401
48
    TranslateOffset(CI, cube ? 
HLOperandIndex::kInvalidIdx8
3402
48
                             : 
HLOperandIndex::kSampleCmpBOffsetArgIndex40
);
3403
48
    SetClamp(CI, HLOperandIndex::kSampleCmpBClampArgIndex - cube);
3404
48
    SetStatus(CI, HLOperandIndex::kSampleCmpBStatusArgIndex - cube);
3405
48
    break;
3406
48
  case OP::OpCode::SampleCmpGrad:
3407
48
    SetDDX(CI, HLOperandIndex::kSampleCmpGDDXArgIndex);
3408
48
    SetDDY(CI, HLOperandIndex::kSampleCmpGDDYArgIndex);
3409
48
    SetCompareValue(CI, HLOperandIndex::kSampleCmpGCmpValArgIndex);
3410
48
    TranslateOffset(CI, cube ? 
HLOperandIndex::kInvalidIdx16
3411
48
                             : 
HLOperandIndex::kSampleCmpGOffsetArgIndex32
);
3412
48
    SetClamp(CI, HLOperandIndex::kSampleCmpGClampArgIndex - cube);
3413
48
    SetStatus(CI, HLOperandIndex::kSampleCmpGStatusArgIndex - cube);
3414
48
    break;
3415
192
  case OP::OpCode::SampleCmpLevel:
3416
192
    SetCompareValue(CI, HLOperandIndex::kSampleCmpCmpValArgIndex);
3417
192
    TranslateOffset(CI, cube ? 
HLOperandIndex::kInvalidIdx48
3418
192
                             : 
HLOperandIndex::kSampleCmpLOffsetArgIndex144
);
3419
192
    SetLOD(CI, HLOperandIndex::kSampleCmpLLevelArgIndex);
3420
192
    SetStatus(CI, HLOperandIndex::kSampleCmpStatusArgIndex - cube);
3421
192
    break;
3422
140
  case OP::OpCode::SampleCmpLevelZero:
3423
140
    SetCompareValue(CI, HLOperandIndex::kSampleCmpLZCmpValArgIndex);
3424
140
    TranslateOffset(CI, cube ? 
HLOperandIndex::kInvalidIdx32
3425
140
                             : 
HLOperandIndex::kSampleCmpLZOffsetArgIndex108
);
3426
140
    SetStatus(CI, HLOperandIndex::kSampleCmpLZStatusArgIndex - cube);
3427
140
    break;
3428
120
  case OP::OpCode::SampleGrad:
3429
120
    SetDDX(CI, HLOperandIndex::kSampleGDDXArgIndex);
3430
120
    SetDDY(CI, HLOperandIndex::kSampleGDDYArgIndex);
3431
120
    TranslateOffset(CI, cube ? 
HLOperandIndex::kInvalidIdx48
3432
120
                             : 
HLOperandIndex::kSampleGOffsetArgIndex72
);
3433
120
    SetClamp(CI, HLOperandIndex::kSampleGClampArgIndex - cube);
3434
120
    SetStatus(CI, HLOperandIndex::kSampleGStatusArgIndex - cube);
3435
120
    break;
3436
172
  case OP::OpCode::CalculateLOD:
3437
    // Only need coord for LOD calculation.
3438
172
    break;
3439
180
  case OP::OpCode::WriteSamplerFeedback:
3440
180
    SetClamp(CI, HLOperandIndex::kWriteSamplerFeedback_ClampArgIndex);
3441
180
    break;
3442
64
  case OP::OpCode::WriteSamplerFeedbackBias:
3443
64
    SetBias(CI, HLOperandIndex::kWriteSamplerFeedbackBias_BiasArgIndex);
3444
64
    SetClamp(CI, HLOperandIndex::kWriteSamplerFeedbackBias_ClampArgIndex);
3445
64
    break;
3446
32
  case OP::OpCode::WriteSamplerFeedbackGrad:
3447
32
    SetDDX(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_DdxArgIndex);
3448
32
    SetDDY(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_DdyArgIndex);
3449
32
    SetClamp(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_ClampArgIndex);
3450
32
    break;
3451
24
  case OP::OpCode::WriteSamplerFeedbackLevel:
3452
24
    SetLOD(CI, HLOperandIndex::kWriteSamplerFeedbackLevel_LodArgIndex);
3453
24
    break;
3454
0
  default:
3455
0
    DXASSERT(0, "invalid opcode for Sample");
3456
0
    break;
3457
4.86k
  }
3458
4.86k
  DXASSERT(maxHLOperandRead == CI->getNumArgOperands() - 1,
3459
4.86k
           "otherwise, unused HL arguments for Sample op");
3460
4.86k
}
3461
3462
Value *TranslateCalculateLOD(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
3463
                             HLOperationLowerHelper &helper,
3464
                             HLObjectOperationLowerHelper *pObjHelper,
3465
172
                             bool &Translated) {
3466
172
  hlsl::OP *hlslOP = &helper.hlslOP;
3467
172
  SampleHelper sampleHelper(CI, OP::OpCode::CalculateLOD, pObjHelper);
3468
172
  if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
3469
0
    Translated = false;
3470
0
    return nullptr;
3471
0
  }
3472
3473
172
  bool bClamped = IOP == IntrinsicOp::MOP_CalculateLevelOfDetail;
3474
172
  IRBuilder<> Builder(CI);
3475
172
  Value *opArg =
3476
172
      hlslOP->GetU32Const(static_cast<unsigned>(OP::OpCode::CalculateLOD));
3477
172
  Value *clamped = hlslOP->GetI1Const(bClamped);
3478
3479
172
  Value *args[] = {opArg,
3480
172
                   sampleHelper.texHandle,
3481
172
                   sampleHelper.samplerHandle,
3482
172
                   sampleHelper.coord[0],
3483
172
                   sampleHelper.coord[1],
3484
172
                   sampleHelper.coord[2],
3485
172
                   clamped};
3486
172
  Function *dxilFunc = hlslOP->GetOpFunc(OP::OpCode::CalculateLOD,
3487
172
                                         Type::getFloatTy(opArg->getContext()));
3488
172
  Value *LOD = Builder.CreateCall(dxilFunc, args);
3489
172
  return LOD;
3490
172
}
3491
3492
Value *TranslateCheckAccess(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
3493
                            HLOperationLowerHelper &helper,
3494
                            HLObjectOperationLowerHelper *pObjHelper,
3495
456
                            bool &Translated) {
3496
  // Translate CheckAccess into uint->bool, later optimization should remove it.
3497
  // Real checkaccess is generated in UpdateStatus.
3498
456
  IRBuilder<> Builder(CI);
3499
456
  Value *V = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
3500
456
  return Builder.CreateTrunc(V, helper.i1Ty);
3501
456
}
3502
3503
void GenerateDxilSample(CallInst *CI, Function *F, ArrayRef<Value *> sampleArgs,
3504
4.39k
                        Value *status, hlsl::OP *hlslOp) {
3505
4.39k
  IRBuilder<> Builder(CI);
3506
3507
4.39k
  CallInst *call = Builder.CreateCall(F, sampleArgs);
3508
3509
4.39k
  dxilutil::MigrateDebugValue(CI, call);
3510
3511
  // extract value part
3512
4.39k
  Value *retVal = ScalarizeResRet(CI->getType(), call, Builder);
3513
3514
  // Replace ret val.
3515
4.39k
  CI->replaceAllUsesWith(retVal);
3516
3517
  // get status
3518
4.39k
  if (status) {
3519
352
    UpdateStatus(call, status, Builder, hlslOp);
3520
352
  }
3521
4.39k
}
3522
3523
Value *TranslateSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
3524
                       HLOperationLowerHelper &helper,
3525
                       HLObjectOperationLowerHelper *pObjHelper,
3526
4.39k
                       bool &Translated) {
3527
4.39k
  hlsl::OP *hlslOP = &helper.hlslOP;
3528
4.39k
  SampleHelper sampleHelper(CI, opcode, pObjHelper);
3529
3530
4.39k
  if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
3531
0
    Translated = false;
3532
0
    return nullptr;
3533
0
  }
3534
4.39k
  Type *Ty = CI->getType();
3535
3536
4.39k
  Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
3537
3538
4.39k
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
3539
3540
4.39k
  switch (opcode) {
3541
2.42k
  case OP::OpCode::Sample: {
3542
2.42k
    Value *sampleArgs[] = {
3543
2.42k
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
3544
        // Coord.
3545
2.42k
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
3546
2.42k
        sampleHelper.coord[3],
3547
        // Offset.
3548
2.42k
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
3549
        // Clamp.
3550
2.42k
        sampleHelper.clamp};
3551
2.42k
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
3552
2.42k
  } break;
3553
1.00k
  case OP::OpCode::SampleLevel: {
3554
1.00k
    Value *sampleArgs[] = {
3555
1.00k
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
3556
        // Coord.
3557
1.00k
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
3558
1.00k
        sampleHelper.coord[3],
3559
        // Offset.
3560
1.00k
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
3561
        // LOD.
3562
1.00k
        sampleHelper.lod};
3563
1.00k
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
3564
1.00k
  } break;
3565
120
  case OP::OpCode::SampleGrad: {
3566
120
    Value *sampleArgs[] = {
3567
120
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
3568
        // Coord.
3569
120
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
3570
120
        sampleHelper.coord[3],
3571
        // Offset.
3572
120
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
3573
        // Ddx.
3574
120
        sampleHelper.ddx[0], sampleHelper.ddx[1], sampleHelper.ddx[2],
3575
        // Ddy.
3576
120
        sampleHelper.ddy[0], sampleHelper.ddy[1], sampleHelper.ddy[2],
3577
        // Clamp.
3578
120
        sampleHelper.clamp};
3579
120
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
3580
120
  } break;
3581
196
  case OP::OpCode::SampleBias: {
3582
196
    Value *sampleArgs[] = {
3583
196
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
3584
        // Coord.
3585
196
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
3586
196
        sampleHelper.coord[3],
3587
        // Offset.
3588
196
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
3589
        // Bias.
3590
196
        sampleHelper.bias,
3591
        // Clamp.
3592
196
        sampleHelper.clamp};
3593
196
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
3594
196
  } break;
3595
48
  case OP::OpCode::SampleCmpBias: {
3596
48
    Value *sampleArgs[] = {
3597
48
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
3598
        // Coord.
3599
48
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
3600
48
        sampleHelper.coord[3],
3601
        // Offset.
3602
48
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
3603
        // CmpVal.
3604
48
        sampleHelper.compareValue,
3605
        // Bias.
3606
48
        sampleHelper.bias,
3607
        // Clamp.
3608
48
        sampleHelper.clamp};
3609
48
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
3610
48
  } break;
3611
48
  case OP::OpCode::SampleCmpGrad: {
3612
48
    Value *sampleArgs[] = {
3613
48
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
3614
        // Coord.
3615
48
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
3616
48
        sampleHelper.coord[3],
3617
        // Offset.
3618
48
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
3619
        // CmpVal.
3620
48
        sampleHelper.compareValue,
3621
        // Ddx.
3622
48
        sampleHelper.ddx[0], sampleHelper.ddx[1], sampleHelper.ddx[2],
3623
        // Ddy.
3624
48
        sampleHelper.ddy[0], sampleHelper.ddy[1], sampleHelper.ddy[2],
3625
        // Clamp.
3626
48
        sampleHelper.clamp};
3627
48
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
3628
48
  } break;
3629
222
  case OP::OpCode::SampleCmp: {
3630
222
    Value *sampleArgs[] = {
3631
222
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
3632
        // Coord.
3633
222
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
3634
222
        sampleHelper.coord[3],
3635
        // Offset.
3636
222
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
3637
        // CmpVal.
3638
222
        sampleHelper.compareValue,
3639
        // Clamp.
3640
222
        sampleHelper.clamp};
3641
222
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
3642
222
  } break;
3643
192
  case OP::OpCode::SampleCmpLevel: {
3644
192
    Value *sampleArgs[] = {
3645
192
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
3646
        // Coord.
3647
192
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
3648
192
        sampleHelper.coord[3],
3649
        // Offset.
3650
192
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
3651
        // CmpVal.
3652
192
        sampleHelper.compareValue,
3653
        // LOD.
3654
192
        sampleHelper.lod};
3655
192
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
3656
192
  } break;
3657
140
  case OP::OpCode::SampleCmpLevelZero:
3658
140
  default: {
3659
140
    DXASSERT(opcode == OP::OpCode::SampleCmpLevelZero, "invalid sample opcode");
3660
140
    Value *sampleArgs[] = {
3661
140
        opArg, sampleHelper.texHandle, sampleHelper.samplerHandle,
3662
        // Coord.
3663
140
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
3664
140
        sampleHelper.coord[3],
3665
        // Offset.
3666
140
        sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2],
3667
        // CmpVal.
3668
140
        sampleHelper.compareValue};
3669
140
    GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP);
3670
140
  } break;
3671
4.39k
  }
3672
  // CI is replaced in GenerateDxilSample.
3673
4.39k
  return nullptr;
3674
4.39k
}
3675
3676
// Gather intrinsics.
3677
struct GatherHelper {
3678
  enum class GatherChannel {
3679
    GatherAll,
3680
    GatherRed,
3681
    GatherGreen,
3682
    GatherBlue,
3683
    GatherAlpha,
3684
  };
3685
3686
  GatherHelper(CallInst *CI, OP::OpCode op,
3687
               HLObjectOperationLowerHelper *pObjHelper,
3688
               GatherHelper::GatherChannel ch);
3689
3690
  OP::OpCode opcode;
3691
  Value *texHandle;
3692
  Value *samplerHandle;
3693
  static const unsigned kMaxCoordDimensions = 4;
3694
  Value *coord[kMaxCoordDimensions];
3695
  unsigned channel;
3696
  Value *special; // For CompareValue, Bias, LOD.
3697
  // Optional.
3698
  static const unsigned kMaxOffsetDimensions = 2;
3699
  Value *offset[kMaxOffsetDimensions];
3700
  // For the overload send different offset for each sample.
3701
  // Only save 3 sampleOffsets because use offset for normal overload as first
3702
  // sample offset.
3703
  static const unsigned kSampleOffsetDimensions = 3;
3704
  Value *sampleOffsets[kSampleOffsetDimensions][kMaxOffsetDimensions];
3705
  Value *status;
3706
3707
  bool hasSampleOffsets;
3708
3709
  unsigned maxHLOperandRead = 0;
3710
6.79k
  Value *ReadHLOperand(CallInst *CI, unsigned opIdx) {
3711
6.79k
    if (CI->getNumArgOperands() > opIdx) {
3712
4.58k
      maxHLOperandRead = std::max(maxHLOperandRead, opIdx);
3713
4.58k
      return CI->getArgOperand(opIdx);
3714
4.58k
    }
3715
2.20k
    return nullptr;
3716
6.79k
  }
3717
  void TranslateCoord(CallInst *CI, unsigned coordIdx,
3718
1.73k
                      unsigned coordDimensions) {
3719
1.73k
    Value *coordArg = ReadHLOperand(CI, coordIdx);
3720
1.73k
    DXASSERT_NOMSG(coordArg);
3721
1.73k
    DXASSERT(coordArg->getType()->getVectorNumElements() == coordDimensions,
3722
1.73k
             "otherwise, HL coordinate dimensions mismatch");
3723
1.73k
    IRBuilder<> Builder(CI);
3724
5.80k
    for (unsigned i = 0; i < coordDimensions; 
i++4.06k
)
3725
4.06k
      coord[i] = Builder.CreateExtractElement(coordArg, i);
3726
1.73k
    Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext()));
3727
4.62k
    for (unsigned i = coordDimensions; i < kMaxCoordDimensions; 
i++2.88k
)
3728
2.88k
      coord[i] = undefF;
3729
1.73k
  }
3730
1.73k
  void SetStatus(CallInst *CI, unsigned statusIdx) {
3731
1.73k
    status = ReadHLOperand(CI, statusIdx);
3732
1.73k
  }
3733
  void TranslateOffset(CallInst *CI, unsigned offsetIdx,
3734
1.73k
                       unsigned offsetDimensions) {
3735
1.73k
    IntegerType *i32Ty = Type::getInt32Ty(CI->getContext());
3736
1.73k
    if (Value *offsetArg = ReadHLOperand(CI, offsetIdx)) {
3737
804
      DXASSERT(offsetArg->getType()->getVectorNumElements() == offsetDimensions,
3738
804
               "otherwise, HL coordinate dimensions mismatch");
3739
804
      IRBuilder<> Builder(CI);
3740
2.41k
      for (unsigned i = 0; i < offsetDimensions; 
i++1.60k
)
3741
1.60k
        offset[i] = Builder.CreateExtractElement(offsetArg, i);
3742
934
    } else {
3743
      // Use zeros for offsets when not specified, not undef.
3744
934
      Value *zero = ConstantInt::get(i32Ty, (uint64_t)0);
3745
2.14k
      for (unsigned i = 0; i < offsetDimensions; 
i++1.21k
)
3746
1.21k
        offset[i] = zero;
3747
934
    }
3748
    // Use undef for components that should not be used for this resource dim.
3749
1.73k
    Value *undefI = UndefValue::get(i32Ty);
3750
2.39k
    for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; 
i++656
)
3751
656
      offset[i] = undefI;
3752
1.73k
  }
3753
  void TranslateSampleOffset(CallInst *CI, unsigned offsetIdx,
3754
848
                             unsigned offsetDimensions) {
3755
848
    Value *undefI = UndefValue::get(Type::getInt32Ty(CI->getContext()));
3756
848
    if (CI->getNumArgOperands() >= (offsetIdx + kSampleOffsetDimensions)) {
3757
344
      hasSampleOffsets = true;
3758
344
      IRBuilder<> Builder(CI);
3759
1.37k
      for (unsigned ch = 0; ch < kSampleOffsetDimensions; 
ch++1.03k
) {
3760
1.03k
        Value *offsetArg = ReadHLOperand(CI, offsetIdx + ch);
3761
3.09k
        for (unsigned i = 0; i < offsetDimensions; 
i++2.06k
)
3762
2.06k
          sampleOffsets[ch][i] = Builder.CreateExtractElement(offsetArg, i);
3763
1.03k
        for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; 
i++0
)
3764
0
          sampleOffsets[ch][i] = undefI;
3765
1.03k
      }
3766
344
    }
3767
848
  }
3768
  // Update the offset args for gather with sample offset at sampleIdx.
3769
  void UpdateOffsetInGatherArgs(MutableArrayRef<Value *> gatherArgs,
3770
1.03k
                                unsigned sampleIdx) {
3771
1.03k
    unsigned offsetBase = DXIL::OperandIndex::kTextureGatherOffset0OpIdx;
3772
3.09k
    for (unsigned i = 0; i < kMaxOffsetDimensions; 
i++2.06k
)
3773
      // -1 because offset for sample 0 is in GatherHelper::offset.
3774
2.06k
      gatherArgs[offsetBase + i] = sampleOffsets[sampleIdx - 1][i];
3775
1.03k
  }
3776
};
3777
3778
GatherHelper::GatherHelper(CallInst *CI, OP::OpCode op,
3779
                           HLObjectOperationLowerHelper *pObjHelper,
3780
                           GatherHelper::GatherChannel ch)
3781
1.73k
    : opcode(op), special(nullptr), hasSampleOffsets(false) {
3782
3783
1.73k
  switch (ch) {
3784
626
  case GatherChannel::GatherAll:
3785
626
    channel = 0;
3786
626
    break;
3787
320
  case GatherChannel::GatherRed:
3788
320
    channel = 0;
3789
320
    break;
3790
256
  case GatherChannel::GatherGreen:
3791
256
    channel = 1;
3792
256
    break;
3793
272
  case GatherChannel::GatherBlue:
3794
272
    channel = 2;
3795
272
    break;
3796
264
  case GatherChannel::GatherAlpha:
3797
264
    channel = 3;
3798
264
    break;
3799
1.73k
  }
3800
3801
1.73k
  IRBuilder<> Builder(CI);
3802
1.73k
  texHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
3803
1.73k
  samplerHandle = CI->getArgOperand(HLOperandIndex::kSampleSamplerArgIndex);
3804
3805
1.73k
  DXIL::ResourceKind RK = pObjHelper->GetRK(texHandle);
3806
1.73k
  if (RK == DXIL::ResourceKind::Invalid) {
3807
0
    opcode = DXIL::OpCode::NumOpCodes;
3808
0
    return;
3809
0
  }
3810
1.73k
  unsigned coordSize = DxilResource::GetNumCoords(RK);
3811
1.73k
  unsigned offsetSize = DxilResource::GetNumOffsets(RK);
3812
1.73k
  bool cube = RK == DXIL::ResourceKind::TextureCube ||
3813
1.73k
              
RK == DXIL::ResourceKind::TextureCubeArray1.57k
;
3814
3815
1.73k
  const unsigned kCoordArgIdx = HLOperandIndex::kSampleCoordArgIndex;
3816
1.73k
  TranslateCoord(CI, kCoordArgIdx, coordSize);
3817
3818
1.73k
  switch (op) {
3819
1.04k
  case OP::OpCode::TextureGather: {
3820
1.04k
    unsigned statusIdx;
3821
1.04k
    if (cube) {
3822
168
      TranslateOffset(CI, HLOperandIndex::kInvalidIdx, offsetSize);
3823
168
      statusIdx = HLOperandIndex::kGatherCubeStatusArgIndex;
3824
880
    } else {
3825
880
      TranslateOffset(CI, HLOperandIndex::kGatherOffsetArgIndex, offsetSize);
3826
      // Gather all don't have sample offset version overload.
3827
880
      if (ch != GatherChannel::GatherAll)
3828
536
        TranslateSampleOffset(CI, HLOperandIndex::kGatherSampleOffsetArgIndex,
3829
536
                              offsetSize);
3830
880
      statusIdx = hasSampleOffsets
3831
880
                      ? 
HLOperandIndex::kGatherStatusWithSampleOffsetArgIndex216
3832
880
                      : 
HLOperandIndex::kGatherStatusArgIndex664
;
3833
880
    }
3834
1.04k
    SetStatus(CI, statusIdx);
3835
1.04k
  } break;
3836
546
  case OP::OpCode::TextureGatherCmp: {
3837
546
    special = ReadHLOperand(CI, HLOperandIndex::kGatherCmpCmpValArgIndex);
3838
546
    unsigned statusIdx;
3839
546
    if (cube) {
3840
160
      TranslateOffset(CI, HLOperandIndex::kInvalidIdx, offsetSize);
3841
160
      statusIdx = HLOperandIndex::kGatherCmpCubeStatusArgIndex;
3842
386
    } else {
3843
386
      TranslateOffset(CI, HLOperandIndex::kGatherCmpOffsetArgIndex, offsetSize);
3844
      // Gather all don't have sample offset version overload.
3845
386
      if (ch != GatherChannel::GatherAll)
3846
312
        TranslateSampleOffset(
3847
312
            CI, HLOperandIndex::kGatherCmpSampleOffsetArgIndex, offsetSize);
3848
386
      statusIdx = hasSampleOffsets
3849
386
                      ? 
HLOperandIndex::kGatherCmpStatusWithSampleOffsetArgIndex128
3850
386
                      : 
HLOperandIndex::kGatherCmpStatusArgIndex258
;
3851
386
    }
3852
546
    SetStatus(CI, statusIdx);
3853
546
  } break;
3854
144
  case OP::OpCode::TextureGatherRaw: {
3855
144
    unsigned statusIdx;
3856
144
    TranslateOffset(CI, HLOperandIndex::kGatherOffsetArgIndex, offsetSize);
3857
    // Gather all don't have sample offset version overload.
3858
144
    DXASSERT(ch == GatherChannel::GatherAll,
3859
144
             "Raw gather must use all channels");
3860
144
    DXASSERT(!cube, "Raw gather can't be used with cube textures");
3861
144
    DXASSERT(!hasSampleOffsets,
3862
144
             "Raw gather doesn't support individual offsets");
3863
144
    statusIdx = HLOperandIndex::kGatherStatusArgIndex;
3864
144
    SetStatus(CI, statusIdx);
3865
144
  } break;
3866
0
  default:
3867
0
    DXASSERT(0, "invalid opcode for Gather");
3868
0
    break;
3869
1.73k
  }
3870
1.73k
  DXASSERT(maxHLOperandRead == CI->getNumArgOperands() - 1,
3871
1.73k
           "otherwise, unused HL arguments for Sample op");
3872
1.73k
}
3873
3874
void GenerateDxilGather(CallInst *CI, Function *F,
3875
                        MutableArrayRef<Value *> gatherArgs,
3876
1.73k
                        GatherHelper &helper, hlsl::OP *hlslOp) {
3877
1.73k
  IRBuilder<> Builder(CI);
3878
3879
1.73k
  CallInst *call = Builder.CreateCall(F, gatherArgs);
3880
3881
1.73k
  dxilutil::MigrateDebugValue(CI, call);
3882
3883
1.73k
  Value *retVal;
3884
1.73k
  if (!helper.hasSampleOffsets) {
3885
    // extract value part
3886
1.39k
    retVal = ScalarizeResRet(CI->getType(), call, Builder);
3887
1.39k
  } else {
3888
344
    retVal = UndefValue::get(CI->getType());
3889
344
    Value *elt = Builder.CreateExtractValue(call, (uint64_t)0);
3890
344
    retVal = Builder.CreateInsertElement(retVal, elt, (uint64_t)0);
3891
3892
344
    helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 1);
3893
344
    CallInst *callY = Builder.CreateCall(F, gatherArgs);
3894
344
    elt = Builder.CreateExtractValue(callY, (uint64_t)1);
3895
344
    retVal = Builder.CreateInsertElement(retVal, elt, 1);
3896
3897
344
    helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 2);
3898
344
    CallInst *callZ = Builder.CreateCall(F, gatherArgs);
3899
344
    elt = Builder.CreateExtractValue(callZ, (uint64_t)2);
3900
344
    retVal = Builder.CreateInsertElement(retVal, elt, 2);
3901
3902
344
    helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 3);
3903
344
    CallInst *callW = Builder.CreateCall(F, gatherArgs);
3904
344
    elt = Builder.CreateExtractValue(callW, (uint64_t)3);
3905
344
    retVal = Builder.CreateInsertElement(retVal, elt, 3);
3906
3907
    // TODO: UpdateStatus for each gather call.
3908
344
  }
3909
3910
  // Replace ret val.
3911
1.73k
  CI->replaceAllUsesWith(retVal);
3912
3913
  // Get status
3914
1.73k
  if (helper.status) {
3915
464
    UpdateStatus(call, helper.status, Builder, hlslOp);
3916
464
  }
3917
1.73k
}
3918
3919
Value *TranslateGather(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
3920
                       HLOperationLowerHelper &helper,
3921
                       HLObjectOperationLowerHelper *pObjHelper,
3922
1.73k
                       bool &Translated) {
3923
1.73k
  hlsl::OP *hlslOP = &helper.hlslOP;
3924
1.73k
  GatherHelper::GatherChannel ch = GatherHelper::GatherChannel::GatherAll;
3925
1.73k
  switch (IOP) {
3926
376
  case IntrinsicOp::MOP_Gather:
3927
482
  case IntrinsicOp::MOP_GatherCmp:
3928
626
  case IntrinsicOp::MOP_GatherRaw:
3929
626
    ch = GatherHelper::GatherChannel::GatherAll;
3930
626
    break;
3931
192
  case IntrinsicOp::MOP_GatherRed:
3932
320
  case IntrinsicOp::MOP_GatherCmpRed:
3933
320
    ch = GatherHelper::GatherChannel::GatherRed;
3934
320
    break;
3935
152
  case IntrinsicOp::MOP_GatherGreen:
3936
256
  case IntrinsicOp::MOP_GatherCmpGreen:
3937
256
    ch = GatherHelper::GatherChannel::GatherGreen;
3938
256
    break;
3939
168
  case IntrinsicOp::MOP_GatherBlue:
3940
272
  case IntrinsicOp::MOP_GatherCmpBlue:
3941
272
    ch = GatherHelper::GatherChannel::GatherBlue;
3942
272
    break;
3943
160
  case IntrinsicOp::MOP_GatherAlpha:
3944
264
  case IntrinsicOp::MOP_GatherCmpAlpha:
3945
264
    ch = GatherHelper::GatherChannel::GatherAlpha;
3946
264
    break;
3947
0
  default:
3948
0
    DXASSERT(0, "invalid gather intrinsic");
3949
0
    break;
3950
1.73k
  }
3951
3952
1.73k
  GatherHelper gatherHelper(CI, opcode, pObjHelper, ch);
3953
3954
1.73k
  if (gatherHelper.opcode == DXIL::OpCode::NumOpCodes) {
3955
0
    Translated = false;
3956
0
    return nullptr;
3957
0
  }
3958
1.73k
  Type *Ty = CI->getType();
3959
3960
1.73k
  Function *F = hlslOP->GetOpFunc(gatherHelper.opcode, Ty->getScalarType());
3961
3962
1.73k
  Constant *opArg = hlslOP->GetU32Const((unsigned)gatherHelper.opcode);
3963
1.73k
  Value *channelArg = hlslOP->GetU32Const(gatherHelper.channel);
3964
3965
1.73k
  switch (opcode) {
3966
1.04k
  case OP::OpCode::TextureGather: {
3967
1.04k
    Value *gatherArgs[] = {opArg, gatherHelper.texHandle,
3968
1.04k
                           gatherHelper.samplerHandle,
3969
                           // Coord.
3970
1.04k
                           gatherHelper.coord[0], gatherHelper.coord[1],
3971
1.04k
                           gatherHelper.coord[2], gatherHelper.coord[3],
3972
                           // Offset.
3973
1.04k
                           gatherHelper.offset[0], gatherHelper.offset[1],
3974
                           // Channel.
3975
1.04k
                           channelArg};
3976
1.04k
    GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP);
3977
1.04k
  } break;
3978
546
  case OP::OpCode::TextureGatherCmp: {
3979
546
    Value *gatherArgs[] = {opArg, gatherHelper.texHandle,
3980
546
                           gatherHelper.samplerHandle,
3981
                           // Coord.
3982
546
                           gatherHelper.coord[0], gatherHelper.coord[1],
3983
546
                           gatherHelper.coord[2], gatherHelper.coord[3],
3984
                           // Offset.
3985
546
                           gatherHelper.offset[0], gatherHelper.offset[1],
3986
                           // Channel.
3987
546
                           channelArg,
3988
                           // CmpVal.
3989
546
                           gatherHelper.special};
3990
546
    GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP);
3991
546
  } break;
3992
144
  case OP::OpCode::TextureGatherRaw: {
3993
144
    Value *gatherArgs[] = {opArg, gatherHelper.texHandle,
3994
144
                           gatherHelper.samplerHandle,
3995
                           // Coord.
3996
144
                           gatherHelper.coord[0], gatherHelper.coord[1],
3997
144
                           gatherHelper.coord[2], gatherHelper.coord[3],
3998
                           // Offset.
3999
144
                           gatherHelper.offset[0], gatherHelper.offset[1]};
4000
144
    GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP);
4001
144
    break;
4002
0
  }
4003
0
  default:
4004
0
    DXASSERT(0, "invalid opcode for Gather");
4005
0
    break;
4006
1.73k
  }
4007
  // CI is replaced in GenerateDxilGather.
4008
1.73k
  return nullptr;
4009
1.73k
}
4010
4011
static Value *
4012
TranslateWriteSamplerFeedback(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
4013
                              HLOperationLowerHelper &helper,
4014
                              HLObjectOperationLowerHelper *pObjHelper,
4015
300
                              bool &Translated) {
4016
300
  hlsl::OP *hlslOP = &helper.hlslOP;
4017
300
  SampleHelper sampleHelper(CI, opcode, pObjHelper);
4018
4019
300
  if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) {
4020
0
    Translated = false;
4021
0
    return nullptr;
4022
0
  }
4023
300
  Type *Ty = CI->getType();
4024
4025
300
  Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
4026
4027
300
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
4028
4029
300
  IRBuilder<> Builder(CI);
4030
4031
300
  switch (opcode) {
4032
180
  case OP::OpCode::WriteSamplerFeedback: {
4033
180
    Value *samplerFeedbackArgs[] = {
4034
180
        opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle,
4035
180
        sampleHelper.samplerHandle,
4036
        // Coord.
4037
180
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
4038
180
        sampleHelper.coord[3],
4039
        // Clamp.
4040
180
        sampleHelper.clamp};
4041
180
    return Builder.CreateCall(F, samplerFeedbackArgs);
4042
0
  } break;
4043
64
  case OP::OpCode::WriteSamplerFeedbackBias: {
4044
64
    Value *samplerFeedbackArgs[] = {
4045
64
        opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle,
4046
64
        sampleHelper.samplerHandle,
4047
        // Coord.
4048
64
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
4049
64
        sampleHelper.coord[3],
4050
        // Bias.
4051
64
        sampleHelper.bias,
4052
        // Clamp.
4053
64
        sampleHelper.clamp};
4054
64
    return Builder.CreateCall(F, samplerFeedbackArgs);
4055
0
  } break;
4056
32
  case OP::OpCode::WriteSamplerFeedbackGrad: {
4057
32
    Value *samplerFeedbackArgs[] = {
4058
32
        opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle,
4059
32
        sampleHelper.samplerHandle,
4060
        // Coord.
4061
32
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
4062
32
        sampleHelper.coord[3],
4063
        // Ddx.
4064
32
        sampleHelper.ddx[0], sampleHelper.ddx[1], sampleHelper.ddx[2],
4065
        // Ddy.
4066
32
        sampleHelper.ddy[0], sampleHelper.ddy[1], sampleHelper.ddy[2],
4067
        // Clamp.
4068
32
        sampleHelper.clamp};
4069
32
    return Builder.CreateCall(F, samplerFeedbackArgs);
4070
0
  } break;
4071
24
  case OP::OpCode::WriteSamplerFeedbackLevel: {
4072
24
    Value *samplerFeedbackArgs[] = {
4073
24
        opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle,
4074
24
        sampleHelper.samplerHandle,
4075
        // Coord.
4076
24
        sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2],
4077
24
        sampleHelper.coord[3],
4078
        // LOD.
4079
24
        sampleHelper.lod};
4080
24
    return Builder.CreateCall(F, samplerFeedbackArgs);
4081
0
  } break;
4082
0
  default:
4083
0
    DXASSERT(false, "otherwise, unknown SamplerFeedback Op");
4084
0
    break;
4085
300
  }
4086
0
  return nullptr;
4087
300
}
4088
4089
// Load/Store intrinsics.
4090
21.8k
OP::OpCode LoadOpFromResKind(DxilResource::Kind RK) {
4091
21.8k
  switch (RK) {
4092
3.46k
  case DxilResource::Kind::RawBuffer:
4093
14.1k
  case DxilResource::Kind::StructuredBuffer:
4094
14.1k
    return OP::OpCode::RawBufferLoad;
4095
2.56k
  case DxilResource::Kind::TypedBuffer:
4096
2.56k
    return OP::OpCode::BufferLoad;
4097
0
  case DxilResource::Kind::Invalid:
4098
0
    DXASSERT(0, "invalid resource kind");
4099
0
    break;
4100
5.05k
  default:
4101
5.05k
    return OP::OpCode::TextureLoad;
4102
21.8k
  }
4103
0
  return OP::OpCode::TextureLoad;
4104
21.8k
}
4105
4106
struct ResLoadHelper {
4107
  // Default constructor uses CI load intrinsic call
4108
  //  to get the retval and various location indicators.
4109
  ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC,
4110
                Value *h, IntrinsicOp IOP, LoadInst *TyBufSubLoad = nullptr);
4111
  // Alternative constructor explicitly sets the index.
4112
  // Used for some subscript operators that feed the generic HL call inst
4113
  // into a load op and by the matrixload call instruction.
4114
  ResLoadHelper(Instruction *Inst, DxilResource::Kind RK, Value *h, Value *idx,
4115
                Value *Offset, Value *status = nullptr, Value *mip = nullptr)
4116
10.1k
      : intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(Inst),
4117
10.1k
        addr(idx), offset(Offset), status(status), mipLevel(mip) {
4118
10.1k
    opcode = LoadOpFromResKind(RK);
4119
10.1k
    Type *Ty = Inst->getType();
4120
10.1k
    if (opcode == OP::OpCode::RawBufferLoad && 
Ty->isVectorTy()10.0k
&&
4121
10.1k
        
Ty->getVectorNumElements() > 15.98k
&&
4122
10.1k
        
Inst->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()4.27k
)
4123
1.40k
      opcode = OP::OpCode::RawBufferVectorLoad;
4124
10.1k
  }
4125
  OP::OpCode opcode;
4126
  IntrinsicOp intrinsicOpCode;
4127
  unsigned dxilMajor;
4128
  unsigned dxilMinor;
4129
  Value *handle;
4130
  Value *retVal;
4131
  Value *addr;
4132
  Value *offset;
4133
  Value *status;
4134
  Value *mipLevel;
4135
};
4136
4137
// Uses CI arguments to determine the index, offset, and mipLevel also depending
4138
// on the RK/RC resource kind and class, which determine the opcode.
4139
// Handle and IOP are set explicitly.
4140
// For typed buffer loads, the call instruction feeds into a load
4141
// represented by TyBufSubLoad which determines the instruction to replace.
4142
// Otherwise, CI is replaced.
4143
ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK,
4144
                             DxilResourceBase::Class RC, Value *hdl,
4145
                             IntrinsicOp IOP, LoadInst *TyBufSubLoad)
4146
11.6k
    : intrinsicOpCode(IOP), handle(hdl), offset(nullptr), status(nullptr) {
4147
11.6k
  opcode = LoadOpFromResKind(RK);
4148
11.6k
  bool bForSubscript = false;
4149
11.6k
  if (TyBufSubLoad) {
4150
2.81k
    bForSubscript = true;
4151
2.81k
    retVal = TyBufSubLoad;
4152
2.81k
  } else
4153
8.85k
    retVal = CI;
4154
11.6k
  const unsigned kAddrIdx = HLOperandIndex::kBufLoadAddrOpIdx;
4155
11.6k
  addr = CI->getArgOperand(kAddrIdx);
4156
11.6k
  unsigned argc = CI->getNumArgOperands();
4157
11.6k
  Type *i32Ty = Type::getInt32Ty(CI->getContext());
4158
11.6k
  unsigned StatusIdx = HLOperandIndex::kBufLoadStatusOpIdx;
4159
11.6k
  unsigned OffsetIdx = HLOperandIndex::kInvalidIdx;
4160
4161
11.6k
  if (opcode == OP::OpCode::TextureLoad) {
4162
4.91k
    bool IsMS = (RK == DxilResource::Kind::Texture2DMS ||
4163
4.91k
                 
RK == DxilResource::Kind::Texture2DMSArray4.66k
);
4164
    // Set mip and status index.
4165
4.91k
    offset = UndefValue::get(i32Ty);
4166
4.91k
    if (IsMS) {
4167
      // Retrieve appropriate MS parameters.
4168
408
      StatusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx;
4169
      // MS textures keep the sample param (mipLevel) regardless of writability.
4170
408
      if (bForSubscript)
4171
50
        mipLevel = ConstantInt::get(i32Ty, 0);
4172
358
      else
4173
358
        mipLevel =
4174
358
            CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx);
4175
4.51k
    } else if (RC == DxilResourceBase::Class::UAV) {
4176
      // DXIL requires that non-MS UAV accesses set miplevel to undef.
4177
2.07k
      mipLevel = UndefValue::get(i32Ty);
4178
2.07k
      StatusIdx = HLOperandIndex::kRWTexLoadStatusOpIdx;
4179
2.43k
    } else {
4180
      // Non-MS SRV case.
4181
2.43k
      StatusIdx = HLOperandIndex::kTexLoadStatusOpIdx;
4182
2.43k
      if (bForSubscript)
4183
        // Having no miplevel param, single subscripted SRVs default to 0.
4184
1.40k
        mipLevel = ConstantInt::get(i32Ty, 0);
4185
1.03k
      else
4186
        // Mip is stored at the last channel of the coordinate vector.
4187
1.03k
        mipLevel = IRBuilder<>(CI).CreateExtractElement(
4188
1.03k
            addr, DxilResource::GetNumCoords(RK));
4189
2.43k
    }
4190
4.91k
    if (RC == DxilResourceBase::Class::SRV)
4191
2.73k
      OffsetIdx = IsMS ? 
HLOperandIndex::kTex2DMSLoadOffsetOpIdx304
4192
2.73k
                       : 
HLOperandIndex::kTexLoadOffsetOpIdx2.43k
;
4193
6.75k
  } else if (opcode == OP::OpCode::RawBufferLoad) {
4194
    // If native vectors are available and this load had a vector
4195
    // with more than one elements, convert the RawBufferLod to the
4196
    // native vector variant RawBufferVectorLoad.
4197
4.19k
    Type *Ty = CI->getType();
4198
4.19k
    if (Ty->isVectorTy() && 
Ty->getVectorNumElements() > 12.20k
&&
4199
4.19k
        
CI->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()2.07k
)
4200
1.00k
      opcode = OP::OpCode::RawBufferVectorLoad;
4201
4.19k
  }
4202
4203
  // Set offset.
4204
11.6k
  if (DXIL::IsStructuredBuffer(RK))
4205
    // Structured buffers receive no exterior offset in this constructor,
4206
    // but may need to increment it later.
4207
1.29k
    offset = ConstantInt::get(i32Ty, 0U);
4208
10.3k
  else if (argc > OffsetIdx)
4209
    // Textures may set the offset from an explicit argument.
4210
102
    offset = CI->getArgOperand(OffsetIdx);
4211
10.2k
  else
4212
    // All other cases use undef.
4213
10.2k
    offset = UndefValue::get(i32Ty);
4214
4215
  // Retrieve status value if provided.
4216
11.6k
  if (argc > StatusIdx)
4217
1.12k
    status = CI->getArgOperand(StatusIdx);
4218
11.6k
}
4219
4220
void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status,
4221
                                 hlsl::OP *OP, HLResource::Kind RK,
4222
                                 const DataLayout &DL);
4223
4224
static Constant *GetRawBufferMaskForETy(Type *Ty, unsigned NumComponents,
4225
11.6k
                                        hlsl::OP *OP) {
4226
11.6k
  unsigned mask = 0;
4227
4228
11.6k
  switch (NumComponents) {
4229
0
  case 0:
4230
0
    break;
4231
7.03k
  case 1:
4232
7.03k
    mask = DXIL::kCompMask_X;
4233
7.03k
    break;
4234
1.14k
  case 2:
4235
1.14k
    mask = DXIL::kCompMask_X | DXIL::kCompMask_Y;
4236
1.14k
    break;
4237
606
  case 3:
4238
606
    mask = DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z;
4239
606
    break;
4240
2.89k
  case 4:
4241
2.89k
    mask = DXIL::kCompMask_All;
4242
2.89k
    break;
4243
0
  default:
4244
0
    DXASSERT(false, "Cannot load more than 2 components for 64bit types.");
4245
11.6k
  }
4246
11.6k
  return OP->GetI8Const(mask);
4247
11.6k
}
4248
4249
Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset,
4250
                        Value *status, Type *EltTy,
4251
                        MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
4252
                        IRBuilder<> &Builder, unsigned NumComponents,
4253
                        Constant *alignment);
4254
4255
// Sets up arguments for buffer load call.
4256
static SmallVector<Value *, 10> GetBufLoadArgs(ResLoadHelper helper,
4257
                                               HLResource::Kind RK,
4258
                                               IRBuilder<> Builder,
4259
18.1k
                                               unsigned LdSize) {
4260
18.1k
  OP::OpCode opcode = helper.opcode;
4261
18.1k
  llvm::Constant *opArg = Builder.getInt32((uint32_t)opcode);
4262
4263
18.1k
  unsigned alignment = RK == DxilResource::Kind::RawBuffer ? 
4U3.18k
:
8U14.9k
;
4264
18.1k
  alignment = std::min(alignment, LdSize);
4265
18.1k
  Constant *alignmentVal = Builder.getInt32(alignment);
4266
4267
  // Assemble args specific to the type bab/struct/typed:
4268
  // - Typed needs to handle the possibility of vector coords
4269
  // - Raws need to calculate alignment and mask values.
4270
18.1k
  SmallVector<Value *, 10> Args;
4271
18.1k
  Args.emplace_back(opArg);         // opcode @0.
4272
18.1k
  Args.emplace_back(helper.handle); // Resource handle @1
4273
4274
  // Set offsets appropriate for the load operation.
4275
18.1k
  bool isVectorAddr = helper.addr->getType()->isVectorTy();
4276
18.1k
  if (opcode == OP::OpCode::TextureLoad) {
4277
3.73k
    llvm::Value *undefI = llvm::UndefValue::get(Builder.getInt32Ty());
4278
4279
    // Set mip level or sample for MS texutures @2.
4280
3.73k
    Args.emplace_back(helper.mipLevel);
4281
    // Set texture coords according to resource kind @3-5
4282
    // Coords unused by the resource kind are undefs.
4283
3.73k
    unsigned coordSize = DxilResource::GetNumCoords(RK);
4284
14.9k
    for (unsigned i = 0; i < 3; 
i++11.1k
)
4285
11.1k
      if (i < coordSize)
4286
7.33k
        Args.emplace_back(isVectorAddr
4287
7.33k
                              ? 
Builder.CreateExtractElement(helper.addr, i)6.91k
4288
7.33k
                              : 
helper.addr420
);
4289
3.86k
      else
4290
3.86k
        Args.emplace_back(undefI);
4291
4292
    // Set texture offsets according to resource kind @7-9
4293
    // Coords unused by the resource kind are undefs.
4294
3.73k
    unsigned offsetSize = DxilResource::GetNumOffsets(RK);
4295
3.73k
    if (!helper.offset || isa<llvm::UndefValue>(helper.offset))
4296
3.63k
      offsetSize = 0;
4297
14.9k
    for (unsigned i = 0; i < 3; 
i++11.1k
)
4298
11.1k
      if (i < offsetSize)
4299
204
        Args.emplace_back(Builder.CreateExtractElement(helper.offset, i));
4300
10.9k
      else
4301
10.9k
        Args.emplace_back(undefI);
4302
14.3k
  } else {
4303
    // If not TextureLoad, it could be a typed or raw buffer load.
4304
    // They have mostly similar arguments.
4305
14.3k
    DXASSERT(opcode == OP::OpCode::RawBufferLoad ||
4306
14.3k
                 opcode == OP::OpCode::RawBufferVectorLoad ||
4307
14.3k
                 opcode == OP::OpCode::BufferLoad,
4308
14.3k
             "Wrong opcode in get load args");
4309
14.3k
    Args.emplace_back(
4310
14.3k
        isVectorAddr ? 
Builder.CreateExtractElement(helper.addr, (uint64_t)0)232
4311
14.3k
                     : 
helper.addr14.1k
);
4312
14.3k
    Args.emplace_back(helper.offset);
4313
14.3k
    if (opcode == OP::OpCode::RawBufferLoad) {
4314
      // Unlike typed buffer load, raw buffer load has mask and alignment.
4315
10.7k
      Args.emplace_back(nullptr);      // Mask will be added later %4.
4316
10.7k
      Args.emplace_back(alignmentVal); // alignment @5.
4317
10.7k
    } else 
if (3.65k
opcode == OP::OpCode::RawBufferVectorLoad3.65k
) {
4318
      // RawBufferVectorLoad takes just alignment, no mask.
4319
2.41k
      Args.emplace_back(alignmentVal); // alignment @4
4320
2.41k
    }
4321
14.3k
  }
4322
18.1k
  return Args;
4323
18.1k
}
4324
4325
20.4k
static bool isMinPrecisionType(Type *EltTy, const DataLayout &DL) {
4326
20.4k
  return !EltTy->isIntegerTy(1) &&
4327
20.4k
         
DL.getTypeAllocSizeInBits(EltTy) > EltTy->getPrimitiveSizeInBits()20.1k
;
4328
20.4k
}
4329
4330
static Type *widenMinPrecisionType(Type *Ty, LLVMContext &Ctx,
4331
20.4k
                                   const DataLayout &DL) {
4332
20.4k
  Type *EltTy = Ty->getScalarType();
4333
20.4k
  if (!isMinPrecisionType(EltTy, DL))
4334
20.2k
    return Ty;
4335
166
  Type *WideTy = EltTy->isFloatingPointTy() ? 
Type::getFloatTy(Ctx)48
4336
166
                                            : 
Type::getInt32Ty(Ctx)118
;
4337
166
  if (Ty->isVectorTy())
4338
158
    return VectorType::get(WideTy, Ty->getVectorNumElements());
4339
8
  return WideTy;
4340
166
}
4341
4342
// Emits as many calls as needed to load the full vector
4343
// Performs any needed extractions and conversions of the results.
4344
Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
4345
                        IRBuilder<> &Builder, hlsl::OP *OP,
4346
18.1k
                        const DataLayout &DL) {
4347
18.1k
  OP::OpCode opcode = helper.opcode;
4348
18.1k
  Type *Ty = helper.retVal->getType();
4349
4350
18.1k
  unsigned NumComponents = 1;
4351
18.1k
  if (Ty->isVectorTy())
4352
11.7k
    NumComponents = Ty->getVectorNumElements();
4353
4354
18.1k
  const bool isTyped = DXIL::IsTyped(RK);
4355
18.1k
  Type *OrigEltTy = Ty->getScalarType();
4356
18.1k
  Type *WidenedTy = widenMinPrecisionType(Ty, Builder.getContext(), DL);
4357
18.1k
  Type *EltTy = WidenedTy->getScalarType();
4358
18.1k
  const bool isMinPrec = (WidenedTy != Ty);
4359
18.1k
  const bool is64 = (EltTy->isIntegerTy(64) || 
EltTy->isDoubleTy()16.9k
);
4360
18.1k
  const bool isBool = EltTy->isIntegerTy(1);
4361
  // DXIL buffer loads require i32; narrow types are reconverted after load.
4362
18.1k
  if (isBool || 
(17.7k
is6417.7k
&&
isTyped2.58k
))
4363
480
    EltTy = Builder.getInt32Ty();
4364
4365
  // Calculate load size with the scalar memory element type.
4366
18.1k
  unsigned LdSize = DL.getTypeAllocSize(EltTy);
4367
4368
  // Adjust number of components as needed.
4369
18.1k
  if (is64 && 
isTyped2.58k
) {
4370
    // 64-bit types are stored as int32 pairs in typed buffers.
4371
146
    DXASSERT(NumComponents <= 2, "Typed buffers only allow 4 dwords.");
4372
146
    NumComponents *= 2;
4373
17.9k
  } else if (opcode == OP::OpCode::RawBufferVectorLoad) {
4374
    // Native vector loads only have a single vector element in ResRet.
4375
2.41k
    EltTy = VectorType::get(EltTy, NumComponents);
4376
2.41k
    NumComponents = 1;
4377
2.41k
  }
4378
4379
18.1k
  SmallVector<Value *, 10> Args = GetBufLoadArgs(helper, RK, Builder, LdSize);
4380
4381
  // Keep track of the first load for debug info migration.
4382
18.1k
  Value *FirstLd = nullptr;
4383
4384
18.1k
  unsigned OffsetIdx = 0;
4385
18.1k
  if (RK == DxilResource::Kind::RawBuffer)
4386
    // Raw buffers can't use offset param. Add to coord index.
4387
3.18k
    OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx;
4388
14.9k
  else if (RK == DxilResource::Kind::StructuredBuffer)
4389
9.96k
    OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx;
4390
4391
  // Create call(s) to function object and collect results in Elts.
4392
  // Typed buffer loads are limited to one load of up to 4 32-bit values.
4393
  // Raw buffer loads might need multiple loads in chunks of 4.
4394
18.1k
  SmallVector<Value *, 4> Elts(NumComponents);
4395
37.1k
  for (unsigned i = 0; i < NumComponents;) {
4396
    // Load 4 elements or however many less than 4 are left to load.
4397
19.0k
    unsigned chunkSize = std::min(NumComponents - i, 4U);
4398
4399
    // Assign mask for raw buffer loads.
4400
19.0k
    if (opcode == OP::OpCode::RawBufferLoad) {
4401
11.6k
      Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] =
4402
11.6k
          GetRawBufferMaskForETy(EltTy, chunkSize, OP);
4403
      // If we've loaded a chunk already, update offset to next chunk.
4404
11.6k
      if (FirstLd != nullptr)
4405
916
        Args[OffsetIdx] =
4406
916
            Builder.CreateAdd(Args[OffsetIdx], OP->GetU32Const(4 * LdSize));
4407
11.6k
    }
4408
4409
19.0k
    Function *F = OP->GetOpFunc(opcode, EltTy);
4410
19.0k
    Value *Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(opcode));
4411
19.0k
    unsigned StatusIndex;
4412
4413
    // Extract elements from returned ResRet.
4414
    // Native vector loads just have one vector element in the ResRet.
4415
    // Others have up to four scalars that need to be individually extracted.
4416
19.0k
    if (opcode == OP::OpCode::RawBufferVectorLoad) {
4417
2.41k
      Elts[i++] = Builder.CreateExtractValue(Ld, 0);
4418
2.41k
      StatusIndex = DXIL::kVecResRetStatusIndex;
4419
16.6k
    } else {
4420
53.5k
      for (unsigned j = 0; j < chunkSize; 
j++, i++36.9k
)
4421
36.9k
        Elts[i] = Builder.CreateExtractValue(Ld, j);
4422
16.6k
      StatusIndex = DXIL::kResRetStatusIndex;
4423
16.6k
    }
4424
4425
    // Update status.
4426
19.0k
    UpdateStatus(Ld, helper.status, Builder, OP, StatusIndex);
4427
4428
19.0k
    if (!FirstLd)
4429
18.1k
      FirstLd = Ld;
4430
19.0k
  }
4431
18.1k
  DXASSERT(FirstLd, "No loads created by TranslateBufLoad");
4432
4433
  // Convert loaded 32-bit integers to intended 64-bit type representation.
4434
18.1k
  if (isTyped) {
4435
4.97k
    Type *RegEltTy = Ty->getScalarType();
4436
4.97k
    if (RegEltTy->isDoubleTy()) {
4437
68
      Function *makeDouble = OP->GetOpFunc(DXIL::OpCode::MakeDouble, RegEltTy);
4438
68
      Value *makeDoubleOpArg =
4439
68
          Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble);
4440
68
      NumComponents /= 2; // Convert back to number of doubles.
4441
160
      for (unsigned i = 0; i < NumComponents; 
i++92
) {
4442
92
        Value *lo = Elts[2 * i];
4443
92
        Value *hi = Elts[2 * i + 1];
4444
92
        Elts[i] = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi});
4445
92
      }
4446
68
      EltTy = RegEltTy;
4447
4.90k
    } else if (RegEltTy->isIntegerTy(64)) {
4448
78
      NumComponents /= 2; // Convert back to number of int64s.
4449
192
      for (unsigned i = 0; i < NumComponents; 
i++114
) {
4450
114
        Value *lo = Elts[2 * i];
4451
114
        Value *hi = Elts[2 * i + 1];
4452
114
        lo = Builder.CreateZExt(lo, RegEltTy);
4453
114
        hi = Builder.CreateZExt(hi, RegEltTy);
4454
114
        hi = Builder.CreateShl(hi, 32);
4455
114
        Elts[i] = Builder.CreateOr(lo, hi);
4456
114
      }
4457
78
      EltTy = RegEltTy;
4458
78
    }
4459
4.97k
  }
4460
4461
  // Package elements into a vector as needed.
4462
18.1k
  Value *retValNew = nullptr;
4463
  // Scalar or native vector loads need not construct vectors from elements.
4464
18.1k
  if (!Ty->isVectorTy() || 
opcode == OP::OpCode::RawBufferVectorLoad11.7k
) {
4465
8.81k
    retValNew = Elts[0];
4466
9.30k
  } else {
4467
9.30k
    retValNew = UndefValue::get(VectorType::get(EltTy, NumComponents));
4468
39.6k
    for (unsigned i = 0; i < NumComponents; 
i++30.3k
)
4469
30.3k
      retValNew = Builder.CreateInsertElement(retValNew, Elts[i], i);
4470
9.30k
  }
4471
4472
  // Convert loaded int32 bool results to i1 register representation.
4473
18.1k
  if (isBool)
4474
334
    retValNew = Builder.CreateICmpNE(
4475
334
        retValNew, Constant::getNullValue(retValNew->getType()));
4476
4477
  // DXIL loads min precision as 32-bit; narrow back to original IR type.
4478
18.1k
  if (isMinPrec) {
4479
142
    if (OrigEltTy->isIntegerTy())
4480
102
      retValNew = Builder.CreateTrunc(retValNew, Ty);
4481
40
    else
4482
40
      retValNew = Builder.CreateFPTrunc(retValNew, Ty);
4483
142
  }
4484
4485
18.1k
  helper.retVal->replaceAllUsesWith(retValNew);
4486
18.1k
  helper.retVal = retValNew;
4487
4488
18.1k
  return FirstLd;
4489
18.1k
}
4490
4491
Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
4492
                             HLOperationLowerHelper &helper,
4493
                             HLObjectOperationLowerHelper *pObjHelper,
4494
6.21k
                             bool &Translated) {
4495
6.21k
  hlsl::OP *hlslOP = &helper.hlslOP;
4496
6.21k
  DataLayout &DL = helper.dataLayout;
4497
6.21k
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
4498
4499
6.21k
  IRBuilder<> Builder(CI);
4500
4501
6.21k
  DXIL::ResourceClass RC = pObjHelper->GetRC(handle);
4502
6.21k
  DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
4503
4504
6.21k
  ResLoadHelper ldHelper(CI, RK, RC, handle, IOP);
4505
6.21k
  Type *Ty = CI->getType();
4506
6.21k
  Value *Ld = nullptr;
4507
6.21k
  if (Ty->isPointerTy()) {
4508
1.05k
    DXASSERT(!DxilResource::IsAnyTexture(RK),
4509
1.05k
             "Textures should not be treated as structured buffers.");
4510
1.05k
    TranslateStructBufSubscript(cast<CallInst>(ldHelper.retVal), handle,
4511
1.05k
                                ldHelper.status, hlslOP, RK, DL);
4512
5.16k
  } else {
4513
5.16k
    Ld = TranslateBufLoad(ldHelper, RK, Builder, hlslOP, DL);
4514
5.16k
    dxilutil::MigrateDebugValue(CI, Ld);
4515
5.16k
  }
4516
  // CI is replaced by above translation calls..
4517
6.21k
  return nullptr;
4518
6.21k
}
4519
4520
// Split { v0, v1 } to { v0.lo, v0.hi, v1.lo, v1.hi }
4521
void Split64bitValForStore(Type *EltTy, ArrayRef<Value *> vals, unsigned size,
4522
                           MutableArrayRef<Value *> vals32, hlsl::OP *hlslOP,
4523
218
                           IRBuilder<> &Builder) {
4524
218
  Type *i32Ty = Builder.getInt32Ty();
4525
218
  Type *doubleTy = Builder.getDoubleTy();
4526
218
  Value *undefI32 = UndefValue::get(i32Ty);
4527
4528
218
  if (EltTy == doubleTy) {
4529
40
    Function *dToU = hlslOP->GetOpFunc(DXIL::OpCode::SplitDouble, doubleTy);
4530
40
    Value *dToUOpArg = Builder.getInt32((unsigned)DXIL::OpCode::SplitDouble);
4531
92
    for (unsigned i = 0; i < size; 
i++52
) {
4532
52
      if (isa<UndefValue>(vals[i])) {
4533
0
        vals32[2 * i] = undefI32;
4534
0
        vals32[2 * i + 1] = undefI32;
4535
52
      } else {
4536
52
        Value *retVal = Builder.CreateCall(dToU, {dToUOpArg, vals[i]});
4537
52
        Value *lo = Builder.CreateExtractValue(retVal, 0);
4538
52
        Value *hi = Builder.CreateExtractValue(retVal, 1);
4539
52
        vals32[2 * i] = lo;
4540
52
        vals32[2 * i + 1] = hi;
4541
52
      }
4542
52
    }
4543
178
  } else {
4544
372
    for (unsigned i = 0; i < size; 
i++194
) {
4545
194
      if (isa<UndefValue>(vals[i])) {
4546
0
        vals32[2 * i] = undefI32;
4547
0
        vals32[2 * i + 1] = undefI32;
4548
194
      } else {
4549
194
        Value *lo = Builder.CreateTrunc(vals[i], i32Ty);
4550
194
        Value *hi = Builder.CreateLShr(vals[i], 32);
4551
194
        hi = Builder.CreateTrunc(hi, i32Ty);
4552
194
        vals32[2 * i] = lo;
4553
194
        vals32[2 * i + 1] = hi;
4554
194
      }
4555
194
    }
4556
178
  }
4557
218
}
4558
4559
void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
4560
                    Value *Idx, Value *offset, IRBuilder<> &Builder,
4561
16.7k
                    hlsl::OP *OP, Value *sampIdx = nullptr) {
4562
16.7k
  Type *Ty = val->getType();
4563
16.7k
  OP::OpCode opcode = OP::OpCode::NumOpCodes;
4564
16.7k
  bool IsTyped = true;
4565
16.7k
  switch (RK) {
4566
3.06k
  case DxilResource::Kind::RawBuffer:
4567
13.4k
  case DxilResource::Kind::StructuredBuffer:
4568
13.4k
    IsTyped = false;
4569
13.4k
    opcode = OP::OpCode::RawBufferStore;
4570
    // Where shader model and type allows, use vector store intrinsic.
4571
13.4k
    if (OP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus() &&
4572
13.4k
        
Ty->isVectorTy()4.90k
&&
Ty->getVectorNumElements() > 13.75k
)
4573
2.33k
      opcode = OP::OpCode::RawBufferVectorStore;
4574
13.4k
    break;
4575
1.23k
  case DxilResource::Kind::TypedBuffer:
4576
1.23k
    opcode = OP::OpCode::BufferStore;
4577
1.23k
    break;
4578
0
  case DxilResource::Kind::Invalid:
4579
0
    DXASSERT(0, "invalid resource kind");
4580
0
    break;
4581
48
  case DxilResource::Kind::Texture2DMS:
4582
80
  case DxilResource::Kind::Texture2DMSArray:
4583
80
    opcode = OP::OpCode::TextureStoreSample;
4584
80
    break;
4585
2.01k
  default:
4586
2.01k
    opcode = OP::OpCode::TextureStore;
4587
2.01k
    break;
4588
16.7k
  }
4589
4590
16.7k
  Type *i32Ty = Builder.getInt32Ty();
4591
16.7k
  Type *i64Ty = Builder.getInt64Ty();
4592
16.7k
  Type *doubleTy = Builder.getDoubleTy();
4593
16.7k
  Type *EltTy = Ty->getScalarType();
4594
16.7k
  if (EltTy->isIntegerTy(1)) {
4595
    // Since we're going to memory, convert bools to their memory
4596
    // representation.
4597
350
    EltTy = i32Ty;
4598
350
    if (Ty->isVectorTy())
4599
322
      Ty = VectorType::get(EltTy, Ty->getVectorNumElements());
4600
28
    else
4601
28
      Ty = EltTy;
4602
350
    val = Builder.CreateZExt(val, Ty);
4603
350
  }
4604
4605
  // Min precision alloc size is 32-bit; widen to match store intrinsic.
4606
  // Scalar RawBufferStore widening is handled by TranslateMinPrecisionRawBuffer
4607
  // in DxilGenerationPass, which has signedness info from struct annotations.
4608
16.7k
  if (opcode == OP::OpCode::RawBufferVectorStore) {
4609
2.33k
    const DataLayout &DL =
4610
2.33k
        OP->GetModule()->GetHLModule().GetModule()->getDataLayout();
4611
2.33k
    Type *WideTy = widenMinPrecisionType(Ty, Builder.getContext(), DL);
4612
2.33k
    if (WideTy != Ty) {
4613
24
      if (EltTy->isFloatingPointTy())
4614
8
        val = Builder.CreateFPExt(val, WideTy);
4615
16
      else
4616
        // TODO(#8314): Signedness info is lost by this point; SExt is wrong
4617
        // for min16uint. Front-end should widen during Clang CodeGen instead.
4618
16
        val = Builder.CreateSExt(val, WideTy);
4619
24
      EltTy = WideTy->getScalarType();
4620
24
      Ty = WideTy;
4621
24
    }
4622
2.33k
  }
4623
4624
  // If RawBuffer store of 64-bit value, don't set alignment to 8,
4625
  // since buffer alignment isn't known to be anything over 4.
4626
16.7k
  unsigned alignValue = OP->GetAllocSizeForType(EltTy);
4627
16.7k
  if (RK == HLResource::Kind::RawBuffer && 
alignValue > 43.06k
)
4628
232
    alignValue = 4;
4629
16.7k
  Constant *Alignment = OP->GetI32Const(alignValue);
4630
16.7k
  bool is64 = EltTy == i64Ty || 
EltTy == doubleTy15.8k
;
4631
16.7k
  if (is64 && 
IsTyped1.82k
) {
4632
218
    EltTy = i32Ty;
4633
218
  }
4634
4635
16.7k
  llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode);
4636
4637
16.7k
  llvm::Value *undefI =
4638
16.7k
      llvm::UndefValue::get(llvm::Type::getInt32Ty(Ty->getContext()));
4639
4640
16.7k
  llvm::Value *undefVal = llvm::UndefValue::get(Ty->getScalarType());
4641
4642
16.7k
  SmallVector<Value *, 13> storeArgs;
4643
16.7k
  storeArgs.emplace_back(opArg);  // opcode
4644
16.7k
  storeArgs.emplace_back(handle); // resource handle
4645
4646
16.7k
  unsigned OffsetIdx = 0;
4647
16.7k
  if (opcode == OP::OpCode::RawBufferStore ||
4648
16.7k
      
opcode == OP::OpCode::RawBufferVectorStore5.66k
||
4649
16.7k
      
opcode == OP::OpCode::BufferStore3.33k
) {
4650
    // Append Coord0 (Index) value.
4651
14.7k
    if (Idx->getType()->isVectorTy()) {
4652
0
      Value *ScalarIdx = Builder.CreateExtractElement(Idx, (uint64_t)0);
4653
0
      storeArgs.emplace_back(ScalarIdx); // Coord0 (Index).
4654
14.7k
    } else {
4655
14.7k
      storeArgs.emplace_back(Idx); // Coord0 (Index).
4656
14.7k
    }
4657
4658
    // Store OffsetIdx representing the argument that may need to be incremented
4659
    // later to load additional chunks of data.
4660
    // Only structured buffers can use the offset parameter.
4661
    // Others must increment the index.
4662
14.7k
    if (RK == DxilResource::Kind::StructuredBuffer)
4663
10.4k
      OffsetIdx = storeArgs.size();
4664
4.30k
    else
4665
4.30k
      OffsetIdx = storeArgs.size() - 1;
4666
4667
    // Coord1 (Offset).
4668
14.7k
    storeArgs.emplace_back(offset);
4669
14.7k
  } else {
4670
    // texture store
4671
2.09k
    unsigned coordSize = DxilResource::GetNumCoords(RK);
4672
4673
    // Set x first.
4674
2.09k
    if (Idx->getType()->isVectorTy())
4675
1.62k
      storeArgs.emplace_back(Builder.CreateExtractElement(Idx, (uint64_t)0));
4676
466
    else
4677
466
      storeArgs.emplace_back(Idx);
4678
4679
6.27k
    for (unsigned i = 1; i < 3; 
i++4.18k
) {
4680
4.18k
      if (i < coordSize)
4681
1.73k
        storeArgs.emplace_back(Builder.CreateExtractElement(Idx, i));
4682
2.44k
      else
4683
2.44k
        storeArgs.emplace_back(undefI);
4684
4.18k
    }
4685
    // TODO: support mip for texture ST
4686
2.09k
  }
4687
4688
  // RawBufferVectorStore only takes a single value and alignment arguments.
4689
16.7k
  if (opcode == DXIL::OpCode::RawBufferVectorStore) {
4690
2.33k
    storeArgs.emplace_back(val);
4691
2.33k
    storeArgs.emplace_back(Alignment);
4692
2.33k
    Function *F = OP->GetOpFunc(DXIL::OpCode::RawBufferVectorStore, Ty);
4693
2.33k
    Builder.CreateCall(F, storeArgs);
4694
2.33k
    return;
4695
2.33k
  }
4696
14.4k
  Function *F = OP->GetOpFunc(opcode, EltTy);
4697
4698
14.4k
  constexpr unsigned MaxStoreElemCount = 4;
4699
14.4k
  const unsigned CompCount = Ty->isVectorTy() ? 
Ty->getVectorNumElements()8.20k
:
16.25k
;
4700
14.4k
  const unsigned StoreInstCount =
4701
14.4k
      (CompCount / MaxStoreElemCount) + (CompCount % MaxStoreElemCount != 0);
4702
14.4k
  SmallVector<decltype(storeArgs), 4> storeArgsList;
4703
4704
  // Max number of element to store should be 16 (for a 4x4 matrix)
4705
14.4k
  DXASSERT_NOMSG(StoreInstCount >= 1 && StoreInstCount <= 4);
4706
4707
  // If number of elements to store exceeds the maximum number of elements
4708
  // that can be stored in a single store call,  make sure to generate enough
4709
  // store calls to store all elements
4710
29.3k
  for (unsigned j = 0; j < StoreInstCount; 
j++14.8k
) {
4711
14.8k
    decltype(storeArgs) newStoreArgs;
4712
14.8k
    for (Value *storeArg : storeArgs)
4713
61.6k
      newStoreArgs.emplace_back(storeArg);
4714
14.8k
    storeArgsList.emplace_back(newStoreArgs);
4715
14.8k
  }
4716
4717
29.3k
  for (unsigned j = 0; j < storeArgsList.size(); 
j++14.8k
) {
4718
    // For second and subsequent store calls, increment the resource-appropriate
4719
    // index or offset parameter.
4720
14.8k
    if (j > 0) {
4721
436
      unsigned EltSize = OP->GetAllocSizeForType(EltTy);
4722
436
      unsigned NewCoord = EltSize * MaxStoreElemCount * j;
4723
436
      Value *NewCoordVal = ConstantInt::get(Builder.getInt32Ty(), NewCoord);
4724
436
      NewCoordVal = Builder.CreateAdd(storeArgsList[0][OffsetIdx], NewCoordVal);
4725
436
      storeArgsList[j][OffsetIdx] = NewCoordVal;
4726
436
    }
4727
4728
    // Set value parameters.
4729
14.8k
    uint8_t mask = 0;
4730
14.8k
    if (Ty->isVectorTy()) {
4731
8.64k
      unsigned vecSize =
4732
8.64k
          std::min((j + 1) * MaxStoreElemCount, Ty->getVectorNumElements()) -
4733
8.64k
          (j * MaxStoreElemCount);
4734
8.64k
      Value *emptyVal = undefVal;
4735
8.64k
      if (IsTyped) {
4736
1.73k
        mask = DXIL::kCompMask_All;
4737
1.73k
        emptyVal = Builder.CreateExtractElement(val, (uint64_t)0);
4738
1.73k
      }
4739
4740
43.2k
      for (unsigned i = 0; i < MaxStoreElemCount; 
i++34.5k
) {
4741
34.5k
        if (i < vecSize) {
4742
23.7k
          storeArgsList[j].emplace_back(
4743
23.7k
              Builder.CreateExtractElement(val, (j * MaxStoreElemCount) + i));
4744
23.7k
          mask |= (1 << i);
4745
23.7k
        } else {
4746
10.8k
          storeArgsList[j].emplace_back(emptyVal);
4747
10.8k
        }
4748
34.5k
      }
4749
4750
8.64k
    } else {
4751
6.25k
      if (IsTyped) {
4752
1.59k
        mask = DXIL::kCompMask_All;
4753
1.59k
        storeArgsList[j].emplace_back(val);
4754
1.59k
        storeArgsList[j].emplace_back(val);
4755
1.59k
        storeArgsList[j].emplace_back(val);
4756
1.59k
        storeArgsList[j].emplace_back(val);
4757
4.65k
      } else {
4758
4.65k
        storeArgsList[j].emplace_back(val);
4759
4.65k
        storeArgsList[j].emplace_back(undefVal);
4760
4.65k
        storeArgsList[j].emplace_back(undefVal);
4761
4.65k
        storeArgsList[j].emplace_back(undefVal);
4762
4.65k
        mask = DXIL::kCompMask_X;
4763
4.65k
      }
4764
6.25k
    }
4765
4766
14.8k
    if (is64 && 
IsTyped1.49k
) {
4767
218
      unsigned size = 1;
4768
218
      if (Ty->isVectorTy()) {
4769
36
        size =
4770
36
            std::min((j + 1) * MaxStoreElemCount, Ty->getVectorNumElements()) -
4771
36
            (j * MaxStoreElemCount);
4772
36
      }
4773
218
      DXASSERT(size <= 2, "raw/typed buffer only allow 4 dwords");
4774
218
      unsigned val0OpIdx = opcode == DXIL::OpCode::TextureStore ||
4775
218
                                   
opcode == DXIL::OpCode::TextureStoreSample114
4776
218
                               ? 
DXIL::OperandIndex::kTextureStoreVal0OpIdx112
4777
218
                               : 
DXIL::OperandIndex::kBufferStoreVal0OpIdx106
;
4778
218
      Value *V0 = storeArgsList[j][val0OpIdx];
4779
218
      Value *V1 = storeArgsList[j][val0OpIdx + 1];
4780
4781
218
      Value *vals32[4];
4782
218
      EltTy = Ty->getScalarType();
4783
218
      Split64bitValForStore(EltTy, {V0, V1}, size, vals32, OP, Builder);
4784
      // Fill the uninit vals.
4785
218
      if (size == 1) {
4786
190
        vals32[2] = vals32[0];
4787
190
        vals32[3] = vals32[1];
4788
190
      }
4789
      // Change valOp to 32 version.
4790
1.09k
      for (unsigned i = 0; i < 4; 
i++872
) {
4791
872
        storeArgsList[j][val0OpIdx + i] = vals32[i];
4792
872
      }
4793
      // change mask for double
4794
218
      if (opcode == DXIL::OpCode::RawBufferStore) {
4795
0
        mask = size == 1 ? DXIL::kCompMask_X | DXIL::kCompMask_Y
4796
0
                         : DXIL::kCompMask_All;
4797
0
      }
4798
218
    }
4799
4800
14.8k
    storeArgsList[j].emplace_back(OP->GetU8Const(mask)); // mask
4801
14.8k
    if (opcode == DXIL::OpCode::RawBufferStore)
4802
11.5k
      storeArgsList[j].emplace_back(Alignment); // alignment only for raw buffer
4803
3.33k
    else if (opcode == DXIL::OpCode::TextureStoreSample) {
4804
80
      storeArgsList[j].emplace_back(
4805
80
          sampIdx ? 
sampIdx40
4806
80
                  : 
Builder.getInt32(0)40
); // sample idx only for MS textures
4807
80
    }
4808
14.8k
    Builder.CreateCall(F, storeArgsList[j]);
4809
14.8k
  }
4810
14.4k
}
4811
4812
Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
4813
                              HLOperationLowerHelper &helper,
4814
                              HLObjectOperationLowerHelper *pObjHelper,
4815
3.06k
                              bool &Translated) {
4816
3.06k
  hlsl::OP *hlslOP = &helper.hlslOP;
4817
3.06k
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
4818
4819
3.06k
  IRBuilder<> Builder(CI);
4820
3.06k
  DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
4821
4822
3.06k
  Value *val = CI->getArgOperand(HLOperandIndex::kStoreValOpIdx);
4823
3.06k
  Value *offset = CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx);
4824
3.06k
  Value *UndefI = UndefValue::get(Builder.getInt32Ty());
4825
3.06k
  TranslateStore(RK, handle, val, offset, UndefI, Builder, hlslOP);
4826
4827
3.06k
  return nullptr;
4828
3.06k
}
4829
} // namespace
4830
4831
// Atomic intrinsics.
4832
namespace {
4833
// Atomic intrinsics.
4834
struct AtomicHelper {
4835
  AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Type *opType = nullptr);
4836
  AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx,
4837
               Value *baseOffset, Type *opType = nullptr);
4838
  OP::OpCode opcode;
4839
  Value *handle;
4840
  Value *addr;
4841
  Value *offset; // Offset for structrued buffer.
4842
  Value *value;
4843
  Value *originalValue;
4844
  Value *compareValue;
4845
  Type *operationType;
4846
};
4847
4848
// For MOP version of Interlocked*.
4849
AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Type *opType)
4850
2.48k
    : opcode(op), handle(h), offset(nullptr), originalValue(nullptr),
4851
2.48k
      operationType(opType) {
4852
2.48k
  addr = CI->getArgOperand(HLOperandIndex::kObjectInterlockedDestOpIndex);
4853
2.48k
  if (op == OP::OpCode::AtomicCompareExchange) {
4854
962
    compareValue = CI->getArgOperand(
4855
962
        HLOperandIndex::kObjectInterlockedCmpCompareValueOpIndex);
4856
962
    value =
4857
962
        CI->getArgOperand(HLOperandIndex::kObjectInterlockedCmpValueOpIndex);
4858
962
    if (CI->getNumArgOperands() ==
4859
962
        (HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex + 1))
4860
526
      originalValue = CI->getArgOperand(
4861
526
          HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex);
4862
1.52k
  } else {
4863
1.52k
    value = CI->getArgOperand(HLOperandIndex::kObjectInterlockedValueOpIndex);
4864
1.52k
    if (CI->getNumArgOperands() ==
4865
1.52k
        (HLOperandIndex::kObjectInterlockedOriginalValueOpIndex + 1))
4866
1.34k
      originalValue = CI->getArgOperand(
4867
1.34k
          HLOperandIndex::kObjectInterlockedOriginalValueOpIndex);
4868
1.52k
  }
4869
2.48k
  if (nullptr == operationType)
4870
2.32k
    operationType = value->getType();
4871
2.48k
}
4872
// For IOP version of Interlocked*.
4873
AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx,
4874
                           Value *baseOffset, Type *opType)
4875
4.18k
    : opcode(op), handle(h), addr(bufIdx), offset(baseOffset),
4876
4.18k
      originalValue(nullptr), operationType(opType) {
4877
4.18k
  if (op == OP::OpCode::AtomicCompareExchange) {
4878
1.42k
    compareValue =
4879
1.42k
        CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex);
4880
1.42k
    value = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex);
4881
1.42k
    if (CI->getNumArgOperands() ==
4882
1.42k
        (HLOperandIndex::kInterlockedCmpOriginalValueOpIndex + 1))
4883
692
      originalValue = CI->getArgOperand(
4884
692
          HLOperandIndex::kInterlockedCmpOriginalValueOpIndex);
4885
2.75k
  } else {
4886
2.75k
    value = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex);
4887
2.75k
    if (CI->getNumArgOperands() ==
4888
2.75k
        (HLOperandIndex::kInterlockedOriginalValueOpIndex + 1))
4889
720
      originalValue =
4890
720
          CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex);
4891
2.75k
  }
4892
4.18k
  if (nullptr == operationType)
4893
4.10k
    operationType = value->getType();
4894
4.18k
}
4895
4896
void TranslateAtomicBinaryOperation(AtomicHelper &helper,
4897
                                    DXIL::AtomicBinOpCode atomicOp,
4898
4.28k
                                    IRBuilder<> &Builder, hlsl::OP *hlslOP) {
4899
4.28k
  Value *handle = helper.handle;
4900
4.28k
  Value *addr = helper.addr;
4901
4.28k
  Value *val = helper.value;
4902
4.28k
  Type *Ty = helper.operationType;
4903
4.28k
  Type *valTy = val->getType();
4904
4905
4.28k
  Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext()));
4906
4907
4.28k
  Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType());
4908
4.28k
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode));
4909
4.28k
  Value *atomicOpArg = hlslOP->GetU32Const(static_cast<unsigned>(atomicOp));
4910
4911
4.28k
  if (Ty != valTy)
4912
72
    val = Builder.CreateBitCast(val, Ty);
4913
4914
4.28k
  Value *args[] = {opArg,  handle, atomicOpArg,
4915
4.28k
                   undefI, undefI, undefI, // coordinates
4916
4.28k
                   val};
4917
4918
  // Setup coordinates.
4919
4.28k
  if (addr->getType()->isVectorTy()) {
4920
250
    unsigned vectorNumElements = addr->getType()->getVectorNumElements();
4921
250
    DXASSERT(vectorNumElements <= 3, "up to 3 elements for atomic binary op");
4922
250
    assert(vectorNumElements <= 3);
4923
846
    for (unsigned i = 0; i < vectorNumElements; 
i++596
) {
4924
596
      Value *Elt = Builder.CreateExtractElement(addr, i);
4925
596
      args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx + i] = Elt;
4926
596
    }
4927
250
  } else
4928
4.03k
    args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx] = addr;
4929
4930
  // Set offset for structured buffer.
4931
4.28k
  if (helper.offset)
4932
1.00k
    args[DXIL::OperandIndex::kAtomicBinOpCoord1OpIdx] = helper.offset;
4933
4934
4.28k
  Value *origVal =
4935
4.28k
      Builder.CreateCall(dxilAtomic, args, hlslOP->GetAtomicOpName(atomicOp));
4936
4.28k
  if (helper.originalValue) {
4937
2.06k
    if (Ty != valTy)
4938
72
      origVal = Builder.CreateBitCast(origVal, valTy);
4939
2.06k
    Builder.CreateStore(origVal, helper.originalValue);
4940
2.06k
  }
4941
4.28k
}
4942
4943
Value *TranslateMopAtomicBinaryOperation(
4944
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
4945
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
4946
1.52k
    bool &Translated) {
4947
1.52k
  hlsl::OP *hlslOP = &helper.hlslOP;
4948
4949
1.52k
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
4950
1.52k
  IRBuilder<> Builder(CI);
4951
4952
1.52k
  switch (IOP) {
4953
244
  case IntrinsicOp::MOP_InterlockedAdd:
4954
316
  case IntrinsicOp::MOP_InterlockedAdd64: {
4955
316
    AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
4956
316
    TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Add, Builder,
4957
316
                                   hlslOP);
4958
316
  } break;
4959
72
  case IntrinsicOp::MOP_InterlockedAnd:
4960
144
  case IntrinsicOp::MOP_InterlockedAnd64: {
4961
144
    AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
4962
144
    TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::And, Builder,
4963
144
                                   hlslOP);
4964
144
  } break;
4965
216
  case IntrinsicOp::MOP_InterlockedExchange:
4966
424
  case IntrinsicOp::MOP_InterlockedExchange64: {
4967
424
    AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
4968
424
    TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange,
4969
424
                                   Builder, hlslOP);
4970
424
  } break;
4971
48
  case IntrinsicOp::MOP_InterlockedExchangeFloat: {
4972
48
    AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle,
4973
48
                        Type::getInt32Ty(CI->getContext()));
4974
48
    TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange,
4975
48
                                   Builder, hlslOP);
4976
48
  } break;
4977
58
  case IntrinsicOp::MOP_InterlockedMax:
4978
118
  case IntrinsicOp::MOP_InterlockedMax64: {
4979
118
    AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
4980
118
    TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMax, Builder,
4981
118
                                   hlslOP);
4982
118
  } break;
4983
58
  case IntrinsicOp::MOP_InterlockedMin:
4984
118
  case IntrinsicOp::MOP_InterlockedMin64: {
4985
118
    AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
4986
118
    TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMin, Builder,
4987
118
                                   hlslOP);
4988
118
  } break;
4989
34
  case IntrinsicOp::MOP_InterlockedUMax: {
4990
34
    AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
4991
34
    TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMax, Builder,
4992
34
                                   hlslOP);
4993
34
  } break;
4994
34
  case IntrinsicOp::MOP_InterlockedUMin: {
4995
34
    AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
4996
34
    TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMin, Builder,
4997
34
                                   hlslOP);
4998
34
  } break;
4999
72
  case IntrinsicOp::MOP_InterlockedOr:
5000
144
  case IntrinsicOp::MOP_InterlockedOr64: {
5001
144
    AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
5002
144
    TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Or, Builder,
5003
144
                                   hlslOP);
5004
144
  } break;
5005
72
  case IntrinsicOp::MOP_InterlockedXor:
5006
144
  case IntrinsicOp::MOP_InterlockedXor64:
5007
144
  default: {
5008
144
    DXASSERT(IOP == IntrinsicOp::MOP_InterlockedXor ||
5009
144
                 IOP == IntrinsicOp::MOP_InterlockedXor64,
5010
144
             "invalid MOP atomic intrinsic");
5011
144
    AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle);
5012
144
    TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Xor, Builder,
5013
144
                                   hlslOP);
5014
144
  } break;
5015
1.52k
  }
5016
5017
1.52k
  return nullptr;
5018
1.52k
}
5019
void TranslateAtomicCmpXChg(AtomicHelper &helper, IRBuilder<> &Builder,
5020
2.38k
                            hlsl::OP *hlslOP) {
5021
2.38k
  Value *handle = helper.handle;
5022
2.38k
  Value *addr = helper.addr;
5023
2.38k
  Value *val = helper.value;
5024
2.38k
  Value *cmpVal = helper.compareValue;
5025
5026
2.38k
  Type *Ty = helper.operationType;
5027
2.38k
  Type *valTy = val->getType();
5028
5029
2.38k
  Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext()));
5030
5031
2.38k
  Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType());
5032
2.38k
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode));
5033
5034
2.38k
  if (Ty != valTy) {
5035
168
    val = Builder.CreateBitCast(val, Ty);
5036
168
    if (cmpVal)
5037
168
      cmpVal = Builder.CreateBitCast(cmpVal, Ty);
5038
168
  }
5039
5040
2.38k
  Value *args[] = {opArg,  handle, undefI, undefI, undefI, // coordinates
5041
2.38k
                   cmpVal, val};
5042
5043
  // Setup coordinates.
5044
2.38k
  if (addr->getType()->isVectorTy()) {
5045
60
    unsigned vectorNumElements = addr->getType()->getVectorNumElements();
5046
60
    DXASSERT(vectorNumElements <= 3, "up to 3 elements in atomic op");
5047
60
    assert(vectorNumElements <= 3);
5048
196
    for (unsigned i = 0; i < vectorNumElements; 
i++136
) {
5049
136
      Value *Elt = Builder.CreateExtractElement(addr, i);
5050
136
      args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx + i] = Elt;
5051
136
    }
5052
60
  } else
5053
2.32k
    args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx] = addr;
5054
5055
  // Set offset for structured buffer.
5056
2.38k
  if (helper.offset)
5057
536
    args[DXIL::OperandIndex::kAtomicCmpExchangeCoord1OpIdx] = helper.offset;
5058
5059
2.38k
  Value *origVal = Builder.CreateCall(dxilAtomic, args);
5060
2.38k
  if (helper.originalValue) {
5061
1.21k
    if (Ty != valTy)
5062
84
      origVal = Builder.CreateBitCast(origVal, valTy);
5063
1.21k
    Builder.CreateStore(origVal, helper.originalValue);
5064
1.21k
  }
5065
2.38k
}
5066
5067
Value *TranslateMopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP,
5068
                                 OP::OpCode opcode,
5069
                                 HLOperationLowerHelper &helper,
5070
                                 HLObjectOperationLowerHelper *pObjHelper,
5071
962
                                 bool &Translated) {
5072
962
  hlsl::OP *hlslOP = &helper.hlslOP;
5073
5074
962
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
5075
962
  IRBuilder<> Builder(CI);
5076
962
  Type *opType = nullptr;
5077
962
  if (IOP == IntrinsicOp::MOP_InterlockedCompareStoreFloatBitwise ||
5078
962
      
IOP == IntrinsicOp::MOP_InterlockedCompareExchangeFloatBitwise906
)
5079
112
    opType = Type::getInt32Ty(CI->getContext());
5080
962
  AtomicHelper atomicHelper(CI, OP::OpCode::AtomicCompareExchange, handle,
5081
962
                            opType);
5082
962
  TranslateAtomicCmpXChg(atomicHelper, Builder, hlslOP);
5083
962
  return nullptr;
5084
962
}
5085
5086
void TranslateSharedMemOrNodeAtomicBinOp(CallInst *CI, IntrinsicOp IOP,
5087
1.49k
                                         Value *addr) {
5088
1.49k
  AtomicRMWInst::BinOp Op;
5089
1.49k
  IRBuilder<> Builder(CI);
5090
1.49k
  Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex);
5091
1.49k
  PointerType *ptrType = dyn_cast<PointerType>(
5092
1.49k
      CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex)->getType());
5093
1.49k
  bool needCast = ptrType && ptrType->getElementType()->isFloatTy();
5094
1.49k
  switch (IOP) {
5095
376
  case IntrinsicOp::IOP_InterlockedAdd:
5096
376
    Op = AtomicRMWInst::BinOp::Add;
5097
376
    break;
5098
104
  case IntrinsicOp::IOP_InterlockedAnd:
5099
104
    Op = AtomicRMWInst::BinOp::And;
5100
104
    break;
5101
472
  case IntrinsicOp::IOP_InterlockedExchange:
5102
472
    if (needCast) {
5103
48
      val = Builder.CreateBitCast(val, Type::getInt32Ty(CI->getContext()));
5104
48
      addr = Builder.CreateBitCast(
5105
48
          addr, Type::getInt32PtrTy(CI->getContext(),
5106
48
                                    addr->getType()->getPointerAddressSpace()));
5107
48
    }
5108
472
    Op = AtomicRMWInst::BinOp::Xchg;
5109
472
    break;
5110
68
  case IntrinsicOp::IOP_InterlockedMax:
5111
68
    Op = AtomicRMWInst::BinOp::Max;
5112
68
    break;
5113
84
  case IntrinsicOp::IOP_InterlockedUMax:
5114
84
    Op = AtomicRMWInst::BinOp::UMax;
5115
84
    break;
5116
60
  case IntrinsicOp::IOP_InterlockedMin:
5117
60
    Op = AtomicRMWInst::BinOp::Min;
5118
60
    break;
5119
68
  case IntrinsicOp::IOP_InterlockedUMin:
5120
68
    Op = AtomicRMWInst::BinOp::UMin;
5121
68
    break;
5122
156
  case IntrinsicOp::IOP_InterlockedOr:
5123
156
    Op = AtomicRMWInst::BinOp::Or;
5124
156
    break;
5125
104
  case IntrinsicOp::IOP_InterlockedXor:
5126
104
  default:
5127
104
    DXASSERT(IOP == IntrinsicOp::IOP_InterlockedXor, "Invalid Intrinsic");
5128
104
    Op = AtomicRMWInst::BinOp::Xor;
5129
104
    break;
5130
1.49k
  }
5131
5132
1.49k
  Value *Result = Builder.CreateAtomicRMW(
5133
1.49k
      Op, addr, val, AtomicOrdering::SequentiallyConsistent);
5134
1.49k
  if (CI->getNumArgOperands() >
5135
1.49k
      HLOperandIndex::kInterlockedOriginalValueOpIndex) {
5136
574
    if (needCast)
5137
48
      Result =
5138
48
          Builder.CreateBitCast(Result, Type::getFloatTy(CI->getContext()));
5139
574
    Builder.CreateStore(
5140
574
        Result,
5141
574
        CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex));
5142
574
  }
5143
1.49k
}
5144
5145
3.65k
static Value *SkipAddrSpaceCast(Value *Ptr) {
5146
3.65k
  if (AddrSpaceCastInst *CastInst = dyn_cast<AddrSpaceCastInst>(Ptr))
5147
2.25k
    return CastInst->getOperand(0);
5148
1.40k
  if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Ptr)) {
5149
400
    if (ConstExpr->getOpcode() == Instruction::AddrSpaceCast) {
5150
400
      return ConstExpr->getOperand(0);
5151
400
    }
5152
400
  }
5153
1.00k
  return Ptr;
5154
1.40k
}
5155
5156
Value *
5157
TranslateNodeIncrementOutputCount(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
5158
                                  HLOperationLowerHelper &helper,
5159
                                  HLObjectOperationLowerHelper *pObjHelper,
5160
84
                                  bool isPerThread, bool &Translated) {
5161
5162
84
  hlsl::OP *OP = &helper.hlslOP;
5163
84
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
5164
84
  Value *count =
5165
84
      CI->getArgOperand(HLOperandIndex::kIncrementOutputCountCountIdx);
5166
84
  Function *dxilFunc = OP->GetOpFunc(op, CI->getType());
5167
84
  Value *opArg = OP->GetU32Const((unsigned)op);
5168
84
  Value *perThread = OP->GetI1Const(isPerThread);
5169
5170
84
  Value *args[] = {opArg, handle, count, perThread};
5171
5172
84
  IRBuilder<> Builder(CI);
5173
84
  Builder.CreateCall(dxilFunc, args);
5174
84
  return nullptr;
5175
84
}
5176
5177
/*
5178
HLSL:
5179
void EmptyNodeOutput::GroupIncrementOutputCount(uint count)
5180
DXIL:
5181
void @dx.op.groupIncrementOutputCount(i32 %Opcode, %dx.types.NodeHandle
5182
%NodeOutput, i32 count)
5183
*/
5184
Value *TranslateNodeGroupIncrementOutputCount(
5185
    CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
5186
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
5187
76
    bool &Translated) {
5188
76
  return TranslateNodeIncrementOutputCount(CI, IOP, op, helper, pObjHelper,
5189
76
                                           /*isPerThread*/ false, Translated);
5190
76
}
5191
5192
/*
5193
HLSL:
5194
void EmptyNodeOutput::ThreadIncrementOutputCount(uint count)
5195
DXIL:
5196
void @dx.op.threadIncrementOutputCount(i32 %Opcode, %dx.types.NodeHandle
5197
%NodeOutput, i32 count)
5198
*/
5199
Value *TranslateNodeThreadIncrementOutputCount(
5200
    CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
5201
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
5202
8
    bool &Translated) {
5203
8
  return TranslateNodeIncrementOutputCount(CI, IOP, op, helper, pObjHelper,
5204
8
                                           /*isPerThread*/ true, Translated);
5205
8
}
5206
5207
// For known non-groupshared, verify that the destination param is valid
5208
void ValidateAtomicDestination(CallInst *CI,
5209
1.00k
                               HLObjectOperationLowerHelper *pObjHelper) {
5210
1.00k
  Value *dest = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex);
5211
  // If we encounter a gep, we may provide a more specific error message
5212
1.00k
  bool hasGep = isa<GetElementPtrInst>(dest);
5213
5214
  // Confirm that dest is a properly-used UAV
5215
5216
  // Drill through subscripts and geps, anything else indicates a misuse
5217
2.23k
  while (true) {
5218
2.23k
    if (GetElementPtrInst *gep = dyn_cast<GetElementPtrInst>(dest)) {
5219
284
      dest = gep->getPointerOperand();
5220
284
      continue;
5221
284
    }
5222
1.95k
    if (CallInst *handle = dyn_cast<CallInst>(dest)) {
5223
1.86k
      hlsl::HLOpcodeGroup group =
5224
1.86k
          hlsl::GetHLOpcodeGroup(handle->getCalledFunction());
5225
1.86k
      if (group != HLOpcodeGroup::HLSubscript)
5226
914
        break;
5227
946
      dest = handle->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
5228
946
      continue;
5229
1.86k
    }
5230
90
    break;
5231
1.95k
  }
5232
5233
1.00k
  if (pObjHelper->GetRC(dest) == DXIL::ResourceClass::UAV) {
5234
914
    DXIL::ResourceKind RK = pObjHelper->GetRK(dest);
5235
914
    if (DXIL::IsStructuredBuffer(RK))
5236
404
      return; // no errors
5237
510
    if (DXIL::IsTyped(RK)) {
5238
510
      if (hasGep)
5239
16
        dxilutil::EmitErrorOnInstruction(
5240
16
            CI, "Typed resources used in atomic operations must have a scalar "
5241
16
                "element type.");
5242
510
      return; // error emitted or else no errors
5243
510
    }
5244
510
  }
5245
5246
90
  dxilutil::EmitErrorOnInstruction(
5247
90
      CI, "Atomic operation targets must be groupshared, Node Record or UAV.");
5248
90
}
5249
5250
Value *TranslateIopAtomicBinaryOperation(
5251
    CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
5252
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
5253
2.42k
    bool &Translated) {
5254
2.42k
  Value *addr = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex);
5255
2.42k
  addr = SkipAddrSpaceCast(addr);
5256
5257
2.42k
  unsigned addressSpace = addr->getType()->getPointerAddressSpace();
5258
2.42k
  if (addressSpace == DXIL::kTGSMAddrSpace ||
5259
2.42k
      
addressSpace == DXIL::kNodeRecordAddrSpace974
)
5260
1.49k
    TranslateSharedMemOrNodeAtomicBinOp(CI, IOP, addr);
5261
928
  else {
5262
    // If not groupshared or node record, we either have an error case or will
5263
    // translate the atomic op in the process of translating users of the
5264
    // subscript operator Mark not translated and validate dest param
5265
928
    Translated = false;
5266
928
    ValidateAtomicDestination(CI, pObjHelper);
5267
928
  }
5268
5269
2.42k
  return nullptr;
5270
2.42k
}
5271
5272
1.16k
void TranslateSharedMemOrNodeAtomicCmpXChg(CallInst *CI, Value *addr) {
5273
1.16k
  Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex);
5274
1.16k
  Value *cmpVal =
5275
1.16k
      CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex);
5276
1.16k
  IRBuilder<> Builder(CI);
5277
5278
1.16k
  PointerType *ptrType = dyn_cast<PointerType>(
5279
1.16k
      CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex)->getType());
5280
1.16k
  bool needCast = false;
5281
1.16k
  if (ptrType && ptrType->getElementType()->isFloatTy()) {
5282
166
    needCast = true;
5283
166
    val = Builder.CreateBitCast(val, Type::getInt32Ty(CI->getContext()));
5284
166
    cmpVal = Builder.CreateBitCast(cmpVal, Type::getInt32Ty(CI->getContext()));
5285
166
    unsigned addrSpace = cast<PointerType>(addr->getType())->getAddressSpace();
5286
166
    addr = Builder.CreateBitCast(
5287
166
        addr, Type::getInt32PtrTy(CI->getContext(), addrSpace));
5288
166
  }
5289
5290
1.16k
  Value *Result = Builder.CreateAtomicCmpXchg(
5291
1.16k
      addr, cmpVal, val, AtomicOrdering::SequentiallyConsistent,
5292
1.16k
      AtomicOrdering::SequentiallyConsistent);
5293
5294
1.16k
  if (CI->getNumArgOperands() >
5295
1.16k
      HLOperandIndex::kInterlockedCmpOriginalValueOpIndex) {
5296
538
    Value *originVal = Builder.CreateExtractValue(Result, 0);
5297
538
    if (needCast)
5298
56
      originVal =
5299
56
          Builder.CreateBitCast(originVal, Type::getFloatTy(CI->getContext()));
5300
538
    Builder.CreateStore(
5301
538
        originVal,
5302
538
        CI->getArgOperand(HLOperandIndex::kInterlockedCmpOriginalValueOpIndex));
5303
538
  }
5304
1.16k
}
5305
5306
Value *TranslateIopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP,
5307
                                 DXIL::OpCode opcode,
5308
                                 HLOperationLowerHelper &helper,
5309
                                 HLObjectOperationLowerHelper *pObjHelper,
5310
1.23k
                                 bool &Translated) {
5311
1.23k
  Value *addr = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex);
5312
1.23k
  addr = SkipAddrSpaceCast(addr);
5313
5314
1.23k
  unsigned addressSpace = addr->getType()->getPointerAddressSpace();
5315
1.23k
  if (addressSpace == DXIL::kTGSMAddrSpace ||
5316
1.23k
      
addressSpace == DXIL::kNodeRecordAddrSpace176
)
5317
1.16k
    TranslateSharedMemOrNodeAtomicCmpXChg(CI, addr);
5318
76
  else {
5319
    // If not groupshared, we either have an error case or will translate
5320
    // the atomic op in the process of translating users of the subscript
5321
    // operator Mark not translated and validate dest param
5322
76
    Translated = false;
5323
76
    ValidateAtomicDestination(CI, pObjHelper);
5324
76
  }
5325
5326
1.23k
  return nullptr;
5327
1.23k
}
5328
} // namespace
5329
5330
// Process Tess Factor.
5331
namespace {
5332
5333
// Clamp to [0.0f..1.0f], NaN->0.0f.
5334
Value *CleanupTessFactorScale(Value *input, hlsl::OP *hlslOP,
5335
288
                              IRBuilder<> &Builder) {
5336
288
  float fMin = 0;
5337
288
  float fMax = 1;
5338
288
  Type *f32Ty = input->getType()->getScalarType();
5339
288
  Value *minFactor = ConstantFP::get(f32Ty, fMin);
5340
288
  Value *maxFactor = ConstantFP::get(f32Ty, fMax);
5341
288
  Type *Ty = input->getType();
5342
288
  if (Ty->isVectorTy())
5343
288
    minFactor = SplatToVector(minFactor, input->getType(), Builder);
5344
288
  Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor,
5345
288
                                           hlslOP, Builder);
5346
288
  if (Ty->isVectorTy())
5347
288
    maxFactor = SplatToVector(maxFactor, input->getType(), Builder);
5348
288
  return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP,
5349
288
                                    Builder);
5350
288
}
5351
5352
// Clamp to [1.0f..Inf], NaN->1.0f.
5353
288
Value *CleanupTessFactor(Value *input, hlsl::OP *hlslOP, IRBuilder<> &Builder) {
5354
288
  float fMin = 1.0;
5355
288
  Type *f32Ty = input->getType()->getScalarType();
5356
288
  Value *minFactor = ConstantFP::get(f32Ty, fMin);
5357
288
  minFactor = SplatToVector(minFactor, input->getType(), Builder);
5358
288
  return TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor,
5359
288
                                    hlslOP, Builder);
5360
288
}
5361
5362
// Do partitioning-specific clamping.
5363
Value *ClampTessFactor(Value *input,
5364
                       DXIL::TessellatorPartitioning partitionMode,
5365
680
                       hlsl::OP *hlslOP, IRBuilder<> &Builder) {
5366
680
  const unsigned kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR = 64;
5367
680
  const unsigned kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR = 63;
5368
5369
680
  const unsigned kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR = 2;
5370
680
  const unsigned kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR = 1;
5371
5372
680
  const unsigned kTESSELLATOR_MAX_TESSELLATION_FACTOR = 64;
5373
5374
680
  float fMin;
5375
680
  float fMax;
5376
680
  switch (partitionMode) {
5377
152
  case DXIL::TessellatorPartitioning::Integer:
5378
152
    fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
5379
152
    fMax = kTESSELLATOR_MAX_TESSELLATION_FACTOR;
5380
152
    break;
5381
152
  case DXIL::TessellatorPartitioning::Pow2:
5382
152
    fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
5383
152
    fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
5384
152
    break;
5385
224
  case DXIL::TessellatorPartitioning::FractionalOdd:
5386
224
    fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
5387
224
    fMax = kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR;
5388
224
    break;
5389
152
  case DXIL::TessellatorPartitioning::FractionalEven:
5390
152
  default:
5391
152
    DXASSERT(partitionMode == DXIL::TessellatorPartitioning::FractionalEven,
5392
152
             "invalid partition mode");
5393
152
    fMin = kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR;
5394
152
    fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
5395
152
    break;
5396
680
  }
5397
680
  Type *f32Ty = input->getType()->getScalarType();
5398
680
  Value *minFactor = ConstantFP::get(f32Ty, fMin);
5399
680
  Value *maxFactor = ConstantFP::get(f32Ty, fMax);
5400
680
  Type *Ty = input->getType();
5401
680
  if (Ty->isVectorTy())
5402
632
    minFactor = SplatToVector(minFactor, input->getType(), Builder);
5403
680
  Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor,
5404
680
                                           hlslOP, Builder);
5405
680
  if (Ty->isVectorTy())
5406
632
    maxFactor = SplatToVector(maxFactor, input->getType(), Builder);
5407
680
  return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP,
5408
680
                                    Builder);
5409
680
}
5410
5411
// round up for integer/pow2 partitioning
5412
// note that this code assumes the inputs should be in the range [1, inf),
5413
// which should be enforced by the clamp above.
5414
Value *RoundUpTessFactor(Value *input,
5415
                         DXIL::TessellatorPartitioning partitionMode,
5416
704
                         hlsl::OP *hlslOP, IRBuilder<> &Builder) {
5417
704
  switch (partitionMode) {
5418
152
  case DXIL::TessellatorPartitioning::Integer:
5419
152
    return TrivialDxilUnaryOperation(DXIL::OpCode::Round_pi, input, hlslOP,
5420
152
                                     Builder);
5421
152
  case DXIL::TessellatorPartitioning::Pow2: {
5422
152
    const unsigned kExponentMask = 0x7f800000;
5423
152
    const unsigned kExponentLSB = 0x00800000;
5424
152
    const unsigned kMantissaMask = 0x007fffff;
5425
152
    Type *Ty = input->getType();
5426
    // (val = (asuint(val) & mantissamask) ?
5427
    //      (asuint(val) & exponentmask) + exponentbump :
5428
    //      asuint(val) & exponentmask;
5429
152
    Type *uintTy = Type::getInt32Ty(Ty->getContext());
5430
152
    if (Ty->isVectorTy())
5431
152
      uintTy = VectorType::get(uintTy, Ty->getVectorNumElements());
5432
152
    Value *uintVal =
5433
152
        Builder.CreateCast(Instruction::CastOps::FPToUI, input, uintTy);
5434
5435
152
    Value *mantMask = ConstantInt::get(uintTy->getScalarType(), kMantissaMask);
5436
152
    mantMask = SplatToVector(mantMask, uintTy, Builder);
5437
152
    Value *manVal = Builder.CreateAnd(uintVal, mantMask);
5438
5439
152
    Value *expMask = ConstantInt::get(uintTy->getScalarType(), kExponentMask);
5440
152
    expMask = SplatToVector(expMask, uintTy, Builder);
5441
152
    Value *expVal = Builder.CreateAnd(uintVal, expMask);
5442
5443
152
    Value *expLSB = ConstantInt::get(uintTy->getScalarType(), kExponentLSB);
5444
152
    expLSB = SplatToVector(expLSB, uintTy, Builder);
5445
152
    Value *newExpVal = Builder.CreateAdd(expVal, expLSB);
5446
5447
152
    Value *manValNotZero =
5448
152
        Builder.CreateICmpEQ(manVal, ConstantAggregateZero::get(uintTy));
5449
152
    Value *factors = Builder.CreateSelect(manValNotZero, newExpVal, expVal);
5450
152
    return Builder.CreateUIToFP(factors, Ty);
5451
0
  } break;
5452
152
  case DXIL::TessellatorPartitioning::FractionalEven:
5453
400
  case DXIL::TessellatorPartitioning::FractionalOdd:
5454
400
    return input;
5455
0
  default:
5456
0
    DXASSERT(0, "invalid partition mode");
5457
0
    return nullptr;
5458
704
  }
5459
704
}
5460
5461
Value *TranslateProcessIsolineTessFactors(
5462
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
5463
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
5464
32
    bool &Translated) {
5465
32
  hlsl::OP *hlslOP = &helper.hlslOP;
5466
  // Get partition mode
5467
32
  DXASSERT_NOMSG(helper.functionProps);
5468
32
  DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull,
5469
32
           "must be hull shader");
5470
32
  DXIL::TessellatorPartitioning partition =
5471
32
      helper.functionProps->ShaderProps.HS.partition;
5472
5473
32
  IRBuilder<> Builder(CI);
5474
5475
32
  Value *rawDetailFactor =
5476
32
      CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDetailFactor);
5477
32
  rawDetailFactor = Builder.CreateExtractElement(rawDetailFactor, (uint64_t)0);
5478
5479
32
  Value *rawDensityFactor =
5480
32
      CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDensityFactor);
5481
32
  rawDensityFactor =
5482
32
      Builder.CreateExtractElement(rawDensityFactor, (uint64_t)0);
5483
5484
32
  Value *init = UndefValue::get(VectorType::get(helper.f32Ty, 2));
5485
32
  init = Builder.CreateInsertElement(init, rawDetailFactor, (uint64_t)0);
5486
32
  init = Builder.CreateInsertElement(init, rawDetailFactor, (uint64_t)1);
5487
5488
32
  Value *clamped = ClampTessFactor(init, partition, hlslOP, Builder);
5489
32
  Value *rounded = RoundUpTessFactor(clamped, partition, hlslOP, Builder);
5490
5491
32
  Value *roundedDetailFactor =
5492
32
      CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDetailFactor);
5493
32
  Value *temp = UndefValue::get(VectorType::get(helper.f32Ty, 1));
5494
32
  Value *roundedX = Builder.CreateExtractElement(rounded, (uint64_t)0);
5495
32
  temp = Builder.CreateInsertElement(temp, roundedX, (uint64_t)0);
5496
32
  Builder.CreateStore(temp, roundedDetailFactor);
5497
5498
32
  Value *roundedDensityFactor =
5499
32
      CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDensityFactor);
5500
32
  Value *roundedY = Builder.CreateExtractElement(rounded, 1);
5501
32
  temp = Builder.CreateInsertElement(temp, roundedY, (uint64_t)0);
5502
32
  Builder.CreateStore(temp, roundedDensityFactor);
5503
32
  return nullptr;
5504
32
}
5505
5506
// 3 inputs, 1 result
5507
Value *ApplyTriTessFactorOp(Value *input, DXIL::OpCode opcode, hlsl::OP *hlslOP,
5508
120
                            IRBuilder<> &Builder) {
5509
120
  Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
5510
120
  Value *input1 = Builder.CreateExtractElement(input, 1);
5511
120
  Value *input2 = Builder.CreateExtractElement(input, 2);
5512
5513
120
  if (opcode == DXIL::OpCode::FMax || 
opcode == DXIL::OpCode::FMin80
) {
5514
72
    Value *temp =
5515
72
        TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
5516
72
    Value *combined =
5517
72
        TrivialDxilBinaryOperation(opcode, temp, input2, hlslOP, Builder);
5518
72
    return combined;
5519
72
  }
5520
5521
  // Avg.
5522
48
  Value *temp = Builder.CreateFAdd(input0, input1);
5523
48
  Value *combined = Builder.CreateFAdd(temp, input2);
5524
48
  Value *rcp = ConstantFP::get(input0->getType(), 1.0 / 3.0);
5525
48
  combined = Builder.CreateFMul(combined, rcp);
5526
48
  return combined;
5527
120
}
5528
5529
// 4 inputs, 1 result
5530
Value *ApplyQuadTessFactorOp(Value *input, DXIL::OpCode opcode,
5531
120
                             hlsl::OP *hlslOP, IRBuilder<> &Builder) {
5532
120
  Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
5533
120
  Value *input1 = Builder.CreateExtractElement(input, 1);
5534
120
  Value *input2 = Builder.CreateExtractElement(input, 2);
5535
120
  Value *input3 = Builder.CreateExtractElement(input, 3);
5536
5537
120
  if (opcode == DXIL::OpCode::FMax || 
opcode == DXIL::OpCode::FMin80
) {
5538
72
    Value *temp0 =
5539
72
        TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
5540
72
    Value *temp1 =
5541
72
        TrivialDxilBinaryOperation(opcode, input2, input3, hlslOP, Builder);
5542
72
    Value *combined =
5543
72
        TrivialDxilBinaryOperation(opcode, temp0, temp1, hlslOP, Builder);
5544
72
    return combined;
5545
72
  }
5546
5547
  // Avg.
5548
48
  Value *temp0 = Builder.CreateFAdd(input0, input1);
5549
48
  Value *temp1 = Builder.CreateFAdd(input2, input3);
5550
48
  Value *combined = Builder.CreateFAdd(temp0, temp1);
5551
48
  Value *rcp = ConstantFP::get(input0->getType(), 0.25);
5552
48
  combined = Builder.CreateFMul(combined, rcp);
5553
48
  return combined;
5554
120
}
5555
5556
// 4 inputs, 2 result
5557
Value *Apply2DQuadTessFactorOp(Value *input, DXIL::OpCode opcode,
5558
120
                               hlsl::OP *hlslOP, IRBuilder<> &Builder) {
5559
120
  Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0);
5560
120
  Value *input1 = Builder.CreateExtractElement(input, 1);
5561
120
  Value *input2 = Builder.CreateExtractElement(input, 2);
5562
120
  Value *input3 = Builder.CreateExtractElement(input, 3);
5563
5564
120
  if (opcode == DXIL::OpCode::FMax || 
opcode == DXIL::OpCode::FMin80
) {
5565
72
    Value *temp0 =
5566
72
        TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder);
5567
72
    Value *temp1 =
5568
72
        TrivialDxilBinaryOperation(opcode, input2, input3, hlslOP, Builder);
5569
72
    Value *combined = UndefValue::get(VectorType::get(input0->getType(), 2));
5570
72
    combined = Builder.CreateInsertElement(combined, temp0, (uint64_t)0);
5571
72
    combined = Builder.CreateInsertElement(combined, temp1, 1);
5572
72
    return combined;
5573
72
  }
5574
5575
  // Avg.
5576
48
  Value *temp0 = Builder.CreateFAdd(input0, input1);
5577
48
  Value *temp1 = Builder.CreateFAdd(input2, input3);
5578
48
  Value *combined = UndefValue::get(VectorType::get(input0->getType(), 2));
5579
48
  combined = Builder.CreateInsertElement(combined, temp0, (uint64_t)0);
5580
48
  combined = Builder.CreateInsertElement(combined, temp1, 1);
5581
48
  Constant *rcp = ConstantFP::get(input0->getType(), 0.5);
5582
48
  rcp = ConstantVector::getSplat(2, rcp);
5583
48
  combined = Builder.CreateFMul(combined, rcp);
5584
48
  return combined;
5585
120
}
5586
5587
Value *ResolveSmallValue(Value **pClampedResult, Value *rounded,
5588
                         Value *averageUnscaled, float cutoffVal,
5589
                         DXIL::TessellatorPartitioning partitionMode,
5590
72
                         hlsl::OP *hlslOP, IRBuilder<> &Builder) {
5591
72
  Value *clampedResult = *pClampedResult;
5592
72
  Value *clampedVal = clampedResult;
5593
72
  Value *roundedVal = rounded;
5594
  // Do partitioning-specific clamping.
5595
72
  Value *clampedAvg =
5596
72
      ClampTessFactor(averageUnscaled, partitionMode, hlslOP, Builder);
5597
72
  Constant *cutoffVals =
5598
72
      ConstantFP::get(Type::getFloatTy(rounded->getContext()), cutoffVal);
5599
72
  if (clampedAvg->getType()->isVectorTy())
5600
24
    cutoffVals = ConstantVector::getSplat(
5601
24
        clampedAvg->getType()->getVectorNumElements(), cutoffVals);
5602
  // Limit the value.
5603
72
  clampedAvg = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, clampedAvg,
5604
72
                                          cutoffVals, hlslOP, Builder);
5605
  // Round up for integer/pow2 partitioning.
5606
72
  Value *roundedAvg =
5607
72
      RoundUpTessFactor(clampedAvg, partitionMode, hlslOP, Builder);
5608
5609
72
  if (rounded->getType() != cutoffVals->getType())
5610
48
    cutoffVals = ConstantVector::getSplat(
5611
48
        rounded->getType()->getVectorNumElements(), cutoffVals);
5612
  // If the scaled value is less than three, then take the unscaled average.
5613
72
  Value *lt = Builder.CreateFCmpOLT(rounded, cutoffVals);
5614
72
  if (clampedAvg->getType() != clampedVal->getType())
5615
48
    clampedAvg = SplatToVector(clampedAvg, clampedVal->getType(), Builder);
5616
72
  *pClampedResult = Builder.CreateSelect(lt, clampedAvg, clampedVal);
5617
5618
72
  if (roundedAvg->getType() != roundedVal->getType())
5619
48
    roundedAvg = SplatToVector(roundedAvg, roundedVal->getType(), Builder);
5620
72
  Value *result = Builder.CreateSelect(lt, roundedAvg, roundedVal);
5621
72
  return result;
5622
72
}
5623
5624
void ResolveQuadAxes(Value **pFinalResult, Value **pClampedResult,
5625
                     float cutoffVal,
5626
                     DXIL::TessellatorPartitioning partitionMode,
5627
24
                     hlsl::OP *hlslOP, IRBuilder<> &Builder) {
5628
24
  Value *finalResult = *pFinalResult;
5629
24
  Value *clampedResult = *pClampedResult;
5630
5631
24
  Value *clampR = clampedResult;
5632
24
  Value *finalR = finalResult;
5633
24
  Type *f32Ty = Type::getFloatTy(finalR->getContext());
5634
24
  Constant *cutoffVals = ConstantFP::get(f32Ty, cutoffVal);
5635
5636
24
  Value *minValsX = cutoffVals;
5637
24
  Value *minValsY =
5638
24
      RoundUpTessFactor(cutoffVals, partitionMode, hlslOP, Builder);
5639
5640
24
  Value *clampRX = Builder.CreateExtractElement(clampR, (uint64_t)0);
5641
24
  Value *clampRY = Builder.CreateExtractElement(clampR, 1);
5642
24
  Value *maxValsX = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, clampRX,
5643
24
                                               clampRY, hlslOP, Builder);
5644
5645
24
  Value *finalRX = Builder.CreateExtractElement(finalR, (uint64_t)0);
5646
24
  Value *finalRY = Builder.CreateExtractElement(finalR, 1);
5647
24
  Value *maxValsY = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, finalRX,
5648
24
                                               finalRY, hlslOP, Builder);
5649
5650
  // Don't go over our threshold ("final" one is rounded).
5651
24
  Value *optionX = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsX,
5652
24
                                              minValsX, hlslOP, Builder);
5653
24
  Value *optionY = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsY,
5654
24
                                              minValsY, hlslOP, Builder);
5655
5656
24
  Value *clampL = SplatToVector(optionX, clampR->getType(), Builder);
5657
24
  Value *finalL = SplatToVector(optionY, finalR->getType(), Builder);
5658
5659
24
  cutoffVals = ConstantVector::getSplat(2, cutoffVals);
5660
24
  Value *lt = Builder.CreateFCmpOLT(clampedResult, cutoffVals);
5661
24
  *pClampedResult = Builder.CreateSelect(lt, clampL, clampR);
5662
24
  *pFinalResult = Builder.CreateSelect(lt, finalL, finalR);
5663
24
}
5664
5665
Value *TranslateProcessTessFactors(CallInst *CI, IntrinsicOp IOP,
5666
                                   OP::OpCode opcode,
5667
                                   HLOperationLowerHelper &helper,
5668
                                   HLObjectOperationLowerHelper *pObjHelper,
5669
288
                                   bool &Translated) {
5670
288
  hlsl::OP *hlslOP = &helper.hlslOP;
5671
  // Get partition mode
5672
288
  DXASSERT_NOMSG(helper.functionProps);
5673
288
  DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull,
5674
288
           "must be hull shader");
5675
288
  DXIL::TessellatorPartitioning partition =
5676
288
      helper.functionProps->ShaderProps.HS.partition;
5677
5678
288
  IRBuilder<> Builder(CI);
5679
5680
288
  DXIL::OpCode tessFactorOp = DXIL::OpCode::NumOpCodes;
5681
288
  switch (IOP) {
5682
32
  case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
5683
64
  case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
5684
96
  case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
5685
96
    tessFactorOp = DXIL::OpCode::FMax;
5686
96
    break;
5687
32
  case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
5688
64
  case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
5689
96
  case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
5690
96
    tessFactorOp = DXIL::OpCode::FMin;
5691
96
    break;
5692
96
  default:
5693
    // Default is Avg.
5694
96
    break;
5695
288
  }
5696
5697
288
  Value *rawEdgeFactor =
5698
288
      CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawEdgeFactor);
5699
5700
288
  Value *insideScale =
5701
288
      CI->getArgOperand(HLOperandIndex::kProcessTessFactorInsideScale);
5702
  // Clamp to [0.0f..1.0f], NaN->0.0f.
5703
288
  Value *scales = CleanupTessFactorScale(insideScale, hlslOP, Builder);
5704
  // Do partitioning-specific clamping.
5705
288
  Value *clamped = ClampTessFactor(rawEdgeFactor, partition, hlslOP, Builder);
5706
  // Round up for integer/pow2 partitioning.
5707
288
  Value *rounded = RoundUpTessFactor(clamped, partition, hlslOP, Builder);
5708
  // Store the output.
5709
288
  Value *roundedEdgeFactor =
5710
288
      CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedEdgeFactor);
5711
288
  Builder.CreateStore(rounded, roundedEdgeFactor);
5712
5713
  // Clamp to [1.0f..Inf], NaN->1.0f.
5714
288
  bool isQuad = false;
5715
288
  Value *clean = CleanupTessFactor(rawEdgeFactor, hlslOP, Builder);
5716
288
  Value *factors = nullptr;
5717
288
  switch (IOP) {
5718
32
  case IntrinsicOp::IOP_Process2DQuadTessFactorsAvg:
5719
64
  case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
5720
96
  case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
5721
96
    factors = Apply2DQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
5722
96
    break;
5723
32
  case IntrinsicOp::IOP_ProcessQuadTessFactorsAvg:
5724
64
  case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
5725
96
  case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
5726
96
    factors = ApplyQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
5727
96
    isQuad = true;
5728
96
    break;
5729
32
  case IntrinsicOp::IOP_ProcessTriTessFactorsAvg:
5730
64
  case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
5731
96
  case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
5732
96
    factors = ApplyTriTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
5733
96
    break;
5734
0
  default:
5735
0
    DXASSERT(0, "invalid opcode for ProcessTessFactor");
5736
0
    break;
5737
288
  }
5738
5739
288
  Value *scaledI = nullptr;
5740
288
  if (scales->getType() == factors->getType())
5741
96
    scaledI = Builder.CreateFMul(factors, scales);
5742
192
  else {
5743
192
    Value *vecFactors = SplatToVector(factors, scales->getType(), Builder);
5744
192
    scaledI = Builder.CreateFMul(vecFactors, scales);
5745
192
  }
5746
5747
  // Do partitioning-specific clamping.
5748
288
  Value *clampedI = ClampTessFactor(scaledI, partition, hlslOP, Builder);
5749
5750
  // Round up for integer/pow2 partitioning.
5751
288
  Value *roundedI = RoundUpTessFactor(clampedI, partition, hlslOP, Builder);
5752
5753
288
  Value *finalI = roundedI;
5754
5755
288
  if (partition == DXIL::TessellatorPartitioning::FractionalOdd) {
5756
    // If not max, set to AVG.
5757
72
    if (tessFactorOp != DXIL::OpCode::FMax)
5758
48
      tessFactorOp = DXIL::OpCode::NumOpCodes;
5759
5760
72
    bool b2D = false;
5761
72
    Value *avgFactorsI = nullptr;
5762
72
    switch (IOP) {
5763
8
    case IntrinsicOp::IOP_Process2DQuadTessFactorsAvg:
5764
16
    case IntrinsicOp::IOP_Process2DQuadTessFactorsMax:
5765
24
    case IntrinsicOp::IOP_Process2DQuadTessFactorsMin:
5766
24
      avgFactorsI =
5767
24
          Apply2DQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
5768
24
      b2D = true;
5769
24
      break;
5770
8
    case IntrinsicOp::IOP_ProcessQuadTessFactorsAvg:
5771
16
    case IntrinsicOp::IOP_ProcessQuadTessFactorsMax:
5772
24
    case IntrinsicOp::IOP_ProcessQuadTessFactorsMin:
5773
24
      avgFactorsI = ApplyQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
5774
24
      break;
5775
8
    case IntrinsicOp::IOP_ProcessTriTessFactorsAvg:
5776
16
    case IntrinsicOp::IOP_ProcessTriTessFactorsMax:
5777
24
    case IntrinsicOp::IOP_ProcessTriTessFactorsMin:
5778
24
      avgFactorsI = ApplyTriTessFactorOp(clean, tessFactorOp, hlslOP, Builder);
5779
24
      break;
5780
0
    default:
5781
0
      DXASSERT(0, "invalid opcode for ProcessTessFactor");
5782
0
      break;
5783
72
    }
5784
5785
72
    finalI = ResolveSmallValue(/*inout*/ &clampedI, roundedI, avgFactorsI,
5786
72
                               /*cufoff*/ 3.0, partition, hlslOP, Builder);
5787
5788
72
    if (b2D)
5789
24
      ResolveQuadAxes(/*inout*/ &finalI, /*inout*/ &clampedI, /*cutoff*/ 3.0,
5790
24
                      partition, hlslOP, Builder);
5791
72
  }
5792
5793
288
  Value *unroundedInsideFactor = CI->getArgOperand(
5794
288
      HLOperandIndex::kProcessTessFactorUnRoundedInsideFactor);
5795
288
  Type *outFactorTy = unroundedInsideFactor->getType()->getPointerElementType();
5796
288
  if (outFactorTy != clampedI->getType()) {
5797
96
    DXASSERT(isQuad, "quad only write one channel of out factor");
5798
96
    (void)isQuad;
5799
96
    clampedI = Builder.CreateExtractElement(clampedI, (uint64_t)0);
5800
    // Splat clampedI to float2.
5801
96
    clampedI = SplatToVector(clampedI, outFactorTy, Builder);
5802
96
  }
5803
288
  Builder.CreateStore(clampedI, unroundedInsideFactor);
5804
5805
288
  Value *roundedInsideFactor =
5806
288
      CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedInsideFactor);
5807
288
  if (outFactorTy != finalI->getType()) {
5808
96
    DXASSERT(isQuad, "quad only write one channel of out factor");
5809
96
    finalI = Builder.CreateExtractElement(finalI, (uint64_t)0);
5810
    // Splat finalI to float2.
5811
96
    finalI = SplatToVector(finalI, outFactorTy, Builder);
5812
96
  }
5813
288
  Builder.CreateStore(finalI, roundedInsideFactor);
5814
288
  return nullptr;
5815
288
}
5816
5817
} // namespace
5818
5819
// Ray Tracing.
5820
namespace {
5821
Value *TranslateReportIntersection(CallInst *CI, IntrinsicOp IOP,
5822
                                   OP::OpCode opcode,
5823
                                   HLOperationLowerHelper &helper,
5824
                                   HLObjectOperationLowerHelper *pObjHelper,
5825
142
                                   bool &Translated) {
5826
142
  hlsl::OP *hlslOP = &helper.hlslOP;
5827
142
  Value *THit = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
5828
142
  Value *HitKind = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
5829
142
  Value *Attr = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
5830
142
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
5831
5832
142
  Type *Ty = Attr->getType();
5833
142
  Function *F = hlslOP->GetOpFunc(opcode, Ty);
5834
5835
142
  IRBuilder<> Builder(CI);
5836
142
  return Builder.CreateCall(F, {opArg, THit, HitKind, Attr});
5837
142
}
5838
5839
Value *TranslateCallShader(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
5840
                           HLOperationLowerHelper &helper,
5841
                           HLObjectOperationLowerHelper *pObjHelper,
5842
126
                           bool &Translated) {
5843
126
  hlsl::OP *hlslOP = &helper.hlslOP;
5844
126
  Value *ShaderIndex = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
5845
126
  Value *Parameter = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
5846
126
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
5847
5848
126
  Type *Ty = Parameter->getType();
5849
126
  Function *F = hlslOP->GetOpFunc(opcode, Ty);
5850
5851
126
  IRBuilder<> Builder(CI);
5852
126
  return Builder.CreateCall(F, {opArg, ShaderIndex, Parameter});
5853
126
}
5854
5855
static void TransferRayDescArgs(Value **Args, hlsl::OP *OP,
5856
                                IRBuilder<> &Builder, CallInst *CI,
5857
766
                                unsigned &Index, unsigned &HLIndex) {
5858
  // Extract elements from flattened ray desc arguments in HL op.
5859
  // float3 Origin;
5860
766
  Value *origin = CI->getArgOperand(HLIndex++);
5861
766
  Args[Index++] = Builder.CreateExtractElement(origin, (uint64_t)0);
5862
766
  Args[Index++] = Builder.CreateExtractElement(origin, 1);
5863
766
  Args[Index++] = Builder.CreateExtractElement(origin, 2);
5864
  // float  TMin;
5865
766
  Args[Index++] = CI->getArgOperand(HLIndex++);
5866
  // float3 Direction;
5867
766
  Value *direction = CI->getArgOperand(HLIndex++);
5868
766
  Args[Index++] = Builder.CreateExtractElement(direction, (uint64_t)0);
5869
766
  Args[Index++] = Builder.CreateExtractElement(direction, 1);
5870
766
  Args[Index++] = Builder.CreateExtractElement(direction, 2);
5871
  // float  TMax;
5872
766
  Args[Index++] = CI->getArgOperand(HLIndex++);
5873
766
}
5874
5875
Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
5876
                         HLOperationLowerHelper &Helper,
5877
                         HLObjectOperationLowerHelper *pObjHelper,
5878
548
                         bool &Translated) {
5879
548
  hlsl::OP *OP = &Helper.hlslOP;
5880
5881
548
  Value *Args[DXIL::OperandIndex::kTraceRayNumOp];
5882
548
  Args[0] = OP->GetU32Const(static_cast<unsigned>(OpCode));
5883
548
  unsigned Index = 1, HLIndex = 1;
5884
3.83k
  while (HLIndex < HLOperandIndex::kTraceRayRayDescOpIdx)
5885
3.28k
    Args[Index++] = CI->getArgOperand(HLIndex++);
5886
5887
548
  IRBuilder<> Builder(CI);
5888
548
  TransferRayDescArgs(Args, OP, Builder, CI, Index, HLIndex);
5889
548
  DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands() - 1);
5890
548
  DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayPayloadOpIdx);
5891
5892
548
  Value *Payload = CI->getArgOperand(HLIndex++);
5893
548
  Args[Index++] = Payload;
5894
5895
548
  DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands());
5896
548
  DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayNumOp);
5897
5898
548
  Type *Ty = Payload->getType();
5899
548
  Function *F = OP->GetOpFunc(OpCode, Ty);
5900
5901
548
  return Builder.CreateCall(F, Args);
5902
548
}
5903
5904
// RayQuery methods
5905
5906
Value *TranslateAllocateRayQuery(CallInst *CI, IntrinsicOp IOP,
5907
                                 OP::OpCode opcode,
5908
                                 HLOperationLowerHelper &helper,
5909
                                 HLObjectOperationLowerHelper *pObjHelper,
5910
158
                                 bool &Translated) {
5911
158
  hlsl::OP *hlslOP = &helper.hlslOP;
5912
  // upgrade to allocateRayQuery2 if there is a non-zero 2nd template arg
5913
158
  DXASSERT(CI->getNumArgOperands() == 3,
5914
158
           "hlopcode for allocaterayquery always expects 3 arguments");
5915
5916
158
  llvm::Value *Arg =
5917
158
      CI->getArgOperand(HLOperandIndex::kAllocateRayQueryRayQueryFlagsIdx);
5918
158
  llvm::ConstantInt *ConstVal = llvm::dyn_cast<llvm::ConstantInt>(Arg);
5919
158
  DXASSERT(ConstVal,
5920
158
           "2nd argument to allocaterayquery must always be a constant value");
5921
158
  if (ConstVal->getValue().getZExtValue() != 0) {
5922
6
    Value *refArgs[3] = {
5923
6
        nullptr, CI->getOperand(HLOperandIndex::kAllocateRayQueryRayFlagsIdx),
5924
6
        CI->getOperand(HLOperandIndex::kAllocateRayQueryRayQueryFlagsIdx)};
5925
6
    opcode = OP::OpCode::AllocateRayQuery2;
5926
6
    return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
5927
6
  }
5928
152
  Value *refArgs[2] = {
5929
152
      nullptr, CI->getOperand(HLOperandIndex::kAllocateRayQueryRayFlagsIdx)};
5930
152
  return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP);
5931
158
}
5932
5933
Value *TranslateTraceRayInline(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
5934
                               HLOperationLowerHelper &helper,
5935
                               HLObjectOperationLowerHelper *pObjHelper,
5936
184
                               bool &Translated) {
5937
184
  hlsl::OP *hlslOP = &helper.hlslOP;
5938
184
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
5939
5940
184
  Value *Args[DXIL::OperandIndex::kTraceRayInlineNumOp];
5941
184
  Args[0] = opArg;
5942
184
  unsigned Index = 1, HLIndex = 1;
5943
920
  while (HLIndex < HLOperandIndex::kTraceRayInlineRayDescOpIdx)
5944
736
    Args[Index++] = CI->getArgOperand(HLIndex++);
5945
5946
184
  IRBuilder<> Builder(CI);
5947
184
  DXASSERT_NOMSG(HLIndex == HLOperandIndex::kTraceRayInlineRayDescOpIdx);
5948
184
  DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayInlineRayDescOpIdx);
5949
184
  TransferRayDescArgs(Args, hlslOP, Builder, CI, Index, HLIndex);
5950
184
  DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands());
5951
184
  DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayInlineNumOp);
5952
5953
184
  Function *F = hlslOP->GetOpFunc(opcode, Builder.getVoidTy());
5954
5955
184
  return Builder.CreateCall(F, Args);
5956
184
}
5957
5958
Value *TranslateCommitProceduralPrimitiveHit(
5959
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
5960
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
5961
8
    bool &Translated) {
5962
8
  hlsl::OP *hlslOP = &helper.hlslOP;
5963
8
  Value *THit = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
5964
8
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
5965
8
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
5966
5967
8
  Value *Args[] = {opArg, handle, THit};
5968
5969
8
  IRBuilder<> Builder(CI);
5970
8
  Function *F = hlslOP->GetOpFunc(opcode, Builder.getVoidTy());
5971
5972
8
  return Builder.CreateCall(F, Args);
5973
8
}
5974
5975
Value *TranslateGenericRayQueryMethod(CallInst *CI, IntrinsicOp IOP,
5976
                                      OP::OpCode opcode,
5977
                                      HLOperationLowerHelper &helper,
5978
                                      HLObjectOperationLowerHelper *pObjHelper,
5979
296
                                      bool &Translated) {
5980
296
  hlsl::OP *hlslOP = &helper.hlslOP;
5981
5982
296
  Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
5983
296
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
5984
5985
296
  IRBuilder<> Builder(CI);
5986
296
  Function *F = hlslOP->GetOpFunc(opcode, CI->getType());
5987
5988
296
  return Builder.CreateCall(F, {opArg, handle});
5989
296
}
5990
5991
Value *TranslateRayQueryMatrix3x4Operation(
5992
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
5993
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
5994
32
    bool &Translated) {
5995
32
  hlsl::OP *hlslOP = &helper.hlslOP;
5996
32
  VectorType *Ty = cast<VectorType>(CI->getType());
5997
32
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
5998
32
  uint32_t rVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
5999
32
  Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
6000
32
  uint8_t cVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
6001
32
  Constant *cols = ConstantDataVector::get(CI->getContext(), cVals);
6002
32
  Value *retVal = TrivialDxilOperation(opcode, {nullptr, handle, rows, cols},
6003
32
                                       Ty, CI, hlslOP);
6004
32
  return retVal;
6005
32
}
6006
6007
Value *TranslateRayQueryTransposedMatrix3x4Operation(
6008
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
6009
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
6010
32
    bool &Translated) {
6011
32
  hlsl::OP *hlslOP = &helper.hlslOP;
6012
32
  VectorType *Ty = cast<VectorType>(CI->getType());
6013
32
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
6014
32
  uint32_t rVals[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2};
6015
32
  Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
6016
32
  uint8_t cVals[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3};
6017
32
  Constant *cols = ConstantDataVector::get(CI->getContext(), cVals);
6018
32
  Value *retVal = TrivialDxilOperation(opcode, {nullptr, handle, rows, cols},
6019
32
                                       Ty, CI, hlslOP);
6020
32
  return retVal;
6021
32
}
6022
6023
Value *TranslateRayQueryFloat2Getter(CallInst *CI, IntrinsicOp IOP,
6024
                                     OP::OpCode opcode,
6025
                                     HLOperationLowerHelper &helper,
6026
                                     HLObjectOperationLowerHelper *pObjHelper,
6027
24
                                     bool &Translated) {
6028
24
  hlsl::OP *hlslOP = &helper.hlslOP;
6029
24
  VectorType *Ty = cast<VectorType>(CI->getType());
6030
24
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
6031
24
  uint8_t elementVals[] = {0, 1};
6032
24
  Constant *element = ConstantDataVector::get(CI->getContext(), elementVals);
6033
24
  Value *retVal =
6034
24
      TrivialDxilOperation(opcode, {nullptr, handle, element}, Ty, CI, hlslOP);
6035
24
  return retVal;
6036
24
}
6037
6038
Value *TranslateRayQueryFloat3Getter(CallInst *CI, IntrinsicOp IOP,
6039
                                     OP::OpCode opcode,
6040
                                     HLOperationLowerHelper &helper,
6041
                                     HLObjectOperationLowerHelper *pObjHelper,
6042
48
                                     bool &Translated) {
6043
48
  hlsl::OP *hlslOP = &helper.hlslOP;
6044
48
  VectorType *Ty = cast<VectorType>(CI->getType());
6045
48
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
6046
48
  uint8_t elementVals[] = {0, 1, 2};
6047
48
  Constant *element = ConstantDataVector::get(CI->getContext(), elementVals);
6048
48
  Value *retVal =
6049
48
      TrivialDxilOperation(opcode, {nullptr, handle, element}, Ty, CI, hlslOP);
6050
48
  return retVal;
6051
48
}
6052
6053
Value *TranslateNoArgVectorOperation(CallInst *CI, IntrinsicOp IOP,
6054
                                     OP::OpCode opcode,
6055
                                     HLOperationLowerHelper &helper,
6056
                                     HLObjectOperationLowerHelper *pObjHelper,
6057
450
                                     bool &Translated) {
6058
450
  hlsl::OP *hlslOP = &helper.hlslOP;
6059
450
  VectorType *Ty = cast<VectorType>(CI->getType());
6060
450
  uint8_t vals[] = {0, 1, 2, 3};
6061
450
  Constant *src = ConstantDataVector::get(CI->getContext(), vals);
6062
450
  Value *retVal = TrivialDxilOperation(opcode, {nullptr, src}, Ty, CI, hlslOP);
6063
450
  return retVal;
6064
450
}
6065
6066
static Value *ConstructBuiltInTrianglePositionsFromFloat9(
6067
22
    Value *float9Vec, StructType *hlslStructTy, IRBuilder<> &Builder) {
6068
22
  Type *f32Ty = Type::getFloatTy(Builder.getContext());
6069
22
  Type *float3Ty = VectorType::get(f32Ty, 3);
6070
22
  Value *result = UndefValue::get(hlslStructTy);
6071
6072
  // Build p0, p1, p2 from vector elements 0-2, 3-5, 6-8
6073
88
  for (unsigned field = 0; field < 3; 
field++66
) {
6074
66
    Value *float3 = UndefValue::get(float3Ty);
6075
264
    for (unsigned i = 0; i < 3; 
i++198
) {
6076
198
      Value *elem = Builder.CreateExtractElement(float9Vec, field * 3 + i);
6077
198
      float3 = Builder.CreateInsertElement(float3, elem, i);
6078
198
    }
6079
66
    result = Builder.CreateInsertValue(result, float3, field);
6080
66
  }
6081
6082
22
  return result;
6083
22
}
6084
6085
Value *TranslateTriangleObjectPositions(
6086
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
6087
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
6088
10
    bool &Translated) {
6089
10
  hlsl::OP *hlslOP = &helper.hlslOP;
6090
10
  IRBuilder<> Builder(CI);
6091
6092
10
  Value *outputPtr = CI->getArgOperand(HLOperandIndex::kIOP_SRetOpIdx);
6093
10
  StructType *hlslStructTy =
6094
10
      cast<StructType>(outputPtr->getType()->getPointerElementType());
6095
6096
10
  Type *f32Ty = Type::getFloatTy(CI->getContext());
6097
10
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, f32Ty);
6098
10
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
6099
6100
10
  Value *dxilCall = Builder.CreateCall(dxilFunc, {opArg});
6101
6102
10
  Value *structValue = ConstructBuiltInTrianglePositionsFromFloat9(
6103
10
      dxilCall, hlslStructTy, Builder);
6104
10
  Builder.CreateStore(structValue, outputPtr);
6105
6106
10
  return nullptr;
6107
10
}
6108
6109
Value *TranslateRayQueryTriangleObjectPositions(
6110
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
6111
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
6112
8
    bool &Translated) {
6113
8
  hlsl::OP *hlslOP = &helper.hlslOP;
6114
6115
8
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
6116
8
  StructType *hlslStructTy =
6117
8
      cast<StructType>(CI->getType()->getPointerElementType());
6118
6119
8
  Function *F = CI->getParent()->getParent();
6120
8
  IRBuilder<> AllocaBuilder(&F->getEntryBlock(), F->getEntryBlock().begin());
6121
8
  AllocaInst *resultAlloca = AllocaBuilder.CreateAlloca(hlslStructTy);
6122
6123
8
  IRBuilder<> Builder(CI);
6124
6125
8
  Type *f32Ty = Type::getFloatTy(CI->getContext());
6126
8
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, f32Ty);
6127
8
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
6128
6129
8
  Value *dxilCall = Builder.CreateCall(dxilFunc, {opArg, handle});
6130
6131
8
  Value *structValue = ConstructBuiltInTrianglePositionsFromFloat9(
6132
8
      dxilCall, hlslStructTy, Builder);
6133
8
  Builder.CreateStore(structValue, resultAlloca);
6134
6135
8
  return resultAlloca;
6136
8
}
6137
6138
Value *TranslateHitObjectTriangleObjectPositions(
6139
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
6140
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
6141
4
    bool &Translated) {
6142
4
  hlsl::OP *hlslOP = &helper.hlslOP;
6143
6144
4
  Value *hitObjectPtr = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
6145
4
  StructType *hlslStructTy =
6146
4
      cast<StructType>(CI->getType()->getPointerElementType());
6147
6148
4
  Function *F = CI->getParent()->getParent();
6149
4
  IRBuilder<> AllocaBuilder(&F->getEntryBlock(), F->getEntryBlock().begin());
6150
4
  AllocaInst *resultAlloca = AllocaBuilder.CreateAlloca(hlslStructTy);
6151
6152
4
  IRBuilder<> Builder(CI);
6153
4
  Value *hitObject = Builder.CreateLoad(hitObjectPtr);
6154
6155
4
  Type *f32Ty = Type::getFloatTy(CI->getContext());
6156
4
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, f32Ty);
6157
4
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
6158
6159
4
  Value *dxilCall = Builder.CreateCall(dxilFunc, {opArg, hitObject});
6160
6161
4
  Value *structValue = ConstructBuiltInTrianglePositionsFromFloat9(
6162
4
      dxilCall, hlslStructTy, Builder);
6163
4
  Builder.CreateStore(structValue, resultAlloca);
6164
6165
4
  return resultAlloca;
6166
4
}
6167
6168
template <typename ColElemTy>
6169
static void GetMatrixIndices(Constant *&Rows, Constant *&Cols, bool Is3x4,
6170
72
                             LLVMContext &Ctx) {
6171
72
  if (Is3x4) {
6172
48
    uint32_t RVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
6173
48
    Rows = ConstantDataVector::get(Ctx, RVals);
6174
48
    ColElemTy CVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
6175
48
    Cols = ConstantDataVector::get(Ctx, CVals);
6176
48
    return;
6177
48
  }
6178
24
  uint32_t RVals[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2};
6179
24
  Rows = ConstantDataVector::get(Ctx, RVals);
6180
24
  ColElemTy CVals[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3};
6181
24
  Cols = ConstantDataVector::get(Ctx, CVals);
6182
24
}
HLOperationLower.cpp:void (anonymous namespace)::GetMatrixIndices<unsigned char>(llvm::Constant*&, llvm::Constant*&, bool, llvm::LLVMContext&)
Line
Count
Source
6170
56
                             LLVMContext &Ctx) {
6171
56
  if (Is3x4) {
6172
40
    uint32_t RVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
6173
40
    Rows = ConstantDataVector::get(Ctx, RVals);
6174
40
    ColElemTy CVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
6175
40
    Cols = ConstantDataVector::get(Ctx, CVals);
6176
40
    return;
6177
40
  }
6178
16
  uint32_t RVals[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2};
6179
16
  Rows = ConstantDataVector::get(Ctx, RVals);
6180
16
  ColElemTy CVals[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3};
6181
16
  Cols = ConstantDataVector::get(Ctx, CVals);
6182
16
}
HLOperationLower.cpp:void (anonymous namespace)::GetMatrixIndices<unsigned int>(llvm::Constant*&, llvm::Constant*&, bool, llvm::LLVMContext&)
Line
Count
Source
6170
16
                             LLVMContext &Ctx) {
6171
16
  if (Is3x4) {
6172
8
    uint32_t RVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
6173
8
    Rows = ConstantDataVector::get(Ctx, RVals);
6174
8
    ColElemTy CVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
6175
8
    Cols = ConstantDataVector::get(Ctx, CVals);
6176
8
    return;
6177
8
  }
6178
8
  uint32_t RVals[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2};
6179
8
  Rows = ConstantDataVector::get(Ctx, RVals);
6180
8
  ColElemTy CVals[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3};
6181
8
  Cols = ConstantDataVector::get(Ctx, CVals);
6182
8
}
6183
6184
Value *TranslateNoArgMatrix3x4Operation(
6185
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
6186
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
6187
40
    bool &Translated) {
6188
40
  hlsl::OP *hlslOP = &helper.hlslOP;
6189
40
  VectorType *Ty = cast<VectorType>(CI->getType());
6190
40
  Constant *Rows, *Cols;
6191
40
  GetMatrixIndices<uint8_t>(Rows, Cols, true, CI->getContext());
6192
40
  return TrivialDxilOperation(opcode, {nullptr, Rows, Cols}, Ty, CI, hlslOP);
6193
40
}
6194
6195
Value *TranslateNoArgTransposedMatrix3x4Operation(
6196
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
6197
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
6198
16
    bool &Translated) {
6199
16
  hlsl::OP *hlslOP = &helper.hlslOP;
6200
16
  VectorType *Ty = cast<VectorType>(CI->getType());
6201
16
  Constant *Rows, *Cols;
6202
16
  GetMatrixIndices<uint8_t>(Rows, Cols, false, CI->getContext());
6203
16
  return TrivialDxilOperation(opcode, {nullptr, Rows, Cols}, Ty, CI, hlslOP);
6204
16
}
6205
6206
/*
6207
HLSL:
6208
void ThreadNodeOutputRecords<recordType>::OutputComplete();
6209
void GroupNodeOutputRecords<recordType>::OutputComplete();
6210
DXIL:
6211
void @dx.op.outputComplete(i32 %Opcode, %dx.types.NodeRecordHandle
6212
%RecordHandle)
6213
*/
6214
Value *TranslateNodeOutputComplete(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
6215
                                   HLOperationLowerHelper &helper,
6216
                                   HLObjectOperationLowerHelper *pObjHelper,
6217
146
                                   bool &Translated) {
6218
146
  hlsl::OP *OP = &helper.hlslOP;
6219
6220
146
  Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
6221
146
  DXASSERT_NOMSG(handle->getType() == OP->GetNodeRecordHandleType());
6222
146
  Function *dxilFunc = OP->GetOpFunc(op, CI->getType());
6223
146
  Value *opArg = OP->GetU32Const((unsigned)op);
6224
6225
146
  IRBuilder<> Builder(CI);
6226
146
  return Builder.CreateCall(dxilFunc, {opArg, handle});
6227
146
}
6228
6229
Value *TranslateNoArgNoReturnPreserveOutput(
6230
    CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
6231
    HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
6232
144
    bool &Translated) {
6233
144
  Instruction *pResult = cast<Instruction>(
6234
144
      TrivialNoArgOperation(CI, IOP, opcode, helper, pObjHelper, Translated));
6235
  // HL intrinsic must have had a return injected just after the call.
6236
  // SROA_Parameter_HLSL will copy from alloca to output just before each
6237
  // return. Now move call after the copy and just before the return.
6238
144
  if (isa<ReturnInst>(pResult->getNextNode()))
6239
0
    return pResult;
6240
144
  ReturnInst *RetI = cast<ReturnInst>(pResult->getParent()->getTerminator());
6241
144
  pResult->removeFromParent();
6242
144
  pResult->insertBefore(RetI);
6243
144
  return pResult;
6244
144
}
6245
6246
// Special half dot2 with accumulate to float
6247
Value *TranslateDot2Add(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
6248
                        HLOperationLowerHelper &helper,
6249
                        HLObjectOperationLowerHelper *pObjHelper,
6250
16
                        bool &Translated) {
6251
16
  hlsl::OP *hlslOP = &helper.hlslOP;
6252
16
  Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
6253
16
  const unsigned vecSize = 2;
6254
16
  DXASSERT(src0->getType()->isVectorTy() &&
6255
16
               vecSize == src0->getType()->getVectorNumElements() &&
6256
16
               src0->getType()->getScalarType()->isHalfTy(),
6257
16
           "otherwise, unexpected input dimension or component type");
6258
6259
16
  Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
6260
16
  DXASSERT(src0->getType() == src1->getType(),
6261
16
           "otherwise, mismatched argument types");
6262
16
  Value *accArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
6263
16
  Type *accTy = accArg->getType();
6264
16
  DXASSERT(!accTy->isVectorTy() && accTy->isFloatTy(),
6265
16
           "otherwise, unexpected accumulator type");
6266
16
  IRBuilder<> Builder(CI);
6267
6268
16
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, accTy);
6269
16
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
6270
6271
16
  SmallVector<Value *, 6> args;
6272
16
  args.emplace_back(opArg);
6273
16
  args.emplace_back(accArg);
6274
48
  for (unsigned i = 0; i < vecSize; 
i++32
)
6275
32
    args.emplace_back(Builder.CreateExtractElement(src0, i));
6276
48
  for (unsigned i = 0; i < vecSize; 
i++32
)
6277
32
    args.emplace_back(Builder.CreateExtractElement(src1, i));
6278
16
  return Builder.CreateCall(dxilFunc, args);
6279
16
}
6280
6281
Value *TranslateDot4AddPacked(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
6282
                              HLOperationLowerHelper &helper,
6283
                              HLObjectOperationLowerHelper *pObjHelper,
6284
32
                              bool &Translated) {
6285
32
  hlsl::OP *hlslOP = &helper.hlslOP;
6286
32
  Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
6287
32
  DXASSERT(
6288
32
      !src0->getType()->isVectorTy() && src0->getType()->isIntegerTy(32),
6289
32
      "otherwise, unexpected vector support in high level intrinsic template");
6290
32
  Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
6291
32
  DXASSERT(src0->getType() == src1->getType(),
6292
32
           "otherwise, mismatched argument types");
6293
32
  Value *accArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
6294
32
  Type *accTy = accArg->getType();
6295
32
  DXASSERT(
6296
32
      !accTy->isVectorTy() && accTy->isIntegerTy(32),
6297
32
      "otherwise, unexpected vector support in high level intrinsic template");
6298
32
  IRBuilder<> Builder(CI);
6299
6300
32
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, accTy);
6301
32
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
6302
32
  return Builder.CreateCall(dxilFunc, {opArg, accArg, src0, src1});
6303
32
}
6304
6305
Value *TranslatePack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
6306
                     HLOperationLowerHelper &helper,
6307
                     HLObjectOperationLowerHelper *pObjHelper,
6308
72
                     bool &Translated) {
6309
72
  hlsl::OP *hlslOP = &helper.hlslOP;
6310
6311
72
  Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
6312
72
  Type *valTy = val->getType();
6313
72
  Type *eltTy = valTy->getScalarType();
6314
6315
72
  DXASSERT(valTy->isVectorTy() && valTy->getVectorNumElements() == 4 &&
6316
72
               eltTy->isIntegerTy() &&
6317
72
               (eltTy->getIntegerBitWidth() == 32 ||
6318
72
                eltTy->getIntegerBitWidth() == 16),
6319
72
           "otherwise, unexpected input dimension or component type");
6320
6321
72
  DXIL::PackMode packMode = DXIL::PackMode::Trunc;
6322
72
  switch (IOP) {
6323
18
  case hlsl::IntrinsicOp::IOP_pack_clamp_s8:
6324
18
    packMode = DXIL::PackMode::SClamp;
6325
18
    break;
6326
18
  case hlsl::IntrinsicOp::IOP_pack_clamp_u8:
6327
18
    packMode = DXIL::PackMode::UClamp;
6328
18
    break;
6329
18
  case hlsl::IntrinsicOp::IOP_pack_s8:
6330
36
  case hlsl::IntrinsicOp::IOP_pack_u8:
6331
36
    packMode = DXIL::PackMode::Trunc;
6332
36
    break;
6333
0
  default:
6334
0
    DXASSERT(false, "unexpected opcode");
6335
0
    break;
6336
72
  }
6337
6338
72
  IRBuilder<> Builder(CI);
6339
72
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, eltTy);
6340
72
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
6341
72
  Constant *packModeArg = hlslOP->GetU8Const((unsigned)packMode);
6342
6343
72
  Value *elt0 = Builder.CreateExtractElement(val, (uint64_t)0);
6344
72
  Value *elt1 = Builder.CreateExtractElement(val, (uint64_t)1);
6345
72
  Value *elt2 = Builder.CreateExtractElement(val, (uint64_t)2);
6346
72
  Value *elt3 = Builder.CreateExtractElement(val, (uint64_t)3);
6347
72
  return Builder.CreateCall(dxilFunc,
6348
72
                            {opArg, packModeArg, elt0, elt1, elt2, elt3});
6349
72
}
6350
6351
Value *TranslateUnpack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
6352
                       HLOperationLowerHelper &helper,
6353
                       HLObjectOperationLowerHelper *pObjHelper,
6354
88
                       bool &Translated) {
6355
88
  hlsl::OP *hlslOP = &helper.hlslOP;
6356
6357
88
  Value *packedVal = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
6358
88
  DXASSERT(
6359
88
      !packedVal->getType()->isVectorTy() &&
6360
88
          packedVal->getType()->isIntegerTy(32),
6361
88
      "otherwise, unexpected vector support in high level intrinsic template");
6362
6363
88
  Type *overloadType = nullptr;
6364
88
  DXIL::UnpackMode unpackMode = DXIL::UnpackMode::Unsigned;
6365
88
  switch (IOP) {
6366
24
  case hlsl::IntrinsicOp::IOP_unpack_s8s32:
6367
24
    unpackMode = DXIL::UnpackMode::Signed;
6368
24
    overloadType = helper.i32Ty;
6369
24
    break;
6370
24
  case hlsl::IntrinsicOp::IOP_unpack_u8u32:
6371
24
    unpackMode = DXIL::UnpackMode::Unsigned;
6372
24
    overloadType = helper.i32Ty;
6373
24
    break;
6374
20
  case hlsl::IntrinsicOp::IOP_unpack_s8s16:
6375
20
    unpackMode = DXIL::UnpackMode::Signed;
6376
20
    overloadType = helper.i16Ty;
6377
20
    break;
6378
20
  case hlsl::IntrinsicOp::IOP_unpack_u8u16:
6379
20
    unpackMode = DXIL::UnpackMode::Unsigned;
6380
20
    overloadType = helper.i16Ty;
6381
20
    break;
6382
0
  default:
6383
0
    DXASSERT(false, "unexpected opcode");
6384
0
    break;
6385
88
  }
6386
6387
88
  IRBuilder<> Builder(CI);
6388
88
  Function *dxilFunc = hlslOP->GetOpFunc(opcode, overloadType);
6389
88
  Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
6390
88
  Constant *unpackModeArg = hlslOP->GetU8Const((unsigned)unpackMode);
6391
88
  Value *Res = Builder.CreateCall(dxilFunc, {opArg, unpackModeArg, packedVal});
6392
6393
  // Convert the final aggregate into a vector to make the types match
6394
88
  const unsigned vecSize = 4;
6395
88
  Value *ResVec = UndefValue::get(CI->getType());
6396
440
  for (unsigned i = 0; i < vecSize; 
++i352
) {
6397
352
    Value *Elt = Builder.CreateExtractValue(Res, i);
6398
352
    ResVec = Builder.CreateInsertElement(ResVec, Elt, i);
6399
352
  }
6400
88
  return ResVec;
6401
88
}
6402
6403
} // namespace
6404
6405
// Shader Execution Reordering.
6406
namespace {
6407
Value *TranslateHitObjectMakeNop(CallInst *CI, IntrinsicOp IOP,
6408
                                 OP::OpCode Opcode,
6409
                                 HLOperationLowerHelper &Helper,
6410
                                 HLObjectOperationLowerHelper *ObjHelper,
6411
44
                                 bool &Translated) {
6412
44
  hlsl::OP *HlslOP = &Helper.hlslOP;
6413
44
  IRBuilder<> Builder(CI);
6414
44
  Value *HitObjectPtr = CI->getArgOperand(1);
6415
44
  Value *HitObject = TrivialDxilOperation(
6416
44
      Opcode, {nullptr}, Type::getVoidTy(CI->getContext()), CI, HlslOP);
6417
44
  Builder.CreateStore(HitObject, HitObjectPtr);
6418
44
  DXASSERT(
6419
44
      CI->use_empty(),
6420
44
      "Default ctor return type is a Clang artifact. Value must not be used");
6421
44
  return nullptr;
6422
44
}
6423
6424
Value *TranslateHitObjectMakeMiss(CallInst *CI, IntrinsicOp IOP,
6425
                                  OP::OpCode Opcode,
6426
                                  HLOperationLowerHelper &Helper,
6427
                                  HLObjectOperationLowerHelper *ObjHelper,
6428
24
                                  bool &Translated) {
6429
24
  DXASSERT_NOMSG(CI->getNumArgOperands() ==
6430
24
                 HLOperandIndex::kHitObjectMakeMiss_NumOp);
6431
24
  hlsl::OP *OP = &Helper.hlslOP;
6432
24
  IRBuilder<> Builder(CI);
6433
24
  Value *Args[DXIL::OperandIndex::kHitObjectMakeMiss_NumOp];
6434
24
  Args[0] = nullptr; // Filled in by TrivialDxilOperation
6435
6436
24
  unsigned DestIdx = 1, SrcIdx = 1;
6437
24
  Value *HitObjectPtr = CI->getArgOperand(SrcIdx++);
6438
24
  Args[DestIdx++] = CI->getArgOperand(SrcIdx++); // RayFlags
6439
24
  Args[DestIdx++] = CI->getArgOperand(SrcIdx++); // MissShaderIdx
6440
6441
24
  DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectMakeMiss_RayDescOpIdx);
6442
24
  DXASSERT_NOMSG(DestIdx ==
6443
24
                 DXIL::OperandIndex::kHitObjectMakeMiss_RayDescOpIdx);
6444
24
  TransferRayDescArgs(Args, OP, Builder, CI, DestIdx, SrcIdx);
6445
24
  DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands());
6446
24
  DXASSERT_NOMSG(DestIdx == DXIL::OperandIndex::kHitObjectMakeMiss_NumOp);
6447
6448
24
  Value *OutHitObject =
6449
24
      TrivialDxilOperation(Opcode, Args, Helper.voidTy, CI, OP);
6450
24
  Builder.CreateStore(OutHitObject, HitObjectPtr);
6451
24
  return nullptr;
6452
24
}
6453
6454
Value *TranslateMaybeReorderThread(CallInst *CI, IntrinsicOp IOP,
6455
                                   OP::OpCode OpCode,
6456
                                   HLOperationLowerHelper &Helper,
6457
                                   HLObjectOperationLowerHelper *pObjHelper,
6458
36
                                   bool &Translated) {
6459
36
  hlsl::OP *OP = &Helper.hlslOP;
6460
6461
  // clang-format off
6462
  // Match MaybeReorderThread overload variants:
6463
  // void MaybeReorderThread(<Op>,
6464
  //                    HitObject Hit);
6465
  // void MaybeReorderThread(<Op>,
6466
  //                    uint CoherenceHint,
6467
  //                    uint NumCoherenceHintBitsFromLSB );
6468
  // void MaybeReorderThread(<Op>,
6469
  //                    HitObject Hit,
6470
  //                    uint CoherenceHint,
6471
  //                    uint NumCoherenceHintBitsFromLSB);
6472
  // clang-format on
6473
36
  const unsigned NumHLArgs = CI->getNumArgOperands();
6474
36
  DXASSERT_NOMSG(NumHLArgs >= 2);
6475
6476
  // Use a NOP HitObject for MaybeReorderThread without HitObject.
6477
36
  Value *HitObject = nullptr;
6478
36
  unsigned HLIndex = 1;
6479
36
  if (3 == NumHLArgs) {
6480
6
    HitObject = TrivialDxilOperation(DXIL::OpCode::HitObject_MakeNop, {nullptr},
6481
6
                                     Type::getVoidTy(CI->getContext()), CI, OP);
6482
30
  } else {
6483
30
    Value *FirstParam = CI->getArgOperand(HLIndex);
6484
30
    DXASSERT_NOMSG(isa<PointerType>(FirstParam->getType()));
6485
30
    IRBuilder<> Builder(CI);
6486
30
    HitObject = Builder.CreateLoad(FirstParam);
6487
30
    HLIndex++;
6488
30
  }
6489
6490
  // If there are trailing parameters, these have to be the two coherence bit
6491
  // parameters
6492
36
  Value *CoherenceHint = nullptr;
6493
36
  Value *NumCoherenceHintBits = nullptr;
6494
36
  if (2 != NumHLArgs) {
6495
12
    DXASSERT_NOMSG(HLIndex + 2 == NumHLArgs);
6496
12
    CoherenceHint = CI->getArgOperand(HLIndex++);
6497
12
    NumCoherenceHintBits = CI->getArgOperand(HLIndex++);
6498
12
    DXASSERT_NOMSG(Helper.i32Ty == CoherenceHint->getType());
6499
12
    DXASSERT_NOMSG(Helper.i32Ty == NumCoherenceHintBits->getType());
6500
24
  } else {
6501
24
    CoherenceHint = UndefValue::get(Helper.i32Ty);
6502
24
    NumCoherenceHintBits = OP->GetU32Const(0);
6503
24
  }
6504
6505
36
  TrivialDxilOperation(
6506
36
      OpCode, {nullptr, HitObject, CoherenceHint, NumCoherenceHintBits},
6507
36
      Type::getVoidTy(CI->getContext()), CI, OP);
6508
36
  return nullptr;
6509
36
}
6510
6511
Value *TranslateHitObjectFromRayQuery(CallInst *CI, IntrinsicOp IOP,
6512
                                      OP::OpCode OpCode,
6513
                                      HLOperationLowerHelper &Helper,
6514
                                      HLObjectOperationLowerHelper *pObjHelper,
6515
8
                                      bool &Translated) {
6516
8
  hlsl::OP *OP = &Helper.hlslOP;
6517
8
  IRBuilder<> Builder(CI);
6518
6519
8
  unsigned SrcIdx = 1;
6520
8
  Value *HitObjectPtr = CI->getArgOperand(SrcIdx++);
6521
8
  Value *RayQuery = CI->getArgOperand(SrcIdx++);
6522
6523
8
  if (CI->getNumArgOperands() ==
6524
8
      HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_NumOp) {
6525
4
    Value *HitKind = CI->getArgOperand(SrcIdx++);
6526
4
    Value *AttribSrc = CI->getArgOperand(SrcIdx++);
6527
4
    DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands());
6528
4
    OpCode = DXIL::OpCode::HitObject_FromRayQueryWithAttrs;
6529
4
    Type *AttrTy = AttribSrc->getType();
6530
4
    Value *OutHitObject = TrivialDxilOperation(
6531
4
        OpCode, {nullptr, RayQuery, HitKind, AttribSrc}, AttrTy, CI, OP);
6532
4
    Builder.CreateStore(OutHitObject, HitObjectPtr);
6533
4
    return nullptr;
6534
4
  }
6535
6536
4
  DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands());
6537
4
  OpCode = DXIL::OpCode::HitObject_FromRayQuery;
6538
4
  Value *OutHitObject =
6539
4
      TrivialDxilOperation(OpCode, {nullptr, RayQuery}, Helper.voidTy, CI, OP);
6540
4
  Builder.CreateStore(OutHitObject, HitObjectPtr);
6541
4
  return nullptr;
6542
8
}
6543
6544
Value *TranslateHitObjectTraceRay(CallInst *CI, IntrinsicOp IOP,
6545
                                  OP::OpCode OpCode,
6546
                                  HLOperationLowerHelper &Helper,
6547
                                  HLObjectOperationLowerHelper *pObjHelper,
6548
10
                                  bool &Translated) {
6549
10
  hlsl::OP *OP = &Helper.hlslOP;
6550
10
  IRBuilder<> Builder(CI);
6551
6552
10
  DXASSERT_NOMSG(CI->getNumArgOperands() ==
6553
10
                 HLOperandIndex::kHitObjectTraceRay_NumOp);
6554
10
  Value *Args[DXIL::OperandIndex::kHitObjectTraceRay_NumOp];
6555
10
  Value *OpArg = OP->GetU32Const(static_cast<unsigned>(OpCode));
6556
10
  Args[0] = OpArg;
6557
6558
10
  unsigned DestIdx = 1, SrcIdx = 1;
6559
10
  Value *HitObjectPtr = CI->getArgOperand(SrcIdx++);
6560
10
  Args[DestIdx++] = CI->getArgOperand(SrcIdx++);
6561
60
  for (; SrcIdx < HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx;
6562
50
       ++SrcIdx, ++DestIdx) {
6563
50
    Args[DestIdx] = CI->getArgOperand(SrcIdx);
6564
50
  }
6565
6566
10
  DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx);
6567
10
  DXASSERT_NOMSG(DestIdx ==
6568
10
                 DXIL::OperandIndex::kHitObjectTraceRay_RayDescOpIdx);
6569
10
  TransferRayDescArgs(Args, OP, Builder, CI, DestIdx, SrcIdx);
6570
10
  DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands() - 1);
6571
10
  DXASSERT_NOMSG(DestIdx ==
6572
10
                 DXIL::OperandIndex::kHitObjectTraceRay_PayloadOpIdx);
6573
6574
10
  Value *Payload = CI->getArgOperand(SrcIdx++);
6575
10
  Args[DestIdx++] = Payload;
6576
6577
10
  DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands());
6578
10
  DXASSERT_NOMSG(DestIdx == DXIL::OperandIndex::kHitObjectTraceRay_NumOp);
6579
6580
10
  Function *F = OP->GetOpFunc(OpCode, Payload->getType());
6581
6582
10
  Value *OutHitObject = Builder.CreateCall(F, Args);
6583
10
  Builder.CreateStore(OutHitObject, HitObjectPtr);
6584
10
  return nullptr;
6585
10
}
6586
6587
Value *TranslateHitObjectInvoke(CallInst *CI, IntrinsicOp IOP,
6588
                                OP::OpCode OpCode,
6589
                                HLOperationLowerHelper &Helper,
6590
                                HLObjectOperationLowerHelper *pObjHelper,
6591
4
                                bool &Translated) {
6592
4
  unsigned SrcIdx = 1;
6593
4
  Value *HitObjectPtr = CI->getArgOperand(SrcIdx++);
6594
4
  Value *Payload = CI->getArgOperand(SrcIdx++);
6595
4
  DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands());
6596
6597
4
  IRBuilder<> Builder(CI);
6598
4
  Value *HitObject = Builder.CreateLoad(HitObjectPtr);
6599
4
  TrivialDxilOperation(OpCode, {nullptr, HitObject, Payload},
6600
4
                       Payload->getType(), CI, &Helper.hlslOP);
6601
4
  return nullptr;
6602
4
}
6603
6604
Value *TranslateHitObjectGetAttributes(CallInst *CI, IntrinsicOp IOP,
6605
                                       OP::OpCode OpCode,
6606
                                       HLOperationLowerHelper &Helper,
6607
                                       HLObjectOperationLowerHelper *pObjHelper,
6608
6
                                       bool &Translated) {
6609
6
  hlsl::OP *OP = &Helper.hlslOP;
6610
6
  IRBuilder<> Builder(CI);
6611
6612
6
  Value *HitObjectPtr = CI->getArgOperand(1);
6613
6
  Value *HitObject = Builder.CreateLoad(HitObjectPtr);
6614
6
  Value *AttrOutPtr =
6615
6
      CI->getArgOperand(HLOperandIndex::kHitObjectGetAttributes_AttributeOpIdx);
6616
6
  TrivialDxilOperation(OpCode, {nullptr, HitObject, AttrOutPtr},
6617
6
                       AttrOutPtr->getType(), CI, OP);
6618
6
  return nullptr;
6619
6
}
6620
6621
Value *TranslateHitObjectScalarGetter(CallInst *CI, IntrinsicOp IOP,
6622
                                      OP::OpCode OpCode,
6623
                                      HLOperationLowerHelper &Helper,
6624
                                      HLObjectOperationLowerHelper *pObjHelper,
6625
74
                                      bool &Translated) {
6626
74
  hlsl::OP *OP = &Helper.hlslOP;
6627
74
  Value *HitObjectPtr = CI->getArgOperand(1);
6628
74
  IRBuilder<> Builder(CI);
6629
74
  Value *HitObject = Builder.CreateLoad(HitObjectPtr);
6630
74
  return TrivialDxilOperation(OpCode, {nullptr, HitObject}, CI->getType(), CI,
6631
74
                              OP);
6632
74
}
6633
6634
Value *TranslateHitObjectVectorGetter(CallInst *CI, IntrinsicOp IOP,
6635
                                      OP::OpCode OpCode,
6636
                                      HLOperationLowerHelper &Helper,
6637
                                      HLObjectOperationLowerHelper *pObjHelper,
6638
16
                                      bool &Translated) {
6639
16
  hlsl::OP *OP = &Helper.hlslOP;
6640
16
  Value *HitObjectPtr = CI->getArgOperand(1);
6641
16
  IRBuilder<> Builder(CI);
6642
16
  Value *HitObject = Builder.CreateLoad(HitObjectPtr);
6643
16
  VectorType *Ty = cast<VectorType>(CI->getType());
6644
16
  uint32_t Vals[] = {0, 1, 2, 3};
6645
16
  Constant *Src = ConstantDataVector::get(CI->getContext(), Vals);
6646
16
  return TrivialDxilOperation(OpCode, {nullptr, HitObject, Src}, Ty, CI, OP);
6647
16
}
6648
6649
16
static bool IsHitObject3x4Getter(IntrinsicOp IOP) {
6650
16
  switch (IOP) {
6651
8
  default:
6652
8
    return false;
6653
4
  case IntrinsicOp::MOP_DxHitObject_GetObjectToWorld3x4:
6654
8
  case IntrinsicOp::MOP_DxHitObject_GetWorldToObject3x4:
6655
8
    return true;
6656
16
  }
6657
16
}
6658
6659
Value *TranslateHitObjectMatrixGetter(CallInst *CI, IntrinsicOp IOP,
6660
                                      OP::OpCode OpCode,
6661
                                      HLOperationLowerHelper &Helper,
6662
                                      HLObjectOperationLowerHelper *pObjHelper,
6663
16
                                      bool &Translated) {
6664
16
  hlsl::OP *OP = &Helper.hlslOP;
6665
16
  Value *HitObjectPtr = CI->getArgOperand(1);
6666
16
  IRBuilder<> Builder(CI);
6667
16
  Value *HitObject = Builder.CreateLoad(HitObjectPtr);
6668
6669
  // Create 3x4 matrix indices
6670
16
  bool Is3x4 = IsHitObject3x4Getter(IOP);
6671
16
  Constant *Rows, *Cols;
6672
16
  GetMatrixIndices<uint32_t>(Rows, Cols, Is3x4, CI->getContext());
6673
6674
16
  VectorType *Ty = cast<VectorType>(CI->getType());
6675
16
  return TrivialDxilOperation(OpCode, {nullptr, HitObject, Rows, Cols}, Ty, CI,
6676
16
                              OP);
6677
16
}
6678
6679
Value *TranslateHitObjectLoadLocalRootTableConstant(
6680
    CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
6681
    HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper,
6682
4
    bool &Translated) {
6683
4
  hlsl::OP *OP = &Helper.hlslOP;
6684
4
  IRBuilder<> Builder(CI);
6685
6686
4
  Value *HitObjectPtr = CI->getArgOperand(1);
6687
4
  Value *Offset = CI->getArgOperand(2);
6688
6689
4
  Value *HitObject = Builder.CreateLoad(HitObjectPtr);
6690
4
  return TrivialDxilOperation(OpCode, {nullptr, HitObject, Offset},
6691
4
                              Helper.voidTy, CI, OP);
6692
4
}
6693
6694
Value *TranslateHitObjectSetShaderTableIndex(
6695
    CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
6696
    HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper,
6697
4
    bool &Translated) {
6698
4
  hlsl::OP *OP = &Helper.hlslOP;
6699
4
  IRBuilder<> Builder(CI);
6700
6701
4
  Value *HitObjectPtr = CI->getArgOperand(1);
6702
4
  Value *ShaderTableIndex = CI->getArgOperand(2);
6703
6704
4
  Value *InHitObject = Builder.CreateLoad(HitObjectPtr);
6705
4
  Value *OutHitObject = TrivialDxilOperation(
6706
4
      OpCode, {nullptr, InHitObject, ShaderTableIndex}, Helper.voidTy, CI, OP);
6707
4
  Builder.CreateStore(OutHitObject, HitObjectPtr);
6708
4
  return nullptr;
6709
4
}
6710
6711
} // namespace
6712
6713
// Resource Handle.
6714
namespace {
6715
Value *TranslateGetHandleFromHeap(CallInst *CI, IntrinsicOp IOP,
6716
                                  DXIL::OpCode opcode,
6717
                                  HLOperationLowerHelper &helper,
6718
                                  HLObjectOperationLowerHelper *pObjHelper,
6719
602
                                  bool &Translated) {
6720
602
  hlsl::OP &hlslOP = helper.hlslOP;
6721
602
  Function *dxilFunc = hlslOP.GetOpFunc(opcode, helper.voidTy);
6722
602
  IRBuilder<> Builder(CI);
6723
602
  Value *opArg = ConstantInt::get(helper.i32Ty, (unsigned)opcode);
6724
602
  return Builder.CreateCall(
6725
602
      dxilFunc, {opArg, CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx),
6726
602
                 CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx),
6727
                 // TODO: update nonUniformIndex later.
6728
602
                 Builder.getInt1(false)});
6729
602
}
6730
} // namespace
6731
6732
// Translate and/or/select intrinsics
6733
namespace {
6734
Value *TranslateAnd(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
6735
                    HLOperationLowerHelper &helper,
6736
                    HLObjectOperationLowerHelper *pObjHelper,
6737
60
                    bool &Translated) {
6738
60
  Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
6739
60
  Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
6740
60
  IRBuilder<> Builder(CI);
6741
6742
60
  return Builder.CreateAnd(x, y);
6743
60
}
6744
Value *TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
6745
                   HLOperationLowerHelper &helper,
6746
60
                   HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
6747
60
  Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
6748
60
  Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
6749
60
  IRBuilder<> Builder(CI);
6750
6751
60
  return Builder.CreateOr(x, y);
6752
60
}
6753
Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
6754
                       HLOperationLowerHelper &helper,
6755
                       HLObjectOperationLowerHelper *pObjHelper,
6756
30
                       bool &Translated) {
6757
30
  Value *cond = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
6758
30
  Value *t = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
6759
30
  Value *f = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
6760
30
  IRBuilder<> Builder(CI);
6761
6762
30
  return Builder.CreateSelect(cond, t, f);
6763
30
}
6764
6765
Value *TranslateLinAlgFillMatrix(CallInst *CI, IntrinsicOp IOP,
6766
                                 OP::OpCode OpCode,
6767
                                 HLOperationLowerHelper &Helper,
6768
                                 HLObjectOperationLowerHelper *ObjHelper,
6769
44
                                 bool &Translated) {
6770
44
  hlsl::OP *HlslOp = &Helper.hlslOP;
6771
44
  IRBuilder<> Builder(CI);
6772
6773
44
  Value *MatrixPtr = CI->getArgOperand(1);
6774
44
  DXASSERT_NOMSG(isa<PointerType>(MatrixPtr->getType()));
6775
44
  Type *MatrixType = MatrixPtr->getType()->getPointerElementType();
6776
44
  Value *Scalar = CI->getArgOperand(2);
6777
6778
44
  Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode);
6779
44
  Function *DxilFunc =
6780
44
      HlslOp->GetOpFunc(OpCode, {MatrixType, Scalar->getType()});
6781
6782
44
  Value *Matrix = Builder.CreateCall(DxilFunc, {OpArg, Scalar});
6783
44
  Builder.CreateStore(Matrix, MatrixPtr);
6784
6785
44
  return nullptr;
6786
44
}
6787
6788
Value *TranslateLinAlgMatrixAccumStoreToDescriptor(
6789
    CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
6790
    HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper,
6791
10
    bool &Translated) {
6792
10
  hlsl::OP *HlslOp = &Helper.hlslOP;
6793
10
  IRBuilder<> Builder(CI);
6794
6795
10
  Value *Matrix = CI->getArgOperand(1);
6796
10
  Value *ResHandle = CI->getArgOperand(2);
6797
10
  Value *Offset = CI->getArgOperand(3);
6798
10
  Value *Stride = CI->getArgOperand(4);
6799
10
  Value *Layout = CI->getArgOperand(5);
6800
10
  Value *Align = CI->getArgOperand(6);
6801
6802
10
  Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode);
6803
10
  Function *DxilFunc = HlslOp->GetOpFunc(OpCode, Matrix->getType());
6804
6805
10
  return Builder.CreateCall(
6806
10
      DxilFunc, {OpArg, Matrix, ResHandle, Offset, Stride, Layout, Align});
6807
10
}
6808
6809
Value *TranslateLinAlgMatVecMul(CallInst *CI, IntrinsicOp IOP,
6810
                                OP::OpCode OpCode,
6811
                                HLOperationLowerHelper &Helper,
6812
                                HLObjectOperationLowerHelper *ObjHelper,
6813
4
                                bool &Translated) {
6814
4
  hlsl::OP *HlslOp = &Helper.hlslOP;
6815
4
  IRBuilder<> Builder(CI);
6816
6817
4
  Value *ReturnVecPtr = CI->getArgOperand(1);
6818
4
  DXASSERT_NOMSG(isa<PointerType>(ReturnVecPtr->getType()));
6819
4
  Type *ReturnVecType = ReturnVecPtr->getType()->getPointerElementType();
6820
6821
4
  Value *Matrix = CI->getArgOperand(2);
6822
4
  Value *IsOutputSigned = CI->getArgOperand(3);
6823
4
  Value *InputVector = CI->getArgOperand(4);
6824
4
  Value *InputVectorInterp = CI->getArgOperand(5);
6825
6826
4
  Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode);
6827
4
  Function *DxilFunc = HlslOp->GetOpFunc(
6828
4
      OpCode, {ReturnVecType, Matrix->getType(), InputVector->getType()});
6829
6830
4
  Value *ReturnVec =
6831
4
      Builder.CreateCall(DxilFunc, {OpArg, Matrix, IsOutputSigned, InputVector,
6832
4
                                    InputVectorInterp});
6833
4
  Builder.CreateStore(ReturnVec, ReturnVecPtr);
6834
6835
4
  return nullptr;
6836
4
}
6837
6838
Value *TranslateLinAlgMatVecMulAdd(CallInst *CI, IntrinsicOp IOP,
6839
                                   OP::OpCode OpCode,
6840
                                   HLOperationLowerHelper &Helper,
6841
                                   HLObjectOperationLowerHelper *ObjHelper,
6842
14
                                   bool &Translated) {
6843
14
  hlsl::OP *HlslOp = &Helper.hlslOP;
6844
14
  IRBuilder<> Builder(CI);
6845
6846
14
  Value *ReturnVecPtr = CI->getArgOperand(1);
6847
14
  DXASSERT_NOMSG(isa<PointerType>(ReturnVecPtr->getType()));
6848
14
  Type *ReturnVecType = ReturnVecPtr->getType()->getPointerElementType();
6849
6850
14
  Value *Matrix = CI->getArgOperand(2);
6851
14
  Value *IsOutputSigned = CI->getArgOperand(3);
6852
14
  Value *InputVector = CI->getArgOperand(4);
6853
14
  Value *InputVectorInterp = CI->getArgOperand(5);
6854
14
  Value *BiasVector = CI->getArgOperand(6);
6855
14
  Value *BiasVectorInterp = CI->getArgOperand(7);
6856
6857
14
  Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode);
6858
14
  Function *DxilFunc = HlslOp->GetOpFunc(
6859
14
      OpCode, {ReturnVecType, Matrix->getType(), InputVector->getType(),
6860
14
               BiasVector->getType()});
6861
6862
14
  Value *ReturnVec = Builder.CreateCall(
6863
14
      DxilFunc, {OpArg, Matrix, IsOutputSigned, InputVector, InputVectorInterp,
6864
14
                 BiasVector, BiasVectorInterp});
6865
14
  Builder.CreateStore(ReturnVec, ReturnVecPtr);
6866
6867
14
  return nullptr;
6868
14
}
6869
6870
Value *TranslateLinAlgMatrixLoadFromDescriptor(
6871
    CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
6872
    HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper,
6873
10
    bool &Translated) {
6874
10
  hlsl::OP *HlslOp = &Helper.hlslOP;
6875
10
  IRBuilder<> Builder(CI);
6876
6877
10
  Value *MatrixPtr = CI->getArgOperand(1);
6878
10
  DXASSERT_NOMSG(isa<PointerType>(MatrixPtr->getType()));
6879
10
  Type *MatrixType = MatrixPtr->getType()->getPointerElementType();
6880
6881
10
  Value *ResHandle = CI->getArgOperand(2);
6882
10
  Value *Offset = CI->getArgOperand(3);
6883
10
  Value *Stride = CI->getArgOperand(4);
6884
10
  Value *Layout = CI->getArgOperand(5);
6885
10
  Value *Align = CI->getArgOperand(6);
6886
6887
10
  Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode);
6888
10
  Function *DxilFunc = HlslOp->GetOpFunc(OpCode, MatrixType);
6889
6890
10
  Value *Matrix = Builder.CreateCall(
6891
10
      DxilFunc, {OpArg, ResHandle, Offset, Stride, Layout, Align});
6892
10
  Builder.CreateStore(Matrix, MatrixPtr);
6893
6894
10
  return nullptr;
6895
10
}
6896
6897
Value *TranslateLinAlgMatrixOuterProduct(
6898
    CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
6899
    HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper,
6900
12
    bool &Translated) {
6901
12
  hlsl::OP *HlslOp = &Helper.hlslOP;
6902
12
  IRBuilder<> Builder(CI);
6903
6904
12
  Value *MatrixPtr = CI->getArgOperand(1);
6905
12
  DXASSERT_NOMSG(isa<PointerType>(MatrixPtr->getType()));
6906
12
  Type *MatrixType = MatrixPtr->getType()->getPointerElementType();
6907
12
  Value *VecA = CI->getArgOperand(2);
6908
12
  Value *VecB = CI->getArgOperand(3);
6909
6910
12
  Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode);
6911
12
  Function *DxilFunc =
6912
12
      HlslOp->GetOpFunc(OpCode, {MatrixType, VecA->getType(), VecB->getType()});
6913
6914
12
  Value *Matrix = Builder.CreateCall(DxilFunc, {OpArg, VecA, VecB});
6915
12
  Builder.CreateStore(Matrix, MatrixPtr);
6916
6917
12
  return nullptr;
6918
12
}
6919
6920
Value *TranslateLinAlgMatrixAccumulate(CallInst *CI, IntrinsicOp IOP,
6921
                                       OP::OpCode OpCode,
6922
                                       HLOperationLowerHelper &Helper,
6923
                                       HLObjectOperationLowerHelper *ObjHelper,
6924
6
                                       bool &Translated) {
6925
6
  hlsl::OP *HlslOp = &Helper.hlslOP;
6926
6
  IRBuilder<> Builder(CI);
6927
6928
6
  Value *MatrixCPtr = CI->getArgOperand(1);
6929
6
  DXASSERT_NOMSG(isa<PointerType>(MatrixCPtr->getType()));
6930
6
  Type *MatrixCType = MatrixCPtr->getType()->getPointerElementType();
6931
6932
6
  Value *MatrixLHS = CI->getArgOperand(2);
6933
6
  Value *MatrixRHS = CI->getArgOperand(3);
6934
6935
6
  Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode);
6936
6
  Function *DxilFunc = HlslOp->GetOpFunc(
6937
6
      OpCode, {MatrixCType, MatrixLHS->getType(), MatrixRHS->getType()});
6938
6939
6
  Value *MatrixC = Builder.CreateCall(DxilFunc, {OpArg, MatrixLHS, MatrixRHS});
6940
6
  Builder.CreateStore(MatrixC, MatrixCPtr);
6941
6942
6
  return nullptr;
6943
6
}
6944
6945
Value *TranslateLinAlgMatrixGetCoordinate(
6946
    CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
6947
    HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper,
6948
4
    bool &Translated) {
6949
4
  hlsl::OP *HlslOp = &Helper.hlslOP;
6950
4
  IRBuilder<> Builder(CI);
6951
6952
4
  Value *Matrix = CI->getArgOperand(1);
6953
4
  Value *Index = CI->getArgOperand(2);
6954
6955
4
  Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode);
6956
4
  Function *DxilFunc = HlslOp->GetOpFunc(OpCode, Matrix->getType());
6957
6958
4
  return Builder.CreateCall(DxilFunc, {OpArg, Matrix, Index});
6959
4
}
6960
6961
Value *TranslateLinAlgMatrixGetElement(CallInst *CI, IntrinsicOp IOP,
6962
                                       OP::OpCode OpCode,
6963
                                       HLOperationLowerHelper &Helper,
6964
                                       HLObjectOperationLowerHelper *ObjHelper,
6965
10
                                       bool &Translated) {
6966
10
  hlsl::OP *HlslOp = &Helper.hlslOP;
6967
10
  IRBuilder<> Builder(CI);
6968
6969
10
  Value *RetElemPtr = CI->getArgOperand(1);
6970
10
  DXASSERT_NOMSG(isa<PointerType>(RetElemPtr->getType()));
6971
10
  Type *RetTy = RetElemPtr->getType()->getPointerElementType();
6972
6973
10
  Value *Matrix = CI->getArgOperand(2);
6974
10
  Value *Index = CI->getArgOperand(3);
6975
6976
10
  Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode);
6977
10
  Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {RetTy, Matrix->getType()});
6978
6979
10
  Value *RetElem = Builder.CreateCall(DxilFunc, {OpArg, Matrix, Index});
6980
10
  Builder.CreateStore(RetElem, RetElemPtr);
6981
6982
10
  return nullptr;
6983
10
}
6984
6985
Value *TranslateLinAlgMatrixSetElement(CallInst *CI, IntrinsicOp IOP,
6986
                                       OP::OpCode OpCode,
6987
                                       HLOperationLowerHelper &Helper,
6988
                                       HLObjectOperationLowerHelper *ObjHelper,
6989
4
                                       bool &Translated) {
6990
4
  hlsl::OP *HlslOp = &Helper.hlslOP;
6991
4
  IRBuilder<> Builder(CI);
6992
6993
4
  Value *RetMatrixPtr = CI->getArgOperand(1);
6994
4
  DXASSERT_NOMSG(isa<PointerType>(RetMatrixPtr->getType()));
6995
4
  Type *RetMatrixTy = RetMatrixPtr->getType()->getPointerElementType();
6996
6997
4
  Value *InMatrix = CI->getArgOperand(2);
6998
4
  Value *Index = CI->getArgOperand(3);
6999
4
  Value *NewVal = CI->getArgOperand(4);
7000
7001
4
  Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode);
7002
4
  Function *DxilFunc = HlslOp->GetOpFunc(
7003
4
      OpCode, {RetMatrixTy, InMatrix->getType(), NewVal->getType()});
7004
7005
4
  Value *RetMatrix =
7006
4
      Builder.CreateCall(DxilFunc, {OpArg, InMatrix, Index, NewVal});
7007
4
  Builder.CreateStore(RetMatrix, RetMatrixPtr);
7008
7009
4
  return nullptr;
7010
4
}
7011
7012
Value *TranslateLinAlgMatrixMatrixMultiply(
7013
    CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
7014
    HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper,
7015
10
    bool &Translated) {
7016
10
  hlsl::OP *HlslOp = &Helper.hlslOP;
7017
10
  IRBuilder<> Builder(CI);
7018
7019
10
  Value *MatrixCPtr = CI->getArgOperand(1);
7020
10
  DXASSERT_NOMSG(isa<PointerType>(MatrixCPtr->getType()));
7021
10
  Type *MatrixCTy = MatrixCPtr->getType()->getPointerElementType();
7022
7023
10
  Value *MatrixA = CI->getArgOperand(2);
7024
10
  Value *MatrixB = CI->getArgOperand(3);
7025
7026
10
  Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode);
7027
10
  Function *DxilFunc = HlslOp->GetOpFunc(
7028
10
      OpCode, {MatrixCTy, MatrixA->getType(), MatrixB->getType()});
7029
7030
10
  Value *MatrixC = Builder.CreateCall(DxilFunc, {OpArg, MatrixA, MatrixB});
7031
10
  Builder.CreateStore(MatrixC, MatrixCPtr);
7032
7033
10
  return nullptr;
7034
10
}
7035
7036
Value *TranslateLinAlgMatrixMatrixMultiplyAccumulate(
7037
    CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
7038
    HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper,
7039
4
    bool &Translated) {
7040
4
  hlsl::OP *HlslOp = &Helper.hlslOP;
7041
4
  IRBuilder<> Builder(CI);
7042
7043
4
  Value *MatrixRPtr = CI->getArgOperand(1);
7044
4
  DXASSERT_NOMSG(isa<PointerType>(MatrixRPtr->getType()));
7045
4
  Type *MatrixRTy = MatrixRPtr->getType()->getPointerElementType();
7046
7047
4
  Value *MatrixA = CI->getArgOperand(2);
7048
4
  Value *MatrixB = CI->getArgOperand(3);
7049
4
  Value *MatrixC = CI->getArgOperand(4);
7050
7051
4
  Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode);
7052
4
  Function *DxilFunc =
7053
4
      HlslOp->GetOpFunc(OpCode, {MatrixRTy, MatrixA->getType(),
7054
4
                                 MatrixB->getType(), MatrixC->getType()});
7055
7056
4
  Value *MatrixR =
7057
4
      Builder.CreateCall(DxilFunc, {OpArg, MatrixA, MatrixB, MatrixC});
7058
4
  Builder.CreateStore(MatrixR, MatrixRPtr);
7059
7060
4
  return nullptr;
7061
4
}
7062
7063
Value *TranslateLinAlgCopyConvertMatrix(CallInst *CI, IntrinsicOp IOP,
7064
                                        OP::OpCode OpCode,
7065
                                        HLOperationLowerHelper &Helper,
7066
                                        HLObjectOperationLowerHelper *ObjHelper,
7067
6
                                        bool &Translated) {
7068
6
  hlsl::OP *HlslOp = &Helper.hlslOP;
7069
6
  IRBuilder<> Builder(CI);
7070
7071
6
  Value *MatrixRPtr = CI->getArgOperand(1);
7072
6
  DXASSERT_NOMSG(isa<PointerType>(MatrixRPtr->getType()));
7073
6
  Type *MatrixRTy = MatrixRPtr->getType()->getPointerElementType();
7074
7075
6
  Value *MatrixSrc = CI->getArgOperand(2);
7076
6
  Value *Transpose = CI->getArgOperand(3);
7077
7078
6
  Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode);
7079
6
  Function *DxilFunc =
7080
6
      HlslOp->GetOpFunc(OpCode, {MatrixRTy, MatrixSrc->getType()});
7081
7082
6
  Value *MatrixR = Builder.CreateCall(DxilFunc, {OpArg, MatrixSrc, Transpose});
7083
6
  Builder.CreateStore(MatrixR, MatrixRPtr);
7084
7085
6
  return nullptr;
7086
6
}
7087
7088
Value *TranslateLinAlgMatrixLoadFromMemory(
7089
    CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
7090
    HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper,
7091
4
    bool &Translated) {
7092
4
  hlsl::OP *HlslOp = &Helper.hlslOP;
7093
4
  IRBuilder<> Builder(CI);
7094
7095
4
  Value *MatrixPtr = CI->getArgOperand(1);
7096
4
  DXASSERT_NOMSG(isa<PointerType>(MatrixPtr->getType()));
7097
4
  Type *MatrixType = MatrixPtr->getType()->getPointerElementType();
7098
7099
4
  Value *Arr = CI->getArgOperand(2);
7100
4
  Value *Offset = CI->getArgOperand(3);
7101
4
  Value *Stride = CI->getArgOperand(4);
7102
4
  Value *Layout = CI->getArgOperand(5);
7103
7104
4
  Value *Zero = Builder.getInt32(0);
7105
4
  Value *ArrPtr = Builder.CreateGEP(Arr, {Zero, Zero});
7106
4
  Type *ArrEltTy = ArrPtr->getType()->getPointerElementType();
7107
7108
4
  Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode);
7109
4
  Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {MatrixType, ArrEltTy});
7110
7111
4
  Value *Matrix =
7112
4
      Builder.CreateCall(DxilFunc, {OpArg, ArrPtr, Offset, Stride, Layout});
7113
4
  Builder.CreateStore(Matrix, MatrixPtr);
7114
7115
4
  return nullptr;
7116
4
}
7117
7118
Value *TranslateLinAlgMatrixAccumStoreToMemory(
7119
    CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
7120
    HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper,
7121
8
    bool &Translated) {
7122
8
  hlsl::OP *HlslOp = &Helper.hlslOP;
7123
8
  IRBuilder<> Builder(CI);
7124
7125
8
  Value *Matrix = CI->getArgOperand(1);
7126
8
  Value *Arr = CI->getArgOperand(2);
7127
8
  Value *Offset = CI->getArgOperand(3);
7128
8
  Value *Stride = CI->getArgOperand(4);
7129
8
  Value *Layout = CI->getArgOperand(5);
7130
7131
8
  Value *Zero = Builder.getInt32(0);
7132
8
  Value *ArrPtr = Builder.CreateGEP(Arr, {Zero, Zero});
7133
8
  Type *ArrEltTy = ArrPtr->getType()->getPointerElementType();
7134
7135
8
  Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode);
7136
8
  Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {Matrix->getType(), ArrEltTy});
7137
7138
8
  return Builder.CreateCall(DxilFunc,
7139
8
                            {OpArg, Matrix, ArrPtr, Offset, Stride, Layout});
7140
8
}
7141
7142
Value *TranslateLinAlgConvert(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
7143
                              HLOperationLowerHelper &Helper,
7144
                              HLObjectOperationLowerHelper *ObjHelper,
7145
8
                              bool &Translated) {
7146
8
  hlsl::OP *HlslOp = &Helper.hlslOP;
7147
8
  IRBuilder<> Builder(CI);
7148
7149
8
  Value *OutVecPtr = CI->getArgOperand(1);
7150
8
  DXASSERT_NOMSG(isa<PointerType>(OutVecPtr->getType()));
7151
8
  Type *OutVecTy = OutVecPtr->getType()->getPointerElementType();
7152
8
  Value *InVec = CI->getArgOperand(2);
7153
8
  Value *InInterp = CI->getArgOperand(3);
7154
8
  Value *OutInterp = CI->getArgOperand(4);
7155
7156
8
  Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode);
7157
8
  Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {OutVecTy, InVec->getType()});
7158
7159
8
  Value *OutVec =
7160
8
      Builder.CreateCall(DxilFunc, {OpArg, InVec, InInterp, OutInterp});
7161
8
  Builder.CreateStore(OutVec, OutVecPtr);
7162
7163
8
  return nullptr;
7164
8
}
7165
7166
} // namespace
7167
7168
// Lower table.
7169
namespace {
7170
7171
Value *EmptyLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
7172
                  HLOperationLowerHelper &helper,
7173
6
                  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
7174
6
  Translated = false;
7175
6
  dxilutil::EmitErrorOnInstruction(CI, "Unsupported intrinsic.");
7176
6
  return nullptr;
7177
6
}
7178
7179
// SPIRV change starts
7180
Value *UnsupportedVulkanIntrinsic(CallInst *CI, IntrinsicOp IOP,
7181
                                  DXIL::OpCode opcode,
7182
                                  HLOperationLowerHelper &helper,
7183
                                  HLObjectOperationLowerHelper *pObjHelper,
7184
0
                                  bool &Translated) {
7185
0
  Translated = false;
7186
0
  dxilutil::EmitErrorOnInstruction(CI, "Unsupported Vulkan intrinsic.");
7187
0
  return nullptr;
7188
0
}
7189
// SPIRV change ends
7190
7191
Value *StreamOutputLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode,
7192
                         HLOperationLowerHelper &helper,
7193
                         HLObjectOperationLowerHelper *pObjHelper,
7194
0
                         bool &Translated) {
7195
  // Translated in DxilGenerationPass::GenerateStreamOutputOperation.
7196
  // Do nothing here.
7197
  // Mark not translated.
7198
0
  Translated = false;
7199
0
  return nullptr;
7200
0
}
7201
7202
// This table has to match IntrinsicOp orders
7203
constexpr IntrinsicLower gLowerTable[] = {
7204
    {IntrinsicOp::IOP_AcceptHitAndEndSearch,
7205
     TranslateNoArgNoReturnPreserveOutput, DXIL::OpCode::AcceptHitAndEndSearch},
7206
    {IntrinsicOp::IOP_AddUint64, TranslateAddUint64, DXIL::OpCode::UAddc},
7207
    {IntrinsicOp::IOP_AllMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
7208
    {IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync, TrivialBarrier,
7209
     DXIL::OpCode::Barrier},
7210
    {IntrinsicOp::IOP_AllocateRayQuery, TranslateAllocateRayQuery,
7211
     DXIL::OpCode::AllocateRayQuery},
7212
    {IntrinsicOp::IOP_Barrier, TranslateBarrier, DXIL::OpCode::NumOpCodes},
7213
    {IntrinsicOp::IOP_CallShader, TranslateCallShader,
7214
     DXIL::OpCode::CallShader},
7215
    {IntrinsicOp::IOP_CheckAccessFullyMapped, TranslateCheckAccess,
7216
     DXIL::OpCode::CheckAccessFullyMapped},
7217
    {IntrinsicOp::IOP_CreateResourceFromHeap, TranslateGetHandleFromHeap,
7218
     DXIL::OpCode::CreateHandleFromHeap},
7219
    {IntrinsicOp::IOP_D3DCOLORtoUBYTE4, TranslateD3DColorToUByte4,
7220
     DXIL::OpCode::NumOpCodes},
7221
    {IntrinsicOp::IOP_DeviceMemoryBarrier, TrivialBarrier,
7222
     DXIL::OpCode::Barrier},
7223
    {IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync, TrivialBarrier,
7224
     DXIL::OpCode::Barrier},
7225
    {IntrinsicOp::IOP_DispatchMesh, TrivialDispatchMesh,
7226
     DXIL::OpCode::DispatchMesh},
7227
    {IntrinsicOp::IOP_DispatchRaysDimensions, TranslateNoArgVectorOperation,
7228
     DXIL::OpCode::DispatchRaysDimensions},
7229
    {IntrinsicOp::IOP_DispatchRaysIndex, TranslateNoArgVectorOperation,
7230
     DXIL::OpCode::DispatchRaysIndex},
7231
    {IntrinsicOp::IOP_EvaluateAttributeAtSample, TranslateEvalSample,
7232
     DXIL::OpCode::NumOpCodes},
7233
    {IntrinsicOp::IOP_EvaluateAttributeCentroid, TranslateEvalCentroid,
7234
     DXIL::OpCode::EvalCentroid},
7235
    {IntrinsicOp::IOP_EvaluateAttributeSnapped, TranslateEvalSnapped,
7236
     DXIL::OpCode::NumOpCodes},
7237
    {IntrinsicOp::IOP_GeometryIndex, TrivialNoArgWithRetOperation,
7238
     DXIL::OpCode::GeometryIndex},
7239
    {IntrinsicOp::IOP_GetAttributeAtVertex, TranslateGetAttributeAtVertex,
7240
     DXIL::OpCode::AttributeAtVertex},
7241
    {IntrinsicOp::IOP_GetRemainingRecursionLevels, TrivialNoArgOperation,
7242
     DXIL::OpCode::GetRemainingRecursionLevels},
7243
    {IntrinsicOp::IOP_GetRenderTargetSampleCount, TrivialNoArgOperation,
7244
     DXIL::OpCode::RenderTargetGetSampleCount},
7245
    {IntrinsicOp::IOP_GetRenderTargetSamplePosition, TranslateGetRTSamplePos,
7246
     DXIL::OpCode::NumOpCodes},
7247
    {IntrinsicOp::IOP_GroupMemoryBarrier, TrivialBarrier,
7248
     DXIL::OpCode::Barrier},
7249
    {IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync, TrivialBarrier,
7250
     DXIL::OpCode::Barrier},
7251
    {IntrinsicOp::IOP_HitKind, TrivialNoArgWithRetOperation,
7252
     DXIL::OpCode::HitKind},
7253
    {IntrinsicOp::IOP_IgnoreHit, TranslateNoArgNoReturnPreserveOutput,
7254
     DXIL::OpCode::IgnoreHit},
7255
    {IntrinsicOp::IOP_InstanceID, TrivialNoArgWithRetOperation,
7256
     DXIL::OpCode::InstanceID},
7257
    {IntrinsicOp::IOP_InstanceIndex, TrivialNoArgWithRetOperation,
7258
     DXIL::OpCode::InstanceIndex},
7259
    {IntrinsicOp::IOP_InterlockedAdd, TranslateIopAtomicBinaryOperation,
7260
     DXIL::OpCode::NumOpCodes},
7261
    {IntrinsicOp::IOP_InterlockedAnd, TranslateIopAtomicBinaryOperation,
7262
     DXIL::OpCode::NumOpCodes},
7263
    {IntrinsicOp::IOP_InterlockedCompareExchange, TranslateIopAtomicCmpXChg,
7264
     DXIL::OpCode::NumOpCodes},
7265
    {IntrinsicOp::IOP_InterlockedCompareExchangeFloatBitwise,
7266
     TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
7267
    {IntrinsicOp::IOP_InterlockedCompareStore, TranslateIopAtomicCmpXChg,
7268
     DXIL::OpCode::NumOpCodes},
7269
    {IntrinsicOp::IOP_InterlockedCompareStoreFloatBitwise,
7270
     TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
7271
    {IntrinsicOp::IOP_InterlockedExchange, TranslateIopAtomicBinaryOperation,
7272
     DXIL::OpCode::NumOpCodes},
7273
    {IntrinsicOp::IOP_InterlockedMax, TranslateIopAtomicBinaryOperation,
7274
     DXIL::OpCode::NumOpCodes},
7275
    {IntrinsicOp::IOP_InterlockedMin, TranslateIopAtomicBinaryOperation,
7276
     DXIL::OpCode::NumOpCodes},
7277
    {IntrinsicOp::IOP_InterlockedOr, TranslateIopAtomicBinaryOperation,
7278
     DXIL::OpCode::NumOpCodes},
7279
    {IntrinsicOp::IOP_InterlockedXor, TranslateIopAtomicBinaryOperation,
7280
     DXIL::OpCode::NumOpCodes},
7281
    {IntrinsicOp::IOP_IsHelperLane, TrivialNoArgWithRetOperation,
7282
     DXIL::OpCode::IsHelperLane},
7283
    {IntrinsicOp::IOP_NonUniformResourceIndex, TranslateNonUniformResourceIndex,
7284
     DXIL::OpCode::NumOpCodes},
7285
    {IntrinsicOp::IOP_ObjectRayDirection, TranslateNoArgVectorOperation,
7286
     DXIL::OpCode::ObjectRayDirection},
7287
    {IntrinsicOp::IOP_ObjectRayOrigin, TranslateNoArgVectorOperation,
7288
     DXIL::OpCode::ObjectRayOrigin},
7289
    {IntrinsicOp::IOP_ObjectToWorld, TranslateNoArgMatrix3x4Operation,
7290
     DXIL::OpCode::ObjectToWorld},
7291
    {IntrinsicOp::IOP_ObjectToWorld3x4, TranslateNoArgMatrix3x4Operation,
7292
     DXIL::OpCode::ObjectToWorld},
7293
    {IntrinsicOp::IOP_ObjectToWorld4x3,
7294
     TranslateNoArgTransposedMatrix3x4Operation, DXIL::OpCode::ObjectToWorld},
7295
    {IntrinsicOp::IOP_PrimitiveIndex, TrivialNoArgWithRetOperation,
7296
     DXIL::OpCode::PrimitiveIndex},
7297
    {IntrinsicOp::IOP_Process2DQuadTessFactorsAvg, TranslateProcessTessFactors,
7298
     DXIL::OpCode::NumOpCodes},
7299
    {IntrinsicOp::IOP_Process2DQuadTessFactorsMax, TranslateProcessTessFactors,
7300
     DXIL::OpCode::NumOpCodes},
7301
    {IntrinsicOp::IOP_Process2DQuadTessFactorsMin, TranslateProcessTessFactors,
7302
     DXIL::OpCode::NumOpCodes},
7303
    {IntrinsicOp::IOP_ProcessIsolineTessFactors,
7304
     TranslateProcessIsolineTessFactors, DXIL::OpCode::NumOpCodes},
7305
    {IntrinsicOp::IOP_ProcessQuadTessFactorsAvg, TranslateProcessTessFactors,
7306
     DXIL::OpCode::NumOpCodes},
7307
    {IntrinsicOp::IOP_ProcessQuadTessFactorsMax, TranslateProcessTessFactors,
7308
     DXIL::OpCode::NumOpCodes},
7309
    {IntrinsicOp::IOP_ProcessQuadTessFactorsMin, TranslateProcessTessFactors,
7310
     DXIL::OpCode::NumOpCodes},
7311
    {IntrinsicOp::IOP_ProcessTriTessFactorsAvg, TranslateProcessTessFactors,
7312
     DXIL::OpCode::NumOpCodes},
7313
    {IntrinsicOp::IOP_ProcessTriTessFactorsMax, TranslateProcessTessFactors,
7314
     DXIL::OpCode::NumOpCodes},
7315
    {IntrinsicOp::IOP_ProcessTriTessFactorsMin, TranslateProcessTessFactors,
7316
     DXIL::OpCode::NumOpCodes},
7317
    {IntrinsicOp::IOP_QuadAll, TranslateQuadAnyAll, DXIL::OpCode::QuadVote},
7318
    {IntrinsicOp::IOP_QuadAny, TranslateQuadAnyAll, DXIL::OpCode::QuadVote},
7319
    {IntrinsicOp::IOP_QuadReadAcrossDiagonal, TranslateQuadReadAcross,
7320
     DXIL::OpCode::QuadOp},
7321
    {IntrinsicOp::IOP_QuadReadAcrossX, TranslateQuadReadAcross,
7322
     DXIL::OpCode::QuadOp},
7323
    {IntrinsicOp::IOP_QuadReadAcrossY, TranslateQuadReadAcross,
7324
     DXIL::OpCode::QuadOp},
7325
    {IntrinsicOp::IOP_QuadReadLaneAt, TranslateQuadReadLaneAt,
7326
     DXIL::OpCode::NumOpCodes},
7327
    {IntrinsicOp::IOP_RayFlags, TrivialNoArgWithRetOperation,
7328
     DXIL::OpCode::RayFlags},
7329
    {IntrinsicOp::IOP_RayTCurrent, TrivialNoArgWithRetOperation,
7330
     DXIL::OpCode::RayTCurrent},
7331
    {IntrinsicOp::IOP_RayTMin, TrivialNoArgWithRetOperation,
7332
     DXIL::OpCode::RayTMin},
7333
    {IntrinsicOp::IOP_ReportHit, TranslateReportIntersection,
7334
     DXIL::OpCode::ReportHit},
7335
    {IntrinsicOp::IOP_SetMeshOutputCounts, TrivialSetMeshOutputCounts,
7336
     DXIL::OpCode::SetMeshOutputCounts},
7337
    {IntrinsicOp::IOP_TraceRay, TranslateTraceRay, DXIL::OpCode::TraceRay},
7338
    {IntrinsicOp::IOP_WaveActiveAllEqual, TranslateWaveAllEqual,
7339
     DXIL::OpCode::WaveActiveAllEqual},
7340
    {IntrinsicOp::IOP_WaveActiveAllTrue, TranslateWaveA2B,
7341
     DXIL::OpCode::WaveAllTrue},
7342
    {IntrinsicOp::IOP_WaveActiveAnyTrue, TranslateWaveA2B,
7343
     DXIL::OpCode::WaveAnyTrue},
7344
    {IntrinsicOp::IOP_WaveActiveBallot, TranslateWaveBallot,
7345
     DXIL::OpCode::WaveActiveBallot},
7346
    {IntrinsicOp::IOP_WaveActiveBitAnd, TranslateWaveA2A,
7347
     DXIL::OpCode::WaveActiveBit},
7348
    {IntrinsicOp::IOP_WaveActiveBitOr, TranslateWaveA2A,
7349
     DXIL::OpCode::WaveActiveBit},
7350
    {IntrinsicOp::IOP_WaveActiveBitXor, TranslateWaveA2A,
7351
     DXIL::OpCode::WaveActiveBit},
7352
    {IntrinsicOp::IOP_WaveActiveCountBits, TranslateWaveA2B,
7353
     DXIL::OpCode::WaveAllBitCount},
7354
    {IntrinsicOp::IOP_WaveActiveMax, TranslateWaveA2A,
7355
     DXIL::OpCode::WaveActiveOp},
7356
    {IntrinsicOp::IOP_WaveActiveMin, TranslateWaveA2A,
7357
     DXIL::OpCode::WaveActiveOp},
7358
    {IntrinsicOp::IOP_WaveActiveProduct, TranslateWaveA2A,
7359
     DXIL::OpCode::WaveActiveOp},
7360
    {IntrinsicOp::IOP_WaveActiveSum, TranslateWaveA2A,
7361
     DXIL::OpCode::WaveActiveOp},
7362
    {IntrinsicOp::IOP_WaveGetLaneCount, TranslateWaveToVal,
7363
     DXIL::OpCode::WaveGetLaneCount},
7364
    {IntrinsicOp::IOP_WaveGetLaneIndex, TranslateWaveToVal,
7365
     DXIL::OpCode::WaveGetLaneIndex},
7366
    {IntrinsicOp::IOP_WaveIsFirstLane, TranslateWaveToVal,
7367
     DXIL::OpCode::WaveIsFirstLane},
7368
    {IntrinsicOp::IOP_WaveMatch, TranslateWaveMatch, DXIL::OpCode::WaveMatch},
7369
    {IntrinsicOp::IOP_WaveMultiPrefixBitAnd, TranslateWaveMultiPrefix,
7370
     DXIL::OpCode::WaveMultiPrefixOp},
7371
    {IntrinsicOp::IOP_WaveMultiPrefixBitOr, TranslateWaveMultiPrefix,
7372
     DXIL::OpCode::WaveMultiPrefixOp},
7373
    {IntrinsicOp::IOP_WaveMultiPrefixBitXor, TranslateWaveMultiPrefix,
7374
     DXIL::OpCode::WaveMultiPrefixOp},
7375
    {IntrinsicOp::IOP_WaveMultiPrefixCountBits,
7376
     TranslateWaveMultiPrefixBitCount, DXIL::OpCode::WaveMultiPrefixBitCount},
7377
    {IntrinsicOp::IOP_WaveMultiPrefixProduct, TranslateWaveMultiPrefix,
7378
     DXIL::OpCode::WaveMultiPrefixOp},
7379
    {IntrinsicOp::IOP_WaveMultiPrefixSum, TranslateWaveMultiPrefix,
7380
     DXIL::OpCode::WaveMultiPrefixOp},
7381
    {IntrinsicOp::IOP_WavePrefixCountBits, TranslateWaveA2B,
7382
     DXIL::OpCode::WavePrefixBitCount},
7383
    {IntrinsicOp::IOP_WavePrefixProduct, TranslateWaveA2A,
7384
     DXIL::OpCode::WavePrefixOp},
7385
    {IntrinsicOp::IOP_WavePrefixSum, TranslateWaveA2A,
7386
     DXIL::OpCode::WavePrefixOp},
7387
    {IntrinsicOp::IOP_WaveReadLaneAt, TranslateWaveReadLaneAt,
7388
     DXIL::OpCode::WaveReadLaneAt},
7389
    {IntrinsicOp::IOP_WaveReadLaneFirst, TranslateWaveReadLaneFirst,
7390
     DXIL::OpCode::WaveReadLaneFirst},
7391
    {IntrinsicOp::IOP_WorldRayDirection, TranslateNoArgVectorOperation,
7392
     DXIL::OpCode::WorldRayDirection},
7393
    {IntrinsicOp::IOP_WorldRayOrigin, TranslateNoArgVectorOperation,
7394
     DXIL::OpCode::WorldRayOrigin},
7395
    {IntrinsicOp::IOP_WorldToObject, TranslateNoArgMatrix3x4Operation,
7396
     DXIL::OpCode::WorldToObject},
7397
    {IntrinsicOp::IOP_WorldToObject3x4, TranslateNoArgMatrix3x4Operation,
7398
     DXIL::OpCode::WorldToObject},
7399
    {IntrinsicOp::IOP_WorldToObject4x3,
7400
     TranslateNoArgTransposedMatrix3x4Operation, DXIL::OpCode::WorldToObject},
7401
    {IntrinsicOp::IOP_abort, EmptyLower, DXIL::OpCode::NumOpCodes},
7402
    {IntrinsicOp::IOP_abs, TranslateAbs, DXIL::OpCode::NumOpCodes},
7403
    {IntrinsicOp::IOP_acos, TrivialUnaryOperation, DXIL::OpCode::Acos},
7404
    {IntrinsicOp::IOP_all, TranslateAll, DXIL::OpCode::NumOpCodes},
7405
    {IntrinsicOp::IOP_and, TranslateAnd, DXIL::OpCode::NumOpCodes},
7406
    {IntrinsicOp::IOP_any, TranslateAny, DXIL::OpCode::NumOpCodes},
7407
    {IntrinsicOp::IOP_asdouble, TranslateAsDouble, DXIL::OpCode::MakeDouble},
7408
    {IntrinsicOp::IOP_asfloat, TranslateBitcast, DXIL::OpCode::NumOpCodes},
7409
    {IntrinsicOp::IOP_asfloat16, TranslateBitcast, DXIL::OpCode::NumOpCodes},
7410
    {IntrinsicOp::IOP_asin, TrivialUnaryOperation, DXIL::OpCode::Asin},
7411
    {IntrinsicOp::IOP_asint, TranslateBitcast, DXIL::OpCode::NumOpCodes},
7412
    {IntrinsicOp::IOP_asint16, TranslateBitcast, DXIL::OpCode::NumOpCodes},
7413
    {IntrinsicOp::IOP_asuint, TranslateAsUint, DXIL::OpCode::SplitDouble},
7414
    {IntrinsicOp::IOP_asuint16, TranslateAsUint, DXIL::OpCode::NumOpCodes},
7415
    {IntrinsicOp::IOP_atan, TrivialUnaryOperation, DXIL::OpCode::Atan},
7416
    {IntrinsicOp::IOP_atan2, TranslateAtan2, DXIL::OpCode::NumOpCodes},
7417
    {IntrinsicOp::IOP_ceil, TrivialUnaryOperation, DXIL::OpCode::Round_pi},
7418
    {IntrinsicOp::IOP_clamp, TranslateClamp, DXIL::OpCode::NumOpCodes},
7419
    {IntrinsicOp::IOP_clip, TranslateClip, DXIL::OpCode::NumOpCodes},
7420
    {IntrinsicOp::IOP_cos, TrivialUnaryOperation, DXIL::OpCode::Cos},
7421
    {IntrinsicOp::IOP_cosh, TrivialUnaryOperation, DXIL::OpCode::Hcos},
7422
    {IntrinsicOp::IOP_countbits, TrivialUnaryOperationRet,
7423
     DXIL::OpCode::Countbits},
7424
    {IntrinsicOp::IOP_cross, TranslateCross, DXIL::OpCode::NumOpCodes},
7425
    {IntrinsicOp::IOP_ddx, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX},
7426
    {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperation,
7427
     DXIL::OpCode::DerivCoarseX},
7428
    {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperation,
7429
     DXIL::OpCode::DerivFineX},
7430
    {IntrinsicOp::IOP_ddy, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY},
7431
    {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperation,
7432
     DXIL::OpCode::DerivCoarseY},
7433
    {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperation,
7434
     DXIL::OpCode::DerivFineY},
7435
    {IntrinsicOp::IOP_degrees, TranslateDegrees, DXIL::OpCode::NumOpCodes},
7436
    {IntrinsicOp::IOP_determinant, EmptyLower, DXIL::OpCode::NumOpCodes},
7437
    {IntrinsicOp::IOP_distance, TranslateDistance, DXIL::OpCode::NumOpCodes},
7438
    {IntrinsicOp::IOP_dot, TranslateDot, DXIL::OpCode::NumOpCodes},
7439
    {IntrinsicOp::IOP_dot2add, TranslateDot2Add, DXIL::OpCode::Dot2AddHalf},
7440
    {IntrinsicOp::IOP_dot4add_i8packed, TranslateDot4AddPacked,
7441
     DXIL::OpCode::Dot4AddI8Packed},
7442
    {IntrinsicOp::IOP_dot4add_u8packed, TranslateDot4AddPacked,
7443
     DXIL::OpCode::Dot4AddU8Packed},
7444
    {IntrinsicOp::IOP_dst, TranslateDst, DXIL::OpCode::NumOpCodes},
7445
    {IntrinsicOp::IOP_exp, TranslateExp, DXIL::OpCode::NumOpCodes},
7446
    {IntrinsicOp::IOP_exp2, TrivialUnaryOperation, DXIL::OpCode::Exp},
7447
    {IntrinsicOp::IOP_f16tof32, TranslateF16ToF32,
7448
     DXIL::OpCode::LegacyF16ToF32},
7449
    {IntrinsicOp::IOP_f32tof16, TranslateF32ToF16,
7450
     DXIL::OpCode::LegacyF32ToF16},
7451
    {IntrinsicOp::IOP_faceforward, TranslateFaceforward,
7452
     DXIL::OpCode::NumOpCodes},
7453
    {IntrinsicOp::IOP_firstbithigh, TranslateFirstbitHi,
7454
     DXIL::OpCode::FirstbitSHi},
7455
    {IntrinsicOp::IOP_firstbitlow, TranslateFirstbitLo,
7456
     DXIL::OpCode::FirstbitLo},
7457
    {IntrinsicOp::IOP_floor, TrivialUnaryOperation, DXIL::OpCode::Round_ni},
7458
    {IntrinsicOp::IOP_fma, TrivialTrinaryOperation, DXIL::OpCode::Fma},
7459
    {IntrinsicOp::IOP_fmod, TranslateFMod, DXIL::OpCode::NumOpCodes},
7460
    {IntrinsicOp::IOP_frac, TrivialUnaryOperation, DXIL::OpCode::Frc},
7461
    {IntrinsicOp::IOP_frexp, TranslateFrexp, DXIL::OpCode::NumOpCodes},
7462
    {IntrinsicOp::IOP_fwidth, TranslateFWidth, DXIL::OpCode::NumOpCodes},
7463
    {IntrinsicOp::IOP_isfinite, TrivialIsSpecialFloat, DXIL::OpCode::IsFinite},
7464
    {IntrinsicOp::IOP_isinf, TrivialIsSpecialFloat, DXIL::OpCode::IsInf},
7465
    {IntrinsicOp::IOP_isnan, TrivialIsSpecialFloat, DXIL::OpCode::IsNaN},
7466
    {IntrinsicOp::IOP_ldexp, TranslateLdExp, DXIL::OpCode::NumOpCodes},
7467
    {IntrinsicOp::IOP_length, TranslateLength, DXIL::OpCode::NumOpCodes},
7468
    {IntrinsicOp::IOP_lerp, TranslateLerp, DXIL::OpCode::NumOpCodes},
7469
    {IntrinsicOp::IOP_lit, TranslateLit, DXIL::OpCode::NumOpCodes},
7470
    {IntrinsicOp::IOP_log, TranslateLog, DXIL::OpCode::NumOpCodes},
7471
    {IntrinsicOp::IOP_log10, TranslateLog10, DXIL::OpCode::NumOpCodes},
7472
    {IntrinsicOp::IOP_log2, TrivialUnaryOperation, DXIL::OpCode::Log},
7473
    {IntrinsicOp::IOP_mad, TranslateFUITrinary, DXIL::OpCode::IMad},
7474
    {IntrinsicOp::IOP_max, TranslateFUIBinary, DXIL::OpCode::IMax},
7475
    {IntrinsicOp::IOP_min, TranslateFUIBinary, DXIL::OpCode::IMin},
7476
    {IntrinsicOp::IOP_modf, TranslateModF, DXIL::OpCode::NumOpCodes},
7477
    {IntrinsicOp::IOP_msad4, TranslateMSad4, DXIL::OpCode::NumOpCodes},
7478
    {IntrinsicOp::IOP_mul, TranslateMul, DXIL::OpCode::NumOpCodes},
7479
    {IntrinsicOp::IOP_normalize, TranslateNormalize, DXIL::OpCode::NumOpCodes},
7480
    {IntrinsicOp::IOP_or, TranslateOr, DXIL::OpCode::NumOpCodes},
7481
    {IntrinsicOp::IOP_pack_clamp_s8, TranslatePack, DXIL::OpCode::Pack4x8},
7482
    {IntrinsicOp::IOP_pack_clamp_u8, TranslatePack, DXIL::OpCode::Pack4x8},
7483
    {IntrinsicOp::IOP_pack_s8, TranslatePack, DXIL::OpCode::Pack4x8},
7484
    {IntrinsicOp::IOP_pack_u8, TranslatePack, DXIL::OpCode::Pack4x8},
7485
    {IntrinsicOp::IOP_pow, TranslatePow, DXIL::OpCode::NumOpCodes},
7486
    {IntrinsicOp::IOP_printf, TranslatePrintf, DXIL::OpCode::NumOpCodes},
7487
    {IntrinsicOp::IOP_radians, TranslateRadians, DXIL::OpCode::NumOpCodes},
7488
    {IntrinsicOp::IOP_rcp, TranslateRCP, DXIL::OpCode::NumOpCodes},
7489
    {IntrinsicOp::IOP_reflect, TranslateReflect, DXIL::OpCode::NumOpCodes},
7490
    {IntrinsicOp::IOP_refract, TranslateRefract, DXIL::OpCode::NumOpCodes},
7491
    {IntrinsicOp::IOP_reversebits, TrivialUnaryOperation, DXIL::OpCode::Bfrev},
7492
    {IntrinsicOp::IOP_round, TrivialUnaryOperation, DXIL::OpCode::Round_ne},
7493
    {IntrinsicOp::IOP_rsqrt, TrivialUnaryOperation, DXIL::OpCode::Rsqrt},
7494
    {IntrinsicOp::IOP_saturate, TrivialUnaryOperation, DXIL::OpCode::Saturate},
7495
    {IntrinsicOp::IOP_select, TranslateSelect, DXIL::OpCode::NumOpCodes},
7496
    {IntrinsicOp::IOP_sign, TranslateSign, DXIL::OpCode::NumOpCodes},
7497
    {IntrinsicOp::IOP_sin, TrivialUnaryOperation, DXIL::OpCode::Sin},
7498
    {IntrinsicOp::IOP_sincos, EmptyLower, DXIL::OpCode::NumOpCodes},
7499
    {IntrinsicOp::IOP_sinh, TrivialUnaryOperation, DXIL::OpCode::Hsin},
7500
    {IntrinsicOp::IOP_smoothstep, TranslateSmoothStep,
7501
     DXIL::OpCode::NumOpCodes},
7502
    {IntrinsicOp::IOP_source_mark, EmptyLower, DXIL::OpCode::NumOpCodes},
7503
    {IntrinsicOp::IOP_sqrt, TrivialUnaryOperation, DXIL::OpCode::Sqrt},
7504
    {IntrinsicOp::IOP_step, TranslateStep, DXIL::OpCode::NumOpCodes},
7505
    {IntrinsicOp::IOP_tan, TrivialUnaryOperation, DXIL::OpCode::Tan},
7506
    {IntrinsicOp::IOP_tanh, TrivialUnaryOperation, DXIL::OpCode::Htan},
7507
    {IntrinsicOp::IOP_tex1D, EmptyLower, DXIL::OpCode::NumOpCodes},
7508
    {IntrinsicOp::IOP_tex1Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
7509
    {IntrinsicOp::IOP_tex1Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
7510
    {IntrinsicOp::IOP_tex1Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
7511
    {IntrinsicOp::IOP_tex1Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
7512
    {IntrinsicOp::IOP_tex2D, EmptyLower, DXIL::OpCode::NumOpCodes},
7513
    {IntrinsicOp::IOP_tex2Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
7514
    {IntrinsicOp::IOP_tex2Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
7515
    {IntrinsicOp::IOP_tex2Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
7516
    {IntrinsicOp::IOP_tex2Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
7517
    {IntrinsicOp::IOP_tex3D, EmptyLower, DXIL::OpCode::NumOpCodes},
7518
    {IntrinsicOp::IOP_tex3Dbias, EmptyLower, DXIL::OpCode::NumOpCodes},
7519
    {IntrinsicOp::IOP_tex3Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
7520
    {IntrinsicOp::IOP_tex3Dlod, EmptyLower, DXIL::OpCode::NumOpCodes},
7521
    {IntrinsicOp::IOP_tex3Dproj, EmptyLower, DXIL::OpCode::NumOpCodes},
7522
    {IntrinsicOp::IOP_texCUBE, EmptyLower, DXIL::OpCode::NumOpCodes},
7523
    {IntrinsicOp::IOP_texCUBEbias, EmptyLower, DXIL::OpCode::NumOpCodes},
7524
    {IntrinsicOp::IOP_texCUBEgrad, EmptyLower, DXIL::OpCode::NumOpCodes},
7525
    {IntrinsicOp::IOP_texCUBElod, EmptyLower, DXIL::OpCode::NumOpCodes},
7526
    {IntrinsicOp::IOP_texCUBEproj, EmptyLower, DXIL::OpCode::NumOpCodes},
7527
    {IntrinsicOp::IOP_transpose, EmptyLower, DXIL::OpCode::NumOpCodes},
7528
    {IntrinsicOp::IOP_trunc, TrivialUnaryOperation, DXIL::OpCode::Round_z},
7529
    {IntrinsicOp::IOP_unpack_s8s16, TranslateUnpack, DXIL::OpCode::Unpack4x8},
7530
    {IntrinsicOp::IOP_unpack_s8s32, TranslateUnpack, DXIL::OpCode::Unpack4x8},
7531
    {IntrinsicOp::IOP_unpack_u8u16, TranslateUnpack, DXIL::OpCode::Unpack4x8},
7532
    {IntrinsicOp::IOP_unpack_u8u32, TranslateUnpack, DXIL::OpCode::Unpack4x8},
7533
    {IntrinsicOp::IOP_VkRawBufferLoad, UnsupportedVulkanIntrinsic,
7534
     DXIL::OpCode::NumOpCodes},
7535
    {IntrinsicOp::IOP_VkRawBufferStore, UnsupportedVulkanIntrinsic,
7536
     DXIL::OpCode::NumOpCodes},
7537
    {IntrinsicOp::IOP_VkReadClock, UnsupportedVulkanIntrinsic,
7538
     DXIL::OpCode::NumOpCodes},
7539
    {IntrinsicOp::IOP_Vkext_execution_mode, UnsupportedVulkanIntrinsic,
7540
     DXIL::OpCode::NumOpCodes},
7541
    {IntrinsicOp::IOP_Vkext_execution_mode_id, UnsupportedVulkanIntrinsic,
7542
     DXIL::OpCode::NumOpCodes},
7543
    {IntrinsicOp::MOP_Append, StreamOutputLower, DXIL::OpCode::EmitStream},
7544
    {IntrinsicOp::MOP_RestartStrip, StreamOutputLower, DXIL::OpCode::CutStream},
7545
    {IntrinsicOp::MOP_CalculateLevelOfDetail, TranslateCalculateLOD,
7546
     DXIL::OpCode::NumOpCodes},
7547
    {IntrinsicOp::MOP_CalculateLevelOfDetailUnclamped, TranslateCalculateLOD,
7548
     DXIL::OpCode::NumOpCodes},
7549
    {IntrinsicOp::MOP_GetDimensions, TranslateGetDimensions,
7550
     DXIL::OpCode::NumOpCodes},
7551
    {IntrinsicOp::MOP_Load, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
7552
    {IntrinsicOp::MOP_Sample, TranslateSample, DXIL::OpCode::Sample},
7553
    {IntrinsicOp::MOP_SampleBias, TranslateSample, DXIL::OpCode::SampleBias},
7554
    {IntrinsicOp::MOP_SampleCmp, TranslateSample, DXIL::OpCode::SampleCmp},
7555
    {IntrinsicOp::MOP_SampleCmpBias, TranslateSample,
7556
     DXIL::OpCode::SampleCmpBias},
7557
    {IntrinsicOp::MOP_SampleCmpGrad, TranslateSample,
7558
     DXIL::OpCode::SampleCmpGrad},
7559
    {IntrinsicOp::MOP_SampleCmpLevel, TranslateSample,
7560
     DXIL::OpCode::SampleCmpLevel},
7561
    {IntrinsicOp::MOP_SampleCmpLevelZero, TranslateSample,
7562
     DXIL::OpCode::SampleCmpLevelZero},
7563
    {IntrinsicOp::MOP_SampleGrad, TranslateSample, DXIL::OpCode::SampleGrad},
7564
    {IntrinsicOp::MOP_SampleLevel, TranslateSample, DXIL::OpCode::SampleLevel},
7565
    {IntrinsicOp::MOP_Gather, TranslateGather, DXIL::OpCode::TextureGather},
7566
    {IntrinsicOp::MOP_GatherAlpha, TranslateGather,
7567
     DXIL::OpCode::TextureGather},
7568
    {IntrinsicOp::MOP_GatherBlue, TranslateGather, DXIL::OpCode::TextureGather},
7569
    {IntrinsicOp::MOP_GatherCmp, TranslateGather,
7570
     DXIL::OpCode::TextureGatherCmp},
7571
    {IntrinsicOp::MOP_GatherCmpAlpha, TranslateGather,
7572
     DXIL::OpCode::TextureGatherCmp},
7573
    {IntrinsicOp::MOP_GatherCmpBlue, TranslateGather,
7574
     DXIL::OpCode::TextureGatherCmp},
7575
    {IntrinsicOp::MOP_GatherCmpGreen, TranslateGather,
7576
     DXIL::OpCode::TextureGatherCmp},
7577
    {IntrinsicOp::MOP_GatherCmpRed, TranslateGather,
7578
     DXIL::OpCode::TextureGatherCmp},
7579
    {IntrinsicOp::MOP_GatherGreen, TranslateGather,
7580
     DXIL::OpCode::TextureGather},
7581
    {IntrinsicOp::MOP_GatherRaw, TranslateGather,
7582
     DXIL::OpCode::TextureGatherRaw},
7583
    {IntrinsicOp::MOP_GatherRed, TranslateGather, DXIL::OpCode::TextureGather},
7584
    {IntrinsicOp::MOP_GetSamplePosition, TranslateGetSamplePosition,
7585
     DXIL::OpCode::NumOpCodes},
7586
    {IntrinsicOp::MOP_Load2, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
7587
    {IntrinsicOp::MOP_Load3, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
7588
    {IntrinsicOp::MOP_Load4, TranslateResourceLoad, DXIL::OpCode::NumOpCodes},
7589
    {IntrinsicOp::MOP_InterlockedAdd, TranslateMopAtomicBinaryOperation,
7590
     DXIL::OpCode::NumOpCodes},
7591
    {IntrinsicOp::MOP_InterlockedAdd64, TranslateMopAtomicBinaryOperation,
7592
     DXIL::OpCode::NumOpCodes},
7593
    {IntrinsicOp::MOP_InterlockedAnd, TranslateMopAtomicBinaryOperation,
7594
     DXIL::OpCode::NumOpCodes},
7595
    {IntrinsicOp::MOP_InterlockedAnd64, TranslateMopAtomicBinaryOperation,
7596
     DXIL::OpCode::NumOpCodes},
7597
    {IntrinsicOp::MOP_InterlockedCompareExchange, TranslateMopAtomicCmpXChg,
7598
     DXIL::OpCode::NumOpCodes},
7599
    {IntrinsicOp::MOP_InterlockedCompareExchange64, TranslateMopAtomicCmpXChg,
7600
     DXIL::OpCode::NumOpCodes},
7601
    {IntrinsicOp::MOP_InterlockedCompareExchangeFloatBitwise,
7602
     TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
7603
    {IntrinsicOp::MOP_InterlockedCompareStore, TranslateMopAtomicCmpXChg,
7604
     DXIL::OpCode::NumOpCodes},
7605
    {IntrinsicOp::MOP_InterlockedCompareStore64, TranslateMopAtomicCmpXChg,
7606
     DXIL::OpCode::NumOpCodes},
7607
    {IntrinsicOp::MOP_InterlockedCompareStoreFloatBitwise,
7608
     TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes},
7609
    {IntrinsicOp::MOP_InterlockedExchange, TranslateMopAtomicBinaryOperation,
7610
     DXIL::OpCode::NumOpCodes},
7611
    {IntrinsicOp::MOP_InterlockedExchange64, TranslateMopAtomicBinaryOperation,
7612
     DXIL::OpCode::NumOpCodes},
7613
    {IntrinsicOp::MOP_InterlockedExchangeFloat,
7614
     TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes},
7615
    {IntrinsicOp::MOP_InterlockedMax, TranslateMopAtomicBinaryOperation,
7616
     DXIL::OpCode::NumOpCodes},
7617
    {IntrinsicOp::MOP_InterlockedMax64, TranslateMopAtomicBinaryOperation,
7618
     DXIL::OpCode::NumOpCodes},
7619
    {IntrinsicOp::MOP_InterlockedMin, TranslateMopAtomicBinaryOperation,
7620
     DXIL::OpCode::NumOpCodes},
7621
    {IntrinsicOp::MOP_InterlockedMin64, TranslateMopAtomicBinaryOperation,
7622
     DXIL::OpCode::NumOpCodes},
7623
    {IntrinsicOp::MOP_InterlockedOr, TranslateMopAtomicBinaryOperation,
7624
     DXIL::OpCode::NumOpCodes},
7625
    {IntrinsicOp::MOP_InterlockedOr64, TranslateMopAtomicBinaryOperation,
7626
     DXIL::OpCode::NumOpCodes},
7627
    {IntrinsicOp::MOP_InterlockedXor, TranslateMopAtomicBinaryOperation,
7628
     DXIL::OpCode::NumOpCodes},
7629
    {IntrinsicOp::MOP_InterlockedXor64, TranslateMopAtomicBinaryOperation,
7630
     DXIL::OpCode::NumOpCodes},
7631
    {IntrinsicOp::MOP_Store, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
7632
    {IntrinsicOp::MOP_Store2, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
7633
    {IntrinsicOp::MOP_Store3, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
7634
    {IntrinsicOp::MOP_Store4, TranslateResourceStore, DXIL::OpCode::NumOpCodes},
7635
    {IntrinsicOp::MOP_DecrementCounter, GenerateUpdateCounter,
7636
     DXIL::OpCode::NumOpCodes},
7637
    {IntrinsicOp::MOP_IncrementCounter, GenerateUpdateCounter,
7638
     DXIL::OpCode::NumOpCodes},
7639
    {IntrinsicOp::MOP_Consume, EmptyLower, DXIL::OpCode::NumOpCodes},
7640
    {IntrinsicOp::MOP_WriteSamplerFeedback, TranslateWriteSamplerFeedback,
7641
     DXIL::OpCode::WriteSamplerFeedback},
7642
    {IntrinsicOp::MOP_WriteSamplerFeedbackBias, TranslateWriteSamplerFeedback,
7643
     DXIL::OpCode::WriteSamplerFeedbackBias},
7644
    {IntrinsicOp::MOP_WriteSamplerFeedbackGrad, TranslateWriteSamplerFeedback,
7645
     DXIL::OpCode::WriteSamplerFeedbackGrad},
7646
    {IntrinsicOp::MOP_WriteSamplerFeedbackLevel, TranslateWriteSamplerFeedback,
7647
     DXIL::OpCode::WriteSamplerFeedbackLevel},
7648
7649
    {IntrinsicOp::MOP_Abort, TranslateGenericRayQueryMethod,
7650
     DXIL::OpCode::RayQuery_Abort},
7651
    {IntrinsicOp::MOP_CandidateGeometryIndex, TranslateGenericRayQueryMethod,
7652
     DXIL::OpCode::RayQuery_CandidateGeometryIndex},
7653
    {IntrinsicOp::MOP_CandidateInstanceContributionToHitGroupIndex,
7654
     TranslateGenericRayQueryMethod,
7655
     DXIL::OpCode::RayQuery_CandidateInstanceContributionToHitGroupIndex},
7656
    {IntrinsicOp::MOP_CandidateInstanceID, TranslateGenericRayQueryMethod,
7657
     DXIL::OpCode::RayQuery_CandidateInstanceID},
7658
    {IntrinsicOp::MOP_CandidateInstanceIndex, TranslateGenericRayQueryMethod,
7659
     DXIL::OpCode::RayQuery_CandidateInstanceIndex},
7660
    {IntrinsicOp::MOP_CandidateObjectRayDirection,
7661
     TranslateRayQueryFloat3Getter,
7662
     DXIL::OpCode::RayQuery_CandidateObjectRayDirection},
7663
    {IntrinsicOp::MOP_CandidateObjectRayOrigin, TranslateRayQueryFloat3Getter,
7664
     DXIL::OpCode::RayQuery_CandidateObjectRayOrigin},
7665
    {IntrinsicOp::MOP_CandidateObjectToWorld3x4,
7666
     TranslateRayQueryMatrix3x4Operation,
7667
     DXIL::OpCode::RayQuery_CandidateObjectToWorld3x4},
7668
    {IntrinsicOp::MOP_CandidateObjectToWorld4x3,
7669
     TranslateRayQueryTransposedMatrix3x4Operation,
7670
     DXIL::OpCode::RayQuery_CandidateObjectToWorld3x4},
7671
    {IntrinsicOp::MOP_CandidatePrimitiveIndex, TranslateGenericRayQueryMethod,
7672
     DXIL::OpCode::RayQuery_CandidatePrimitiveIndex},
7673
    {IntrinsicOp::MOP_CandidateProceduralPrimitiveNonOpaque,
7674
     TranslateGenericRayQueryMethod,
7675
     DXIL::OpCode::RayQuery_CandidateProceduralPrimitiveNonOpaque},
7676
    {IntrinsicOp::MOP_CandidateTriangleBarycentrics,
7677
     TranslateRayQueryFloat2Getter,
7678
     DXIL::OpCode::RayQuery_CandidateTriangleBarycentrics},
7679
    {IntrinsicOp::MOP_CandidateTriangleFrontFace,
7680
     TranslateGenericRayQueryMethod,
7681
     DXIL::OpCode::RayQuery_CandidateTriangleFrontFace},
7682
    {IntrinsicOp::MOP_CandidateTriangleRayT, TranslateGenericRayQueryMethod,
7683
     DXIL::OpCode::RayQuery_CandidateTriangleRayT},
7684
    {IntrinsicOp::MOP_CandidateType, TranslateGenericRayQueryMethod,
7685
     DXIL::OpCode::RayQuery_CandidateType},
7686
    {IntrinsicOp::MOP_CandidateWorldToObject3x4,
7687
     TranslateRayQueryMatrix3x4Operation,
7688
     DXIL::OpCode::RayQuery_CandidateWorldToObject3x4},
7689
    {IntrinsicOp::MOP_CandidateWorldToObject4x3,
7690
     TranslateRayQueryTransposedMatrix3x4Operation,
7691
     DXIL::OpCode::RayQuery_CandidateWorldToObject3x4},
7692
    {IntrinsicOp::MOP_CommitNonOpaqueTriangleHit,
7693
     TranslateGenericRayQueryMethod,
7694
     DXIL::OpCode::RayQuery_CommitNonOpaqueTriangleHit},
7695
    {IntrinsicOp::MOP_CommitProceduralPrimitiveHit,
7696
     TranslateCommitProceduralPrimitiveHit,
7697
     DXIL::OpCode::RayQuery_CommitProceduralPrimitiveHit},
7698
    {IntrinsicOp::MOP_CommittedGeometryIndex, TranslateGenericRayQueryMethod,
7699
     DXIL::OpCode::RayQuery_CommittedGeometryIndex},
7700
    {IntrinsicOp::MOP_CommittedInstanceContributionToHitGroupIndex,
7701
     TranslateGenericRayQueryMethod,
7702
     DXIL::OpCode::RayQuery_CommittedInstanceContributionToHitGroupIndex},
7703
    {IntrinsicOp::MOP_CommittedInstanceID, TranslateGenericRayQueryMethod,
7704
     DXIL::OpCode::RayQuery_CommittedInstanceID},
7705
    {IntrinsicOp::MOP_CommittedInstanceIndex, TranslateGenericRayQueryMethod,
7706
     DXIL::OpCode::RayQuery_CommittedInstanceIndex},
7707
    {IntrinsicOp::MOP_CommittedObjectRayDirection,
7708
     TranslateRayQueryFloat3Getter,
7709
     DXIL::OpCode::RayQuery_CommittedObjectRayDirection},
7710
    {IntrinsicOp::MOP_CommittedObjectRayOrigin, TranslateRayQueryFloat3Getter,
7711
     DXIL::OpCode::RayQuery_CommittedObjectRayOrigin},
7712
    {IntrinsicOp::MOP_CommittedObjectToWorld3x4,
7713
     TranslateRayQueryMatrix3x4Operation,
7714
     DXIL::OpCode::RayQuery_CommittedObjectToWorld3x4},
7715
    {IntrinsicOp::MOP_CommittedObjectToWorld4x3,
7716
     TranslateRayQueryTransposedMatrix3x4Operation,
7717
     DXIL::OpCode::RayQuery_CommittedObjectToWorld3x4},
7718
    {IntrinsicOp::MOP_CommittedPrimitiveIndex, TranslateGenericRayQueryMethod,
7719
     DXIL::OpCode::RayQuery_CommittedPrimitiveIndex},
7720
    {IntrinsicOp::MOP_CommittedRayT, TranslateGenericRayQueryMethod,
7721
     DXIL::OpCode::RayQuery_CommittedRayT},
7722
    {IntrinsicOp::MOP_CommittedStatus, TranslateGenericRayQueryMethod,
7723
     DXIL::OpCode::RayQuery_CommittedStatus},
7724
    {IntrinsicOp::MOP_CommittedTriangleBarycentrics,
7725
     TranslateRayQueryFloat2Getter,
7726
     DXIL::OpCode::RayQuery_CommittedTriangleBarycentrics},
7727
    {IntrinsicOp::MOP_CommittedTriangleFrontFace,
7728
     TranslateGenericRayQueryMethod,
7729
     DXIL::OpCode::RayQuery_CommittedTriangleFrontFace},
7730
    {IntrinsicOp::MOP_CommittedWorldToObject3x4,
7731
     TranslateRayQueryMatrix3x4Operation,
7732
     DXIL::OpCode::RayQuery_CommittedWorldToObject3x4},
7733
    {IntrinsicOp::MOP_CommittedWorldToObject4x3,
7734
     TranslateRayQueryTransposedMatrix3x4Operation,
7735
     DXIL::OpCode::RayQuery_CommittedWorldToObject3x4},
7736
    {IntrinsicOp::MOP_Proceed, TranslateGenericRayQueryMethod,
7737
     DXIL::OpCode::RayQuery_Proceed},
7738
    {IntrinsicOp::MOP_RayFlags, TranslateGenericRayQueryMethod,
7739
     DXIL::OpCode::RayQuery_RayFlags},
7740
    {IntrinsicOp::MOP_RayTMin, TranslateGenericRayQueryMethod,
7741
     DXIL::OpCode::RayQuery_RayTMin},
7742
    {IntrinsicOp::MOP_TraceRayInline, TranslateTraceRayInline,
7743
     DXIL::OpCode::RayQuery_TraceRayInline},
7744
    {IntrinsicOp::MOP_WorldRayDirection, TranslateRayQueryFloat3Getter,
7745
     DXIL::OpCode::RayQuery_WorldRayDirection},
7746
    {IntrinsicOp::MOP_WorldRayOrigin, TranslateRayQueryFloat3Getter,
7747
     DXIL::OpCode::RayQuery_WorldRayOrigin},
7748
    {IntrinsicOp::MOP_Count, TranslateNodeGetInputRecordCount,
7749
     DXIL::OpCode::GetInputRecordCount},
7750
    {IntrinsicOp::MOP_FinishedCrossGroupSharing,
7751
     TranslateNodeFinishedCrossGroupSharing,
7752
     DXIL::OpCode::FinishedCrossGroupSharing},
7753
    {IntrinsicOp::MOP_GetGroupNodeOutputRecords,
7754
     TranslateGetGroupNodeOutputRecords,
7755
     DXIL::OpCode::AllocateNodeOutputRecords},
7756
    {IntrinsicOp::MOP_GetThreadNodeOutputRecords,
7757
     TranslateGetThreadNodeOutputRecords,
7758
     DXIL::OpCode::AllocateNodeOutputRecords},
7759
    {IntrinsicOp::MOP_IsValid, TranslateNodeOutputIsValid,
7760
     DXIL::OpCode::NodeOutputIsValid},
7761
    {IntrinsicOp::MOP_GroupIncrementOutputCount,
7762
     TranslateNodeGroupIncrementOutputCount,
7763
     DXIL::OpCode::IncrementOutputCount},
7764
    {IntrinsicOp::MOP_ThreadIncrementOutputCount,
7765
     TranslateNodeThreadIncrementOutputCount,
7766
     DXIL::OpCode::IncrementOutputCount},
7767
    {IntrinsicOp::MOP_OutputComplete, TranslateNodeOutputComplete,
7768
     DXIL::OpCode::OutputComplete},
7769
7770
    // SPIRV change starts
7771
    {IntrinsicOp::MOP_SubpassLoad, UnsupportedVulkanIntrinsic,
7772
     DXIL::OpCode::NumOpCodes},
7773
    // SPIRV change ends
7774
7775
    // Manually added part.
7776
    {IntrinsicOp::IOP_InterlockedUMax, TranslateIopAtomicBinaryOperation,
7777
     DXIL::OpCode::NumOpCodes},
7778
    {IntrinsicOp::IOP_InterlockedUMin, TranslateIopAtomicBinaryOperation,
7779
     DXIL::OpCode::NumOpCodes},
7780
    {IntrinsicOp::IOP_WaveActiveUMax, TranslateWaveA2A,
7781
     DXIL::OpCode::WaveActiveOp},
7782
    {IntrinsicOp::IOP_WaveActiveUMin, TranslateWaveA2A,
7783
     DXIL::OpCode::WaveActiveOp},
7784
    {IntrinsicOp::IOP_WaveActiveUProduct, TranslateWaveA2A,
7785
     DXIL::OpCode::WaveActiveOp},
7786
    {IntrinsicOp::IOP_WaveActiveUSum, TranslateWaveA2A,
7787
     DXIL::OpCode::WaveActiveOp},
7788
    {IntrinsicOp::IOP_WaveMultiPrefixUProduct, TranslateWaveMultiPrefix,
7789
     DXIL::OpCode::WaveMultiPrefixOp},
7790
    {IntrinsicOp::IOP_WaveMultiPrefixUSum, TranslateWaveMultiPrefix,
7791
     DXIL::OpCode::WaveMultiPrefixOp},
7792
    {IntrinsicOp::IOP_WavePrefixUProduct, TranslateWaveA2A,
7793
     DXIL::OpCode::WavePrefixOp},
7794
    {IntrinsicOp::IOP_WavePrefixUSum, TranslateWaveA2A,
7795
     DXIL::OpCode::WavePrefixOp},
7796
    {IntrinsicOp::IOP_uabs, TranslateUAbs, DXIL::OpCode::NumOpCodes},
7797
    {IntrinsicOp::IOP_uclamp, TranslateClamp, DXIL::OpCode::NumOpCodes},
7798
    {IntrinsicOp::IOP_udot, TranslateDot, DXIL::OpCode::NumOpCodes},
7799
    {IntrinsicOp::IOP_ufirstbithigh, TranslateFirstbitHi,
7800
     DXIL::OpCode::FirstbitHi},
7801
    {IntrinsicOp::IOP_umad, TranslateFUITrinary, DXIL::OpCode::UMad},
7802
    {IntrinsicOp::IOP_umax, TranslateFUIBinary, DXIL::OpCode::UMax},
7803
    {IntrinsicOp::IOP_umin, TranslateFUIBinary, DXIL::OpCode::UMin},
7804
    {IntrinsicOp::IOP_umul, TranslateMul, DXIL::OpCode::UMul},
7805
    {IntrinsicOp::IOP_usign, TranslateUSign, DXIL::OpCode::UMax},
7806
    {IntrinsicOp::MOP_InterlockedUMax, TranslateMopAtomicBinaryOperation,
7807
     DXIL::OpCode::NumOpCodes},
7808
    {IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation,
7809
     DXIL::OpCode::NumOpCodes},
7810
    {IntrinsicOp::MOP_DxHitObject_MakeNop, TranslateHitObjectMakeNop,
7811
     DXIL::OpCode::HitObject_MakeNop},
7812
    {IntrinsicOp::IOP_DxMaybeReorderThread, TranslateMaybeReorderThread,
7813
     DXIL::OpCode::MaybeReorderThread},
7814
    {IntrinsicOp::IOP_Vkreinterpret_pointer_cast, UnsupportedVulkanIntrinsic,
7815
     DXIL::OpCode::NumOpCodes},
7816
    {IntrinsicOp::IOP_Vkstatic_pointer_cast, UnsupportedVulkanIntrinsic,
7817
     DXIL::OpCode::NumOpCodes},
7818
    {IntrinsicOp::MOP_GetBufferContents, UnsupportedVulkanIntrinsic,
7819
     DXIL::OpCode::NumOpCodes},
7820
    {IntrinsicOp::MOP_DxHitObject_FromRayQuery, TranslateHitObjectFromRayQuery,
7821
     DXIL::OpCode::HitObject_FromRayQuery},
7822
    {IntrinsicOp::MOP_DxHitObject_GetAttributes,
7823
     TranslateHitObjectGetAttributes, DXIL::OpCode::HitObject_Attributes},
7824
    {IntrinsicOp::MOP_DxHitObject_GetGeometryIndex,
7825
     TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_GeometryIndex},
7826
    {IntrinsicOp::MOP_DxHitObject_GetHitKind, TranslateHitObjectScalarGetter,
7827
     DXIL::OpCode::HitObject_HitKind},
7828
    {IntrinsicOp::MOP_DxHitObject_GetInstanceID, TranslateHitObjectScalarGetter,
7829
     DXIL::OpCode::HitObject_InstanceID},
7830
    {IntrinsicOp::MOP_DxHitObject_GetInstanceIndex,
7831
     TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_InstanceIndex},
7832
    {IntrinsicOp::MOP_DxHitObject_GetObjectRayDirection,
7833
     TranslateHitObjectVectorGetter,
7834
     DXIL::OpCode::HitObject_ObjectRayDirection},
7835
    {IntrinsicOp::MOP_DxHitObject_GetObjectRayOrigin,
7836
     TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_ObjectRayOrigin},
7837
    {IntrinsicOp::MOP_DxHitObject_GetObjectToWorld3x4,
7838
     TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_ObjectToWorld3x4},
7839
    {IntrinsicOp::MOP_DxHitObject_GetObjectToWorld4x3,
7840
     TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_ObjectToWorld3x4},
7841
    {IntrinsicOp::MOP_DxHitObject_GetPrimitiveIndex,
7842
     TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_PrimitiveIndex},
7843
    {IntrinsicOp::MOP_DxHitObject_GetRayFlags, TranslateHitObjectScalarGetter,
7844
     DXIL::OpCode::HitObject_RayFlags},
7845
    {IntrinsicOp::MOP_DxHitObject_GetRayTCurrent,
7846
     TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_RayTCurrent},
7847
    {IntrinsicOp::MOP_DxHitObject_GetRayTMin, TranslateHitObjectScalarGetter,
7848
     DXIL::OpCode::HitObject_RayTMin},
7849
    {IntrinsicOp::MOP_DxHitObject_GetShaderTableIndex,
7850
     TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_ShaderTableIndex},
7851
    {IntrinsicOp::MOP_DxHitObject_GetWorldRayDirection,
7852
     TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_WorldRayDirection},
7853
    {IntrinsicOp::MOP_DxHitObject_GetWorldRayOrigin,
7854
     TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_WorldRayOrigin},
7855
    {IntrinsicOp::MOP_DxHitObject_GetWorldToObject3x4,
7856
     TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_WorldToObject3x4},
7857
    {IntrinsicOp::MOP_DxHitObject_GetWorldToObject4x3,
7858
     TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_WorldToObject3x4},
7859
    {IntrinsicOp::MOP_DxHitObject_Invoke, TranslateHitObjectInvoke,
7860
     DXIL::OpCode::HitObject_Invoke},
7861
    {IntrinsicOp::MOP_DxHitObject_IsHit, TranslateHitObjectScalarGetter,
7862
     DXIL::OpCode::HitObject_IsHit},
7863
    {IntrinsicOp::MOP_DxHitObject_IsMiss, TranslateHitObjectScalarGetter,
7864
     DXIL::OpCode::HitObject_IsMiss},
7865
    {IntrinsicOp::MOP_DxHitObject_IsNop, TranslateHitObjectScalarGetter,
7866
     DXIL::OpCode::HitObject_IsNop},
7867
    {IntrinsicOp::MOP_DxHitObject_LoadLocalRootTableConstant,
7868
     TranslateHitObjectLoadLocalRootTableConstant,
7869
     DXIL::OpCode::HitObject_LoadLocalRootTableConstant},
7870
    {IntrinsicOp::MOP_DxHitObject_MakeMiss, TranslateHitObjectMakeMiss,
7871
     DXIL::OpCode::HitObject_MakeMiss},
7872
    {IntrinsicOp::MOP_DxHitObject_SetShaderTableIndex,
7873
     TranslateHitObjectSetShaderTableIndex,
7874
     DXIL::OpCode::HitObject_SetShaderTableIndex},
7875
    {IntrinsicOp::MOP_DxHitObject_TraceRay, TranslateHitObjectTraceRay,
7876
     DXIL::OpCode::HitObject_TraceRay},
7877
7878
    {IntrinsicOp::IOP_isnormal, TrivialIsSpecialFloat, DXIL::OpCode::IsNormal},
7879
7880
    {IntrinsicOp::IOP_GetGroupWaveCount, TranslateWaveToVal,
7881
     DXIL::OpCode::GetGroupWaveCount},
7882
    {IntrinsicOp::IOP_GetGroupWaveIndex, TranslateWaveToVal,
7883
     DXIL::OpCode::GetGroupWaveIndex},
7884
7885
    {IntrinsicOp::IOP_ClusterID, TrivialNoArgWithRetNoOverloadOperation,
7886
     DXIL::OpCode::ClusterID},
7887
    {IntrinsicOp::MOP_CandidateClusterID, TranslateGenericRayQueryMethod,
7888
     DXIL::OpCode::RayQuery_CandidateClusterID},
7889
    {IntrinsicOp::MOP_CommittedClusterID, TranslateGenericRayQueryMethod,
7890
     DXIL::OpCode::RayQuery_CommittedClusterID},
7891
    {IntrinsicOp::MOP_DxHitObject_GetClusterID, TranslateHitObjectScalarGetter,
7892
     DXIL::OpCode::HitObject_ClusterID},
7893
7894
    {IntrinsicOp::IOP_TriangleObjectPositions, TranslateTriangleObjectPositions,
7895
     DXIL::OpCode::TriangleObjectPosition},
7896
    {IntrinsicOp::MOP_CandidateTriangleObjectPositions,
7897
     TranslateRayQueryTriangleObjectPositions,
7898
     DXIL::OpCode::RayQuery_CandidateTriangleObjectPosition},
7899
    {IntrinsicOp::MOP_CommittedTriangleObjectPositions,
7900
     TranslateRayQueryTriangleObjectPositions,
7901
     DXIL::OpCode::RayQuery_CommittedTriangleObjectPosition},
7902
    {IntrinsicOp::MOP_DxHitObject_TriangleObjectPositions,
7903
     TranslateHitObjectTriangleObjectPositions,
7904
     DXIL::OpCode::HitObject_TriangleObjectPosition},
7905
7906
    {IntrinsicOp::IOP___builtin_LinAlg_CopyConvertMatrix,
7907
     TranslateLinAlgCopyConvertMatrix, DXIL::OpCode::LinAlgCopyConvertMatrix},
7908
    {IntrinsicOp::IOP___builtin_LinAlg_FillMatrix, TranslateLinAlgFillMatrix,
7909
     DXIL::OpCode::LinAlgFillMatrix},
7910
    {IntrinsicOp::IOP___builtin_LinAlg_MatrixGetCoordinate,
7911
     TranslateLinAlgMatrixGetCoordinate,
7912
     DXIL::OpCode::LinAlgMatrixGetCoordinate},
7913
    {IntrinsicOp::IOP___builtin_LinAlg_MatrixGetElement,
7914
     TranslateLinAlgMatrixGetElement, DXIL::OpCode::LinAlgMatrixGetElement},
7915
    {IntrinsicOp::IOP___builtin_LinAlg_MatrixLength, TrivialUnaryOperation,
7916
     DXIL::OpCode::LinAlgMatrixLength},
7917
    {IntrinsicOp::IOP___builtin_LinAlg_MatrixLoadFromDescriptor,
7918
     TranslateLinAlgMatrixLoadFromDescriptor,
7919
     DXIL::OpCode::LinAlgMatrixLoadFromDescriptor},
7920
    {IntrinsicOp::IOP___builtin_LinAlg_MatrixLoadFromMemory,
7921
     TranslateLinAlgMatrixLoadFromMemory,
7922
     DXIL::OpCode::LinAlgMatrixLoadFromMemory},
7923
    {IntrinsicOp::IOP___builtin_LinAlg_MatrixSetElement,
7924
     TranslateLinAlgMatrixSetElement, DXIL::OpCode::LinAlgMatrixSetElement},
7925
    {IntrinsicOp::IOP___builtin_LinAlg_MatrixStoreToDescriptor,
7926
     TranslateLinAlgMatrixAccumStoreToDescriptor,
7927
     DXIL::OpCode::LinAlgMatrixStoreToDescriptor},
7928
    {IntrinsicOp::IOP___builtin_LinAlg_MatrixStoreToMemory,
7929
     TranslateLinAlgMatrixAccumStoreToMemory,
7930
     DXIL::OpCode::LinAlgMatrixStoreToMemory},
7931
    {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulate,
7932
     TranslateLinAlgMatrixAccumulate, DXIL::OpCode::LinAlgMatrixAccumulate},
7933
    {IntrinsicOp::IOP___builtin_LinAlg_MatrixMatrixMultiply,
7934
     TranslateLinAlgMatrixMatrixMultiply, DXIL::OpCode::LinAlgMatrixMultiply},
7935
    {IntrinsicOp::IOP___builtin_LinAlg_MatrixMatrixMultiplyAccumulate,
7936
     TranslateLinAlgMatrixMatrixMultiplyAccumulate,
7937
     DXIL::OpCode::LinAlgMatrixMultiplyAccumulate},
7938
    {IntrinsicOp::IOP___builtin_LinAlg_MatrixQueryAccumulatorLayout,
7939
     TrivialNoArgOperation, DXIL::OpCode::LinAlgMatrixQueryAccumulatorLayout},
7940
    {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulateToDescriptor,
7941
     TranslateLinAlgMatrixAccumStoreToDescriptor,
7942
     DXIL::OpCode::LinAlgMatrixAccumulateToDescriptor},
7943
    {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulateToMemory,
7944
     TranslateLinAlgMatrixAccumStoreToMemory,
7945
     DXIL::OpCode::LinAlgMatrixAccumulateToMemory},
7946
    {IntrinsicOp::IOP___builtin_LinAlg_MatrixOuterProduct,
7947
     TranslateLinAlgMatrixOuterProduct, DXIL::OpCode::LinAlgMatrixOuterProduct},
7948
    {IntrinsicOp::IOP___builtin_LinAlg_MatrixVectorMultiply,
7949
     TranslateLinAlgMatVecMul, DXIL::OpCode::LinAlgMatVecMul},
7950
    {IntrinsicOp::IOP___builtin_LinAlg_MatrixVectorMultiplyAdd,
7951
     TranslateLinAlgMatVecMulAdd, DXIL::OpCode::LinAlgMatVecMulAdd},
7952
7953
    {IntrinsicOp::IOP_DebugBreak, TrivialNoArgOperation,
7954
     DXIL::OpCode::DebugBreak},
7955
    {IntrinsicOp::IOP_DxIsDebuggerPresent, TranslateWaveToVal,
7956
     DXIL::OpCode::IsDebuggerPresent},
7957
7958
    {IntrinsicOp::IOP___builtin_LinAlg_Convert, TranslateLinAlgConvert,
7959
     DXIL::OpCode::LinAlgConvert},
7960
};
7961
constexpr size_t NumLowerTableEntries =
7962
    sizeof(gLowerTable) / sizeof(gLowerTable[0]);
7963
static_assert(
7964
    NumLowerTableEntries == static_cast<size_t>(IntrinsicOp::Num_Intrinsics),
7965
    "Intrinsic lowering table must be updated to account for new intrinsics.");
7966
7967
// Make table-order failures report the bad index via template instantiation
7968
// parameter in the diagnostic.
7969
// On failure, use hlsl_intrinsic_opcodes.json to find the mismatch.
7970
template <size_t I> struct ValidateLowerTableEntry {
7971
  // Instantiate a type that fails if the opcode doesn't match the index.
7972
  static_assert(
7973
      I == static_cast<size_t>(gLowerTable[I].IntriOpcode),
7974
      "Intrinsic lowering table is out of order. "
7975
      "See ValidateLowerTableEntry<I> template instantiation for Index.");
7976
  static constexpr bool Value =
7977
      I == static_cast<size_t>(gLowerTable[I].IntriOpcode);
7978
};
7979
7980
template <size_t I, size_t N> struct ValidateLowerTableImpl {
7981
  static constexpr bool Value = ValidateLowerTableEntry<I>::Value &&
7982
                                ValidateLowerTableImpl<I + 1, N>::Value;
7983
};
7984
7985
template <size_t N> struct ValidateLowerTableImpl<N, N> {
7986
  static constexpr bool Value = true;
7987
};
7988
7989
static_assert(ValidateLowerTableImpl<0, NumLowerTableEntries>::Value,
7990
              "Intrinsic lowering table is out of order.");
7991
} // namespace
7992
7993
static void TranslateBuiltinIntrinsic(CallInst *CI,
7994
                                      HLOperationLowerHelper &helper,
7995
                                      HLObjectOperationLowerHelper *pObjHelper,
7996
65.8k
                                      bool &Translated) {
7997
65.8k
  unsigned opcode = hlsl::GetHLOpcode(CI);
7998
65.8k
  const IntrinsicLower &lower = gLowerTable[opcode];
7999
65.8k
  DXASSERT((unsigned)lower.IntriOpcode == opcode,
8000
65.8k
           "Intrinsic lowering table index must match intrinsic opcode.");
8001
65.8k
  Value *Result = lower.LowerFunc(CI, lower.IntriOpcode, lower.DxilOpcode,
8002
65.8k
                                  helper, pObjHelper, Translated);
8003
65.8k
  if (Result)
8004
40.7k
    CI->replaceAllUsesWith(Result);
8005
65.8k
}
8006
8007
// SharedMem.
8008
namespace {
8009
8010
498
bool IsSharedMemPtr(Value *Ptr) {
8011
498
  return Ptr->getType()->getPointerAddressSpace() == DXIL::kTGSMAddrSpace;
8012
498
}
8013
8014
498
bool IsLocalVariablePtr(Value *Ptr) {
8015
1.10k
  while (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
8016
610
    Ptr = GEP->getPointerOperand();
8017
610
  }
8018
498
  bool isAlloca = isa<AllocaInst>(Ptr);
8019
498
  if (isAlloca)
8020
0
    return true;
8021
8022
498
  GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr);
8023
498
  if (!GV)
8024
498
    return false;
8025
8026
0
  return GV->getLinkage() == GlobalValue::LinkageTypes::InternalLinkage;
8027
498
}
8028
8029
} // namespace
8030
8031
// Constant buffer.
8032
namespace {
8033
2.31k
unsigned GetEltTypeByteSizeForConstBuf(Type *EltType, const DataLayout &DL) {
8034
2.31k
  DXASSERT(EltType->isIntegerTy() || EltType->isFloatingPointTy(),
8035
2.31k
           "not an element type");
8036
  // TODO: Use real size after change constant buffer into linear layout.
8037
2.31k
  if (DL.getTypeSizeInBits(EltType) <= 32) {
8038
    // Constant buffer is 4 bytes align.
8039
2.26k
    return 4;
8040
2.26k
  }
8041
8042
48
  return 8;
8043
2.31k
}
8044
8045
Value *GenerateCBLoad(Value *handle, Value *offset, Type *EltTy, OP *hlslOP,
8046
0
                      IRBuilder<> &Builder) {
8047
0
  Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoad);
8048
0
8049
0
  DXASSERT(!EltTy->isIntegerTy(1),
8050
0
           "Bools should not be loaded as their register representation.");
8051
0
8052
0
  // Align to 8 bytes for now.
8053
0
  Constant *align = hlslOP->GetU32Const(8);
8054
0
  Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoad, EltTy);
8055
0
  return Builder.CreateCall(CBLoad, {OpArg, handle, offset, align});
8056
0
}
8057
8058
Value *TranslateConstBufMatLd(Type *matType, Value *handle, Value *offset,
8059
                              bool colMajor, OP *OP, const DataLayout &DL,
8060
0
                              IRBuilder<> &Builder) {
8061
0
  HLMatrixType MatTy = HLMatrixType::cast(matType);
8062
0
  Type *EltTy = MatTy.getElementTypeForMem();
8063
0
  unsigned matSize = MatTy.getNumElements();
8064
0
  std::vector<Value *> elts(matSize);
8065
0
  Value *EltByteSize = ConstantInt::get(
8066
0
      offset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
8067
0
8068
0
  // TODO: use real size after change constant buffer into linear layout.
8069
0
  Value *baseOffset = offset;
8070
0
  for (unsigned i = 0; i < matSize; i++) {
8071
0
    elts[i] = GenerateCBLoad(handle, baseOffset, EltTy, OP, Builder);
8072
0
    baseOffset = Builder.CreateAdd(baseOffset, EltByteSize);
8073
0
  }
8074
0
8075
0
  Value *Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder);
8076
0
  Vec = MatTy.emitLoweredMemToReg(Vec, Builder);
8077
0
  return Vec;
8078
0
}
8079
8080
void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset,
8081
                    hlsl::OP *hlslOP, IRBuilder<> &Builder,
8082
                    DxilFieldAnnotation *prevFieldAnnotation,
8083
                    const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
8084
                    HLObjectOperationLowerHelper *pObjHelper);
8085
8086
Value *GenerateVecEltFromGEP(Value *ldData, GetElementPtrInst *GEP,
8087
104
                             IRBuilder<> &Builder, bool bInsertLdNextToGEP) {
8088
104
  DXASSERT(GEP->getNumIndices() == 2, "must have 2 level");
8089
104
  Value *baseIdx = (GEP->idx_begin())->get();
8090
104
  Value *zeroIdx = Builder.getInt32(0);
8091
104
  DXASSERT_LOCALVAR(baseIdx && zeroIdx, baseIdx == zeroIdx,
8092
104
                    "base index must be 0");
8093
104
  Value *idx = (GEP->idx_begin() + 1)->get();
8094
104
  if (dyn_cast<ConstantInt>(idx)) {
8095
56
    return Builder.CreateExtractElement(ldData, idx);
8096
56
  }
8097
8098
  // Dynamic indexing.
8099
  // Copy vec to array.
8100
48
  Type *Ty = ldData->getType();
8101
48
  Type *EltTy = Ty->getVectorElementType();
8102
48
  unsigned vecSize = Ty->getVectorNumElements();
8103
48
  ArrayType *AT = ArrayType::get(EltTy, vecSize);
8104
48
  IRBuilder<> AllocaBuilder(
8105
48
      GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
8106
48
  Value *tempArray = AllocaBuilder.CreateAlloca(AT);
8107
48
  Value *zero = Builder.getInt32(0);
8108
240
  for (unsigned int i = 0; i < vecSize; 
i++192
) {
8109
192
    Value *Elt = Builder.CreateExtractElement(ldData, Builder.getInt32(i));
8110
192
    Value *Ptr =
8111
192
        Builder.CreateInBoundsGEP(tempArray, {zero, Builder.getInt32(i)});
8112
192
    Builder.CreateStore(Elt, Ptr);
8113
192
  }
8114
  // Load from temp array.
8115
48
  if (bInsertLdNextToGEP) {
8116
    // Insert the new GEP just before the old and to-be-deleted GEP
8117
32
    Builder.SetInsertPoint(GEP);
8118
32
  }
8119
48
  Value *EltGEP = Builder.CreateInBoundsGEP(tempArray, {zero, idx});
8120
48
  return Builder.CreateLoad(EltGEP);
8121
104
}
8122
8123
void TranslateResourceInCB(LoadInst *LI,
8124
                           HLObjectOperationLowerHelper *pObjHelper,
8125
314
                           GlobalVariable *CbGV) {
8126
314
  if (LI->user_empty()) {
8127
0
    LI->eraseFromParent();
8128
0
    return;
8129
0
  }
8130
8131
314
  GetElementPtrInst *Ptr = cast<GetElementPtrInst>(LI->getPointerOperand());
8132
314
  CallInst *CI = cast<CallInst>(LI->user_back());
8133
314
  CallInst *Anno = cast<CallInst>(CI->user_back());
8134
314
  DxilResourceProperties RP = pObjHelper->GetResPropsFromAnnotateHandle(Anno);
8135
314
  Value *ResPtr = pObjHelper->GetOrCreateResourceForCbPtr(Ptr, CbGV, RP);
8136
8137
  // Lower Ptr to GV base Ptr.
8138
314
  Value *GvPtr = pObjHelper->LowerCbResourcePtr(Ptr, ResPtr);
8139
314
  IRBuilder<> Builder(LI);
8140
314
  Value *GvLd = Builder.CreateLoad(GvPtr);
8141
314
  LI->replaceAllUsesWith(GvLd);
8142
314
  LI->eraseFromParent();
8143
314
}
8144
8145
void TranslateCBAddressUser(Instruction *user, Value *handle, Value *baseOffset,
8146
                            hlsl::OP *hlslOP,
8147
                            DxilFieldAnnotation *prevFieldAnnotation,
8148
                            DxilTypeSystem &dxilTypeSys, const DataLayout &DL,
8149
0
                            HLObjectOperationLowerHelper *pObjHelper) {
8150
0
  IRBuilder<> Builder(user);
8151
0
  if (CallInst *CI = dyn_cast<CallInst>(user)) {
8152
0
    HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
8153
0
    unsigned opcode = GetHLOpcode(CI);
8154
0
    if (group == HLOpcodeGroup::HLMatLoadStore) {
8155
0
      HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
8156
0
      bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad;
8157
0
      DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad ||
8158
0
                   matOp == HLMatLoadStoreOpcode::RowMatLoad,
8159
0
               "No store on cbuffer");
8160
0
      Type *matType = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx)
8161
0
                          ->getType()
8162
0
                          ->getPointerElementType();
8163
0
      Value *newLd = TranslateConstBufMatLd(matType, handle, baseOffset,
8164
0
                                            colMajor, hlslOP, DL, Builder);
8165
0
      CI->replaceAllUsesWith(newLd);
8166
0
      CI->eraseFromParent();
8167
0
    } else if (group == HLOpcodeGroup::HLSubscript) {
8168
0
      HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
8169
0
      Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
8170
0
      HLMatrixType MatTy =
8171
0
          HLMatrixType::cast(basePtr->getType()->getPointerElementType());
8172
0
      Type *EltTy = MatTy.getElementTypeForReg();
8173
0
8174
0
      Value *EltByteSize = ConstantInt::get(
8175
0
          baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
8176
0
8177
0
      Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
8178
0
8179
0
      Type *resultType = CI->getType()->getPointerElementType();
8180
0
      unsigned resultSize = 1;
8181
0
      if (resultType->isVectorTy())
8182
0
        resultSize = resultType->getVectorNumElements();
8183
0
      DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
8184
0
      assert(resultSize <= 16);
8185
0
      Value *idxList[16];
8186
0
8187
0
      switch (subOp) {
8188
0
      case HLSubscriptOpcode::ColMatSubscript:
8189
0
      case HLSubscriptOpcode::RowMatSubscript: {
8190
0
        for (unsigned i = 0; i < resultSize; i++) {
8191
0
          Value *idx =
8192
0
              CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
8193
0
          Value *offset = Builder.CreateMul(idx, EltByteSize);
8194
0
          idxList[i] = Builder.CreateAdd(baseOffset, offset);
8195
0
        }
8196
0
8197
0
      } break;
8198
0
      case HLSubscriptOpcode::RowMatElement:
8199
0
      case HLSubscriptOpcode::ColMatElement: {
8200
0
        Constant *EltIdxs = cast<Constant>(idx);
8201
0
        for (unsigned i = 0; i < resultSize; i++) {
8202
0
          Value *offset =
8203
0
              Builder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize);
8204
0
          idxList[i] = Builder.CreateAdd(baseOffset, offset);
8205
0
        }
8206
0
      } break;
8207
0
      default:
8208
0
        DXASSERT(0, "invalid operation on const buffer");
8209
0
        break;
8210
0
      }
8211
0
8212
0
      Value *ldData = UndefValue::get(resultType);
8213
0
      if (resultType->isVectorTy()) {
8214
0
        for (unsigned i = 0; i < resultSize; i++) {
8215
0
          Value *eltData =
8216
0
              GenerateCBLoad(handle, idxList[i], EltTy, hlslOP, Builder);
8217
0
          ldData = Builder.CreateInsertElement(ldData, eltData, i);
8218
0
        }
8219
0
      } else {
8220
0
        ldData = GenerateCBLoad(handle, idxList[0], EltTy, hlslOP, Builder);
8221
0
      }
8222
0
8223
0
      for (auto U = CI->user_begin(); U != CI->user_end();) {
8224
0
        Value *subsUser = *(U++);
8225
0
        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
8226
0
          Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder,
8227
0
                                                 /*bInsertLdNextToGEP*/ true);
8228
0
8229
0
          for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
8230
0
            Value *gepUser = *(gepU++);
8231
0
            // Must be load here;
8232
0
            LoadInst *ldUser = cast<LoadInst>(gepUser);
8233
0
            ldUser->replaceAllUsesWith(subData);
8234
0
            ldUser->eraseFromParent();
8235
0
          }
8236
0
          GEP->eraseFromParent();
8237
0
        } else {
8238
0
          // Must be load here.
8239
0
          LoadInst *ldUser = cast<LoadInst>(subsUser);
8240
0
          ldUser->replaceAllUsesWith(ldData);
8241
0
          ldUser->eraseFromParent();
8242
0
        }
8243
0
      }
8244
0
8245
0
      CI->eraseFromParent();
8246
0
    } else {
8247
0
      DXASSERT(0, "not implemented yet");
8248
0
    }
8249
0
  } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
8250
0
    Type *Ty = ldInst->getType();
8251
0
    Type *EltTy = Ty->getScalarType();
8252
0
    // Resource inside cbuffer is lowered after GenerateDxilOperations.
8253
0
    if (dxilutil::IsHLSLObjectType(Ty)) {
8254
0
      CallInst *CI = cast<CallInst>(handle);
8255
0
      // CI should be annotate handle.
8256
0
      // Need createHandle here.
8257
0
      if (GetHLOpcodeGroup(CI->getCalledFunction()) ==
8258
0
          HLOpcodeGroup::HLAnnotateHandle)
8259
0
        CI = cast<CallInst>(CI->getArgOperand(HLOperandIndex::kHandleOpIdx));
8260
0
      GlobalVariable *CbGV = cast<GlobalVariable>(
8261
0
          CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx));
8262
0
      TranslateResourceInCB(ldInst, pObjHelper, CbGV);
8263
0
      return;
8264
0
    }
8265
0
    DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass");
8266
0
8267
0
    unsigned EltByteSize = GetEltTypeByteSizeForConstBuf(EltTy, DL);
8268
0
8269
0
    Value *newLd = GenerateCBLoad(handle, baseOffset, EltTy, hlslOP, Builder);
8270
0
    if (Ty->isVectorTy()) {
8271
0
      Value *result = UndefValue::get(Ty);
8272
0
      result = Builder.CreateInsertElement(result, newLd, (uint64_t)0);
8273
0
      // Update offset by 4 bytes.
8274
0
      Value *offset =
8275
0
          Builder.CreateAdd(baseOffset, hlslOP->GetU32Const(EltByteSize));
8276
0
      for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) {
8277
0
        Value *elt = GenerateCBLoad(handle, offset, EltTy, hlslOP, Builder);
8278
0
        result = Builder.CreateInsertElement(result, elt, i);
8279
0
        // Update offset by 4 bytes.
8280
0
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(EltByteSize));
8281
0
      }
8282
0
      newLd = result;
8283
0
    }
8284
0
8285
0
    ldInst->replaceAllUsesWith(newLd);
8286
0
    ldInst->eraseFromParent();
8287
0
  } else {
8288
0
    // Must be GEP here
8289
0
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
8290
0
    TranslateCBGep(GEP, handle, baseOffset, hlslOP, Builder,
8291
0
                   prevFieldAnnotation, DL, dxilTypeSys, pObjHelper);
8292
0
    GEP->eraseFromParent();
8293
0
  }
8294
0
}
8295
8296
void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset,
8297
                    hlsl::OP *hlslOP, IRBuilder<> &Builder,
8298
                    DxilFieldAnnotation *prevFieldAnnotation,
8299
                    const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
8300
0
                    HLObjectOperationLowerHelper *pObjHelper) {
8301
0
  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
8302
0
8303
0
  Value *offset = baseOffset;
8304
0
  // update offset
8305
0
  DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation;
8306
0
8307
0
  gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
8308
0
8309
0
  for (; GEPIt != E; GEPIt++) {
8310
0
    Value *idx = GEPIt.getOperand();
8311
0
    unsigned immIdx = 0;
8312
0
    bool bImmIdx = false;
8313
0
    if (Constant *constIdx = dyn_cast<Constant>(idx)) {
8314
0
      immIdx = constIdx->getUniqueInteger().getLimitedValue();
8315
0
      bImmIdx = true;
8316
0
    }
8317
0
8318
0
    if (GEPIt->isPointerTy()) {
8319
0
      Type *EltTy = GEPIt->getPointerElementType();
8320
0
      unsigned size = 0;
8321
0
      if (StructType *ST = dyn_cast<StructType>(EltTy)) {
8322
0
        DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
8323
0
        size = annotation->GetCBufferSize();
8324
0
      } else {
8325
0
        DXASSERT(fieldAnnotation, "must be a field");
8326
0
        if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) {
8327
0
          unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
8328
0
              *fieldAnnotation, EltTy, dxilTypeSys);
8329
0
8330
0
          // Decide the nested array size.
8331
0
          unsigned nestedArraySize = 1;
8332
0
8333
0
          Type *EltTy = AT->getArrayElementType();
8334
0
          // support multi level of array
8335
0
          while (EltTy->isArrayTy()) {
8336
0
            ArrayType *EltAT = cast<ArrayType>(EltTy);
8337
0
            nestedArraySize *= EltAT->getNumElements();
8338
0
            EltTy = EltAT->getElementType();
8339
0
          }
8340
0
          // Align to 4 * 4 bytes.
8341
0
          unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
8342
0
          size = nestedArraySize * alignedSize;
8343
0
        } else {
8344
0
          size = DL.getTypeAllocSize(EltTy);
8345
0
        }
8346
0
      }
8347
0
      // Align to 4 * 4 bytes.
8348
0
      size = (size + 15) & 0xfffffff0;
8349
0
      if (bImmIdx) {
8350
0
        unsigned tempOffset = size * immIdx;
8351
0
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
8352
0
      } else {
8353
0
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
8354
0
        offset = Builder.CreateAdd(offset, tempOffset);
8355
0
      }
8356
0
    } else if (GEPIt->isStructTy()) {
8357
0
      StructType *ST = cast<StructType>(*GEPIt);
8358
0
      DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
8359
0
      fieldAnnotation = &annotation->GetFieldAnnotation(immIdx);
8360
0
      unsigned structOffset = fieldAnnotation->GetCBufferOffset();
8361
0
      offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(structOffset));
8362
0
    } else if (GEPIt->isArrayTy()) {
8363
0
      DXASSERT(fieldAnnotation != nullptr, "must a field");
8364
0
      unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
8365
0
          *fieldAnnotation, *GEPIt, dxilTypeSys);
8366
0
      // Decide the nested array size.
8367
0
      unsigned nestedArraySize = 1;
8368
0
8369
0
      Type *EltTy = GEPIt->getArrayElementType();
8370
0
      // support multi level of array
8371
0
      while (EltTy->isArrayTy()) {
8372
0
        ArrayType *EltAT = cast<ArrayType>(EltTy);
8373
0
        nestedArraySize *= EltAT->getNumElements();
8374
0
        EltTy = EltAT->getElementType();
8375
0
      }
8376
0
      // Align to 4 * 4 bytes.
8377
0
      unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
8378
0
      unsigned size = nestedArraySize * alignedSize;
8379
0
      if (bImmIdx) {
8380
0
        unsigned tempOffset = size * immIdx;
8381
0
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
8382
0
      } else {
8383
0
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
8384
0
        offset = Builder.CreateAdd(offset, tempOffset);
8385
0
      }
8386
0
    } else if (GEPIt->isVectorTy()) {
8387
0
      unsigned size = DL.getTypeAllocSize(GEPIt->getVectorElementType());
8388
0
      if (bImmIdx) {
8389
0
        unsigned tempOffset = size * immIdx;
8390
0
        offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset));
8391
0
      } else {
8392
0
        Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size));
8393
0
        offset = Builder.CreateAdd(offset, tempOffset);
8394
0
      }
8395
0
    } else {
8396
0
      gep_type_iterator temp = GEPIt;
8397
0
      temp++;
8398
0
      DXASSERT(temp == E, "scalar type must be the last");
8399
0
    }
8400
0
  }
8401
0
8402
0
  for (auto U = GEP->user_begin(); U != GEP->user_end();) {
8403
0
    Instruction *user = cast<Instruction>(*(U++));
8404
0
8405
0
    TranslateCBAddressUser(user, handle, offset, hlslOP, fieldAnnotation,
8406
0
                           dxilTypeSys, DL, pObjHelper);
8407
0
  }
8408
0
}
8409
8410
Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
8411
                            unsigned channelOffset, Type *EltTy, OP *hlslOP,
8412
15.1k
                            IRBuilder<> &Builder) {
8413
15.1k
  Constant *OpArg =
8414
15.1k
      hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);
8415
8416
15.1k
  DXASSERT(!EltTy->isIntegerTy(1),
8417
15.1k
           "Bools should not be loaded as their register representation.");
8418
8419
15.1k
  Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
8420
15.1k
  Type *halfTy = Type::getHalfTy(EltTy->getContext());
8421
15.1k
  Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
8422
15.1k
  Type *i16Ty = Type::getInt16Ty(EltTy->getContext());
8423
8424
15.1k
  bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
8425
15.1k
  bool is16 = (EltTy == halfTy || 
EltTy == i16Ty14.5k
) &&
!hlslOP->UseMinPrecision()762
;
8426
15.1k
  DXASSERT_LOCALVAR(is16, (is16 && channelOffset < 8) || channelOffset < 4,
8427
15.1k
                    "legacy cbuffer don't across 16 bytes register.");
8428
15.1k
  if (is64) {
8429
428
    Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
8430
428
    Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
8431
428
    DXASSERT((channelOffset & 1) == 0,
8432
428
             "channel offset must be even for double");
8433
428
    unsigned eltIdx = channelOffset >> 1;
8434
428
    Value *Result = Builder.CreateExtractValue(loadLegacy, eltIdx);
8435
428
    return Result;
8436
428
  }
8437
8438
14.6k
  Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
8439
14.6k
  Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
8440
14.6k
  return Builder.CreateExtractValue(loadLegacy, channelOffset);
8441
15.1k
}
8442
8443
Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
8444
                            unsigned channelOffset, Type *EltTy,
8445
                            unsigned vecSize, OP *hlslOP,
8446
14.7k
                            IRBuilder<> &Builder) {
8447
14.7k
  Constant *OpArg =
8448
14.7k
      hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);
8449
8450
14.7k
  DXASSERT(!EltTy->isIntegerTy(1),
8451
14.7k
           "Bools should not be loaded as their register representation.");
8452
8453
14.7k
  Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
8454
14.7k
  Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
8455
14.7k
  Type *halfTy = Type::getHalfTy(EltTy->getContext());
8456
14.7k
  Type *shortTy = Type::getInt16Ty(EltTy->getContext());
8457
8458
14.7k
  bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
8459
14.7k
  bool is16 =
8460
14.7k
      (EltTy == shortTy || 
EltTy == halfTy14.5k
) &&
!hlslOP->UseMinPrecision()898
;
8461
14.7k
  DXASSERT((is16 && channelOffset + vecSize <= 8) ||
8462
14.7k
               (channelOffset + vecSize) <= 4,
8463
14.7k
           "legacy cbuffer don't across 16 bytes register.");
8464
14.7k
  if (is16) {
8465
536
    Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
8466
536
    Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
8467
536
    Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
8468
2.06k
    for (unsigned i = 0; i < vecSize; 
++i1.53k
) {
8469
1.53k
      Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i);
8470
1.53k
      Result = Builder.CreateInsertElement(Result, NewElt, i);
8471
1.53k
    }
8472
536
    return Result;
8473
536
  }
8474
8475
14.2k
  if (is64) {
8476
76
    Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
8477
76
    Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
8478
76
    Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
8479
76
    unsigned smallVecSize = 2;
8480
76
    if (vecSize < smallVecSize)
8481
0
      smallVecSize = vecSize;
8482
228
    for (unsigned i = 0; i < smallVecSize; 
++i152
) {
8483
152
      Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i);
8484
152
      Result = Builder.CreateInsertElement(Result, NewElt, i);
8485
152
    }
8486
76
    if (vecSize > 2) {
8487
      // Got to next cb register.
8488
68
      legacyIdx = Builder.CreateAdd(legacyIdx, hlslOP->GetU32Const(1));
8489
68
      Value *loadLegacy =
8490
68
          Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
8491
204
      for (unsigned i = 2; i < vecSize; 
++i136
) {
8492
136
        Value *NewElt = Builder.CreateExtractValue(loadLegacy, i - 2);
8493
136
        Result = Builder.CreateInsertElement(Result, NewElt, i);
8494
136
      }
8495
68
    }
8496
76
    return Result;
8497
76
  }
8498
8499
14.1k
  Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy);
8500
14.1k
  Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
8501
14.1k
  Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize));
8502
62.5k
  for (unsigned i = 0; i < vecSize; 
++i48.3k
) {
8503
48.3k
    Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i);
8504
48.3k
    Result = Builder.CreateInsertElement(Result, NewElt, i);
8505
48.3k
  }
8506
14.1k
  return Result;
8507
14.2k
}
8508
8509
Value *TranslateConstBufMatLdLegacy(HLMatrixType MatTy, Value *handle,
8510
                                    Value *legacyIdx, bool colMajor, OP *OP,
8511
                                    bool memElemRepr, const DataLayout &DL,
8512
2.17k
                                    IRBuilder<> &Builder) {
8513
2.17k
  Type *EltTy = MatTy.getElementTypeForMem();
8514
8515
2.17k
  unsigned matSize = MatTy.getNumElements();
8516
2.17k
  std::vector<Value *> elts(matSize);
8517
2.17k
  unsigned EltByteSize = GetEltTypeByteSizeForConstBuf(EltTy, DL);
8518
2.17k
  if (colMajor) {
8519
1.72k
    unsigned colByteSize = 4 * EltByteSize;
8520
1.72k
    unsigned colRegSize = (colByteSize + 15) >> 4;
8521
7.72k
    for (unsigned c = 0; c < MatTy.getNumColumns(); 
c++6.00k
) {
8522
6.00k
      Value *col = GenerateCBLoadLegacy(handle, legacyIdx, /*channelOffset*/ 0,
8523
6.00k
                                        EltTy, MatTy.getNumRows(), OP, Builder);
8524
8525
27.6k
      for (unsigned r = 0; r < MatTy.getNumRows(); 
r++21.6k
) {
8526
21.6k
        unsigned matIdx = MatTy.getColumnMajorIndex(r, c);
8527
21.6k
        elts[matIdx] = Builder.CreateExtractElement(col, r);
8528
21.6k
      }
8529
      // Update offset for a column.
8530
6.00k
      legacyIdx = Builder.CreateAdd(legacyIdx, OP->GetU32Const(colRegSize));
8531
6.00k
    }
8532
1.72k
  } else {
8533
448
    unsigned rowByteSize = 4 * EltByteSize;
8534
448
    unsigned rowRegSize = (rowByteSize + 15) >> 4;
8535
1.73k
    for (unsigned r = 0; r < MatTy.getNumRows(); 
r++1.28k
) {
8536
1.28k
      Value *row =
8537
1.28k
          GenerateCBLoadLegacy(handle, legacyIdx, /*channelOffset*/ 0, EltTy,
8538
1.28k
                               MatTy.getNumColumns(), OP, Builder);
8539
5.32k
      for (unsigned c = 0; c < MatTy.getNumColumns(); 
c++4.03k
) {
8540
4.03k
        unsigned matIdx = MatTy.getRowMajorIndex(r, c);
8541
4.03k
        elts[matIdx] = Builder.CreateExtractElement(row, c);
8542
4.03k
      }
8543
      // Update offset for a row.
8544
1.28k
      legacyIdx = Builder.CreateAdd(legacyIdx, OP->GetU32Const(rowRegSize));
8545
1.28k
    }
8546
448
  }
8547
8548
2.17k
  Value *Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder);
8549
2.17k
  if (!memElemRepr)
8550
1.86k
    Vec = MatTy.emitLoweredMemToReg(Vec, Builder);
8551
2.17k
  return Vec;
8552
2.17k
}
8553
8554
void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle,
8555
                          Value *legacyIdx, unsigned channelOffset,
8556
                          hlsl::OP *hlslOP, IRBuilder<> &Builder,
8557
                          DxilFieldAnnotation *prevFieldAnnotation,
8558
                          const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
8559
                          HLObjectOperationLowerHelper *pObjHelper);
8560
8561
void TranslateCBAddressUserLegacy(Instruction *user, Value *handle,
8562
                                  Value *legacyIdx, unsigned channelOffset,
8563
                                  hlsl::OP *hlslOP,
8564
                                  DxilFieldAnnotation *prevFieldAnnotation,
8565
                                  DxilTypeSystem &dxilTypeSys,
8566
                                  const DataLayout &DL,
8567
43.0k
                                  HLObjectOperationLowerHelper *pObjHelper) {
8568
43.0k
  IRBuilder<> Builder(user);
8569
43.0k
  if (CallInst *CI = dyn_cast<CallInst>(user)) {
8570
2.24k
    HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
8571
2.24k
    if (group == HLOpcodeGroup::HLMatLoadStore) {
8572
1.86k
      unsigned opcode = GetHLOpcode(CI);
8573
1.86k
      HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
8574
1.86k
      bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad;
8575
1.86k
      DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad ||
8576
1.86k
                   matOp == HLMatLoadStoreOpcode::RowMatLoad,
8577
1.86k
               "No store on cbuffer");
8578
1.86k
      HLMatrixType MatTy =
8579
1.86k
          HLMatrixType::cast(CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx)
8580
1.86k
                                 ->getType()
8581
1.86k
                                 ->getPointerElementType());
8582
      // This will replace a call, so we should use the register representation
8583
      // of elements
8584
1.86k
      Value *newLd = TranslateConstBufMatLdLegacy(
8585
1.86k
          MatTy, handle, legacyIdx, colMajor, hlslOP, /*memElemRepr*/ false, DL,
8586
1.86k
          Builder);
8587
1.86k
      CI->replaceAllUsesWith(newLd);
8588
1.86k
      dxilutil::TryScatterDebugValueToVectorElements(newLd);
8589
1.86k
      CI->eraseFromParent();
8590
1.86k
    } else 
if (372
group == HLOpcodeGroup::HLSubscript372
) {
8591
352
      unsigned opcode = GetHLOpcode(CI);
8592
352
      HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
8593
352
      Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
8594
352
      HLMatrixType MatTy =
8595
352
          HLMatrixType::cast(basePtr->getType()->getPointerElementType());
8596
352
      Type *EltTy = MatTy.getElementTypeForReg();
8597
8598
352
      Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
8599
8600
352
      Type *resultType = CI->getType()->getPointerElementType();
8601
352
      unsigned resultSize = 1;
8602
352
      if (resultType->isVectorTy())
8603
256
        resultSize = resultType->getVectorNumElements();
8604
352
      DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
8605
352
      assert(resultSize <= 16);
8606
352
      Value *idxList[16];
8607
352
      bool colMajor = subOp == HLSubscriptOpcode::ColMatSubscript ||
8608
352
                      
subOp == HLSubscriptOpcode::ColMatElement178
;
8609
352
      bool dynamicIndexing = !isa<ConstantInt>(idx) &&
8610
352
                             
!isa<ConstantAggregateZero>(idx)162
&&
8611
352
                             
!isa<ConstantDataSequential>(idx)138
;
8612
8613
352
      Value *ldData = UndefValue::get(resultType);
8614
352
      if (!dynamicIndexing) {
8615
        // This will replace a load or GEP, so we should use the memory
8616
        // representation of elements
8617
302
        Value *matLd = TranslateConstBufMatLdLegacy(
8618
302
            MatTy, handle, legacyIdx, colMajor, hlslOP, /*memElemRepr*/ true,
8619
302
            DL, Builder);
8620
        // The matLd is keep original layout, just use the idx calc in
8621
        // EmitHLSLMatrixElement and EmitHLSLMatrixSubscript.
8622
302
        switch (subOp) {
8623
50
        case HLSubscriptOpcode::RowMatSubscript:
8624
190
        case HLSubscriptOpcode::ColMatSubscript: {
8625
830
          for (unsigned i = 0; i < resultSize; 
i++640
) {
8626
640
            idxList[i] =
8627
640
                CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
8628
640
          }
8629
190
        } break;
8630
32
        case HLSubscriptOpcode::RowMatElement:
8631
112
        case HLSubscriptOpcode::ColMatElement: {
8632
112
          Constant *EltIdxs = cast<Constant>(idx);
8633
264
          for (unsigned i = 0; i < resultSize; 
i++152
) {
8634
152
            idxList[i] = EltIdxs->getAggregateElement(i);
8635
152
          }
8636
112
        } break;
8637
0
        default:
8638
0
          DXASSERT(0, "invalid operation on const buffer");
8639
0
          break;
8640
302
        }
8641
8642
302
        if (resultType->isVectorTy()) {
8643
902
          for (unsigned i = 0; i < resultSize; 
i++696
) {
8644
696
            Value *eltData = Builder.CreateExtractElement(matLd, idxList[i]);
8645
696
            ldData = Builder.CreateInsertElement(ldData, eltData, i);
8646
696
          }
8647
206
        } else {
8648
96
          Value *eltData = Builder.CreateExtractElement(matLd, idxList[0]);
8649
96
          ldData = eltData;
8650
96
        }
8651
302
      } else {
8652
        // Must be matSub here.
8653
50
        Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
8654
8655
50
        if (colMajor) {
8656
          // idx is c * row + r.
8657
          // For first col, c is 0, so idx is r.
8658
34
          Value *one = Builder.getInt32(1);
8659
          // row.x = c[0].[idx]
8660
          // row.y = c[1].[idx]
8661
          // row.z = c[2].[idx]
8662
          // row.w = c[3].[idx]
8663
34
          Value *Elts[4];
8664
34
          ArrayType *AT = ArrayType::get(EltTy, MatTy.getNumRows());
8665
8666
34
          IRBuilder<> AllocaBuilder(user->getParent()
8667
34
                                        ->getParent()
8668
34
                                        ->getEntryBlock()
8669
34
                                        .getFirstInsertionPt());
8670
8671
34
          Value *tempArray = AllocaBuilder.CreateAlloca(AT);
8672
34
          Value *zero = AllocaBuilder.getInt32(0);
8673
34
          Value *cbufIdx = legacyIdx;
8674
158
          for (unsigned int c = 0; c < MatTy.getNumColumns(); 
c++124
) {
8675
124
            Value *ColVal = GenerateCBLoadLegacy(
8676
124
                handle, cbufIdx, /*channelOffset*/ 0, EltTy, MatTy.getNumRows(),
8677
124
                hlslOP, Builder);
8678
            // Convert ColVal to array for indexing.
8679
592
            for (unsigned int r = 0; r < MatTy.getNumRows(); 
r++468
) {
8680
468
              Value *Elt =
8681
468
                  Builder.CreateExtractElement(ColVal, Builder.getInt32(r));
8682
468
              Value *Ptr = Builder.CreateInBoundsGEP(
8683
468
                  tempArray, {zero, Builder.getInt32(r)});
8684
468
              Builder.CreateStore(Elt, Ptr);
8685
468
            }
8686
8687
124
            Value *Ptr = Builder.CreateInBoundsGEP(tempArray, {zero, idx});
8688
124
            Elts[c] = Builder.CreateLoad(Ptr);
8689
            // Update cbufIdx.
8690
124
            cbufIdx = Builder.CreateAdd(cbufIdx, one);
8691
124
          }
8692
34
          if (resultType->isVectorTy()) {
8693
158
            for (unsigned int c = 0; c < MatTy.getNumColumns(); 
c++124
) {
8694
124
              ldData = Builder.CreateInsertElement(ldData, Elts[c], c);
8695
124
            }
8696
34
          } else {
8697
0
            ldData = Elts[0];
8698
0
          }
8699
34
        } else {
8700
          // idx is r * col + c;
8701
          // r = idx / col;
8702
16
          Value *cCol = ConstantInt::get(idx->getType(), MatTy.getNumColumns());
8703
16
          idx = Builder.CreateUDiv(idx, cCol);
8704
16
          idx = Builder.CreateAdd(idx, legacyIdx);
8705
          // Just return a row; 'col' is the number of columns in the row.
8706
16
          ldData = GenerateCBLoadLegacy(handle, idx, /*channelOffset*/ 0, EltTy,
8707
16
                                        MatTy.getNumColumns(), hlslOP, Builder);
8708
16
        }
8709
50
        if (!resultType->isVectorTy()) {
8710
0
          ldData = Builder.CreateExtractElement(ldData, Builder.getInt32(0));
8711
0
        }
8712
50
      }
8713
8714
704
      
for (auto U = CI->user_begin(); 352
U != CI->user_end();) {
8715
352
        Value *subsUser = *(U++);
8716
352
        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
8717
80
          Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder,
8718
80
                                                 /*bInsertLdNextToGEP*/ true);
8719
160
          for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
8720
80
            Value *gepUser = *(gepU++);
8721
            // Must be load here;
8722
80
            LoadInst *ldUser = cast<LoadInst>(gepUser);
8723
80
            ldUser->replaceAllUsesWith(subData);
8724
80
            ldUser->eraseFromParent();
8725
80
          }
8726
80
          GEP->eraseFromParent();
8727
272
        } else {
8728
          // Must be load here.
8729
272
          LoadInst *ldUser = cast<LoadInst>(subsUser);
8730
272
          ldUser->replaceAllUsesWith(ldData);
8731
272
          ldUser->eraseFromParent();
8732
272
        }
8733
352
      }
8734
8735
352
      CI->eraseFromParent();
8736
352
    } else 
if (IntrinsicInst *20
II20
= dyn_cast<IntrinsicInst>(user)) {
8737
20
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
8738
20
          
II->getIntrinsicID() == Intrinsic::lifetime_end10
) {
8739
20
        DXASSERT(II->use_empty(), "lifetime intrinsic can't have uses");
8740
20
        II->eraseFromParent();
8741
20
      } else {
8742
0
        DXASSERT(0, "not implemented yet");
8743
0
      }
8744
20
    } else {
8745
0
      DXASSERT(0, "not implemented yet");
8746
0
    }
8747
40.8k
  } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
8748
22.7k
    Type *Ty = ldInst->getType();
8749
22.7k
    Type *EltTy = Ty->getScalarType();
8750
    // Resource inside cbuffer is lowered after GenerateDxilOperations.
8751
22.7k
    if (dxilutil::IsHLSLObjectType(Ty)) {
8752
314
      CallInst *CI = cast<CallInst>(handle);
8753
      // CI should be annotate handle.
8754
      // Need createHandle here.
8755
314
      if (GetHLOpcodeGroup(CI->getCalledFunction()) ==
8756
314
          HLOpcodeGroup::HLAnnotateHandle)
8757
314
        CI = cast<CallInst>(CI->getArgOperand(HLOperandIndex::kHandleOpIdx));
8758
8759
314
      GlobalVariable *CbGV = cast<GlobalVariable>(
8760
314
          CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx));
8761
314
      TranslateResourceInCB(ldInst, pObjHelper, CbGV);
8762
314
      return;
8763
314
    }
8764
22.4k
    DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass");
8765
8766
22.4k
    Value *newLd = nullptr;
8767
8768
22.4k
    if (Ty->isVectorTy())
8769
7.31k
      newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy,
8770
7.31k
                                   Ty->getVectorNumElements(), hlslOP, Builder);
8771
15.1k
    else
8772
15.1k
      newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy,
8773
15.1k
                                   hlslOP, Builder);
8774
8775
22.4k
    ldInst->replaceAllUsesWith(newLd);
8776
22.4k
    dxilutil::TryScatterDebugValueToVectorElements(newLd);
8777
22.4k
    ldInst->eraseFromParent();
8778
22.4k
  } else 
if (BitCastInst *18.1k
BCI18.1k
= dyn_cast<BitCastInst>(user)) {
8779
64
    for (auto it = BCI->user_begin(); it != BCI->user_end();) {
8780
36
      Instruction *I = cast<Instruction>(*it++);
8781
36
      TranslateCBAddressUserLegacy(I, handle, legacyIdx, channelOffset, hlslOP,
8782
36
                                   prevFieldAnnotation, dxilTypeSys, DL,
8783
36
                                   pObjHelper);
8784
36
    }
8785
28
    BCI->eraseFromParent();
8786
18.0k
  } else {
8787
    // Must be GEP here
8788
18.0k
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
8789
18.0k
    TranslateCBGepLegacy(GEP, handle, legacyIdx, channelOffset, hlslOP, Builder,
8790
18.0k
                         prevFieldAnnotation, DL, dxilTypeSys, pObjHelper);
8791
18.0k
    GEP->eraseFromParent();
8792
18.0k
  }
8793
43.0k
}
8794
8795
void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle,
8796
                          Value *legacyIndex, unsigned channel,
8797
                          hlsl::OP *hlslOP, IRBuilder<> &Builder,
8798
                          DxilFieldAnnotation *prevFieldAnnotation,
8799
                          const DataLayout &DL, DxilTypeSystem &dxilTypeSys,
8800
18.0k
                          HLObjectOperationLowerHelper *pObjHelper) {
8801
18.0k
  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
8802
8803
  // update offset
8804
18.0k
  DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation;
8805
8806
18.0k
  gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP);
8807
8808
62.2k
  for (; GEPIt != E; 
GEPIt++44.1k
) {
8809
44.2k
    Value *idx = GEPIt.getOperand();
8810
44.2k
    unsigned immIdx = 0;
8811
44.2k
    bool bImmIdx = false;
8812
44.2k
    if (Constant *constIdx = dyn_cast<Constant>(idx)) {
8813
41.4k
      immIdx = constIdx->getUniqueInteger().getLimitedValue();
8814
41.4k
      bImmIdx = true;
8815
41.4k
    }
8816
8817
44.2k
    if (GEPIt->isPointerTy()) {
8818
18.0k
      Type *EltTy = GEPIt->getPointerElementType();
8819
18.0k
      unsigned size = 0;
8820
18.0k
      if (StructType *ST = dyn_cast<StructType>(EltTy)) {
8821
18.0k
        DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
8822
18.0k
        size = annotation->GetCBufferSize();
8823
18.0k
      } else {
8824
32
        DXASSERT(fieldAnnotation, "must be a field");
8825
32
        if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) {
8826
32
          unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
8827
32
              *fieldAnnotation, EltTy, dxilTypeSys);
8828
8829
          // Decide the nested array size.
8830
32
          unsigned nestedArraySize = 1;
8831
8832
32
          Type *EltTy = AT->getArrayElementType();
8833
          // support multi level of array
8834
40
          while (EltTy->isArrayTy()) {
8835
8
            ArrayType *EltAT = cast<ArrayType>(EltTy);
8836
8
            nestedArraySize *= EltAT->getNumElements();
8837
8
            EltTy = EltAT->getElementType();
8838
8
          }
8839
          // Align to 4 * 4 bytes.
8840
32
          unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
8841
32
          size = nestedArraySize * alignedSize;
8842
32
        } else {
8843
0
          size = DL.getTypeAllocSize(EltTy);
8844
0
        }
8845
32
      }
8846
      // Skip 0 idx.
8847
18.0k
      if (bImmIdx && immIdx == 0)
8848
18.0k
        continue;
8849
      // Align to 4 * 4 bytes.
8850
0
      size = (size + 15) & 0xfffffff0;
8851
8852
      // Take this as array idxing.
8853
0
      if (bImmIdx) {
8854
0
        unsigned tempOffset = size * immIdx;
8855
0
        unsigned idxInc = tempOffset >> 4;
8856
0
        legacyIndex =
8857
0
            Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
8858
0
      } else {
8859
0
        Value *idxInc = Builder.CreateMul(idx, hlslOP->GetU32Const(size >> 4));
8860
0
        legacyIndex = Builder.CreateAdd(legacyIndex, idxInc);
8861
0
      }
8862
8863
      // Array always start from x channel.
8864
0
      channel = 0;
8865
26.1k
    } else if (GEPIt->isStructTy()) {
8866
21.4k
      StructType *ST = cast<StructType>(*GEPIt);
8867
21.4k
      DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST);
8868
21.4k
      fieldAnnotation = &annotation->GetFieldAnnotation(immIdx);
8869
8870
21.4k
      unsigned idxInc = 0;
8871
21.4k
      unsigned structOffset = 0;
8872
21.4k
      if (fieldAnnotation->GetCompType().Is16Bit() &&
8873
21.4k
          
!hlslOP->UseMinPrecision()1.10k
) {
8874
764
        structOffset = fieldAnnotation->GetCBufferOffset() >> 1;
8875
764
        channel += structOffset;
8876
764
        idxInc = channel >> 3;
8877
764
        channel = channel & 0x7;
8878
20.7k
      } else {
8879
20.7k
        structOffset = fieldAnnotation->GetCBufferOffset() >> 2;
8880
20.7k
        channel += structOffset;
8881
20.7k
        idxInc = channel >> 2;
8882
20.7k
        channel = channel & 0x3;
8883
20.7k
      }
8884
21.4k
      if (idxInc)
8885
8.27k
        legacyIndex =
8886
8.27k
            Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
8887
21.4k
    } else 
if (4.64k
GEPIt->isArrayTy()4.64k
) {
8888
4.17k
      DXASSERT(fieldAnnotation != nullptr, "must a field");
8889
4.17k
      unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize(
8890
4.17k
          *fieldAnnotation, *GEPIt, dxilTypeSys);
8891
      // Decide the nested array size.
8892
4.17k
      unsigned nestedArraySize = 1;
8893
8894
4.17k
      Type *EltTy = GEPIt->getArrayElementType();
8895
      // support multi level of array
8896
4.78k
      while (EltTy->isArrayTy()) {
8897
606
        ArrayType *EltAT = cast<ArrayType>(EltTy);
8898
606
        nestedArraySize *= EltAT->getNumElements();
8899
606
        EltTy = EltAT->getElementType();
8900
606
      }
8901
      // Align to 4 * 4 bytes.
8902
4.17k
      unsigned alignedSize = (EltSize + 15) & 0xfffffff0;
8903
4.17k
      unsigned size = nestedArraySize * alignedSize;
8904
4.17k
      if (bImmIdx) {
8905
1.41k
        unsigned tempOffset = size * immIdx;
8906
1.41k
        unsigned idxInc = tempOffset >> 4;
8907
1.41k
        legacyIndex =
8908
1.41k
            Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc));
8909
2.76k
      } else {
8910
2.76k
        Value *idxInc = Builder.CreateMul(idx, hlslOP->GetU32Const(size >> 4));
8911
2.76k
        legacyIndex = Builder.CreateAdd(legacyIndex, idxInc);
8912
2.76k
      }
8913
8914
      // Array always start from x channel.
8915
4.17k
      channel = 0;
8916
4.17k
    } else 
if (470
GEPIt->isVectorTy()470
) {
8917
      // Indexing on vector.
8918
470
      if (bImmIdx) {
8919
422
        if (immIdx < GEPIt->getVectorNumElements()) {
8920
394
          const unsigned vectorElmSize =
8921
394
              DL.getTypeAllocSize(GEPIt->getVectorElementType());
8922
394
          const bool bIs16bitType = vectorElmSize == 2;
8923
394
          const unsigned tempOffset = vectorElmSize * immIdx;
8924
394
          const unsigned numChannelsPerRow = bIs16bitType ? 
832
:
4362
;
8925
394
          const unsigned channelInc =
8926
394
              bIs16bitType ? 
tempOffset >> 132
:
tempOffset >> 2362
;
8927
8928
394
          DXASSERT((channel + channelInc) < numChannelsPerRow,
8929
394
                   "vector should not cross cb register");
8930
394
          channel += channelInc;
8931
394
          if (channel == numChannelsPerRow) {
8932
            // Get to another row.
8933
            // Update index and channel.
8934
0
            channel = 0;
8935
0
            legacyIndex = Builder.CreateAdd(legacyIndex, Builder.getInt32(1));
8936
0
          }
8937
394
        } else {
8938
28
          StringRef resName = "(unknown)";
8939
28
          if (DxilResourceBase *Res =
8940
28
                  pObjHelper->FindCBufferResourceFromHandle(handle)) {
8941
28
            resName = Res->GetGlobalName();
8942
28
          }
8943
28
          legacyIndex = hlsl::CreatePoisonValue(
8944
28
              legacyIndex->getType(),
8945
28
              Twine("Out of bounds index (") + Twine(immIdx) +
8946
28
                  Twine(") in CBuffer '") + Twine(resName) + ("'"),
8947
28
              GEP->getDebugLoc(), GEP);
8948
28
          channel = 0;
8949
28
        }
8950
422
      } else {
8951
48
        Type *EltTy = GEPIt->getVectorElementType();
8952
48
        unsigned vecSize = GEPIt->getVectorNumElements();
8953
8954
        // Load the whole register.
8955
48
        Value *newLd =
8956
48
            GenerateCBLoadLegacy(handle, legacyIndex,
8957
48
                                 /*channelOffset*/ channel, EltTy,
8958
48
                                 /*vecSize*/ vecSize, hlslOP, Builder);
8959
        // Copy to array.
8960
48
        IRBuilder<> AllocaBuilder(GEP->getParent()
8961
48
                                      ->getParent()
8962
48
                                      ->getEntryBlock()
8963
48
                                      .getFirstInsertionPt());
8964
48
        Value *tempArray =
8965
48
            AllocaBuilder.CreateAlloca(ArrayType::get(EltTy, vecSize));
8966
48
        Value *zeroIdx = hlslOP->GetU32Const(0);
8967
216
        for (unsigned i = 0; i < vecSize; 
i++168
) {
8968
168
          Value *Elt = Builder.CreateExtractElement(newLd, i);
8969
168
          Value *EltGEP = Builder.CreateInBoundsGEP(
8970
168
              tempArray, {zeroIdx, hlslOP->GetU32Const(i)});
8971
168
          Builder.CreateStore(Elt, EltGEP);
8972
168
        }
8973
        // Make sure this is the end of GEP.
8974
48
        gep_type_iterator temp = GEPIt;
8975
48
        temp++;
8976
48
        DXASSERT(temp == E, "scalar type must be the last");
8977
8978
        // Replace the GEP with array GEP.
8979
48
        Value *ArrayGEP = Builder.CreateInBoundsGEP(tempArray, {zeroIdx, idx});
8980
48
        GEP->replaceAllUsesWith(ArrayGEP);
8981
48
        return;
8982
48
      }
8983
470
    } else {
8984
0
      gep_type_iterator temp = GEPIt;
8985
0
      temp++;
8986
0
      DXASSERT(temp == E, "scalar type must be the last");
8987
0
    }
8988
44.2k
  }
8989
8990
43.0k
  
for (auto U = GEP->user_begin(); 18.0k
U != GEP->user_end();) {
8991
24.9k
    Instruction *user = cast<Instruction>(*(U++));
8992
8993
24.9k
    TranslateCBAddressUserLegacy(user, handle, legacyIndex, channel, hlslOP,
8994
24.9k
                                 fieldAnnotation, dxilTypeSys, DL, pObjHelper);
8995
24.9k
  }
8996
18.0k
}
8997
8998
void TranslateCBOperationsLegacy(Value *handle, Value *ptr, OP *hlslOP,
8999
                                 DxilTypeSystem &dxilTypeSys,
9000
                                 const DataLayout &DL,
9001
8.73k
                                 HLObjectOperationLowerHelper *pObjHelper) {
9002
8.73k
  auto User = ptr->user_begin();
9003
8.73k
  auto UserE = ptr->user_end();
9004
8.73k
  Value *zeroIdx = hlslOP->GetU32Const(0);
9005
26.8k
  for (; User != UserE;) {
9006
    // Must be Instruction.
9007
18.0k
    Instruction *I = cast<Instruction>(*(User++));
9008
18.0k
    TranslateCBAddressUserLegacy(
9009
18.0k
        I, handle, zeroIdx, /*channelOffset*/ 0, hlslOP,
9010
18.0k
        /*prevFieldAnnotation*/ nullptr, dxilTypeSys, DL, pObjHelper);
9011
18.0k
  }
9012
8.73k
}
9013
9014
} // namespace
9015
9016
// Structured buffer.
9017
namespace {
9018
9019
Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset,
9020
                        Value *status, Type *EltTy,
9021
                        MutableArrayRef<Value *> resultElts, hlsl::OP *OP,
9022
                        IRBuilder<> &Builder, unsigned NumComponents,
9023
28
                        Constant *alignment) {
9024
28
  OP::OpCode opcode = OP::OpCode::RawBufferLoad;
9025
9026
28
  DXASSERT(resultElts.size() <= 4,
9027
28
           "buffer load cannot load more than 4 values");
9028
9029
28
  if (bufIdx == nullptr) {
9030
    // This is actually a byte address buffer load with a struct template type.
9031
    // The call takes only one coordinates for the offset.
9032
0
    bufIdx = offset;
9033
0
    offset = UndefValue::get(offset->getType());
9034
0
  }
9035
9036
28
  Function *dxilF = OP->GetOpFunc(opcode, EltTy);
9037
28
  Constant *mask = GetRawBufferMaskForETy(EltTy, NumComponents, OP);
9038
28
  Value *Args[] = {OP->GetU32Const((unsigned)opcode),
9039
28
                   handle,
9040
28
                   bufIdx,
9041
28
                   offset,
9042
28
                   mask,
9043
28
                   alignment};
9044
28
  Value *Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode));
9045
9046
56
  for (unsigned i = 0; i < resultElts.size(); 
i++28
) {
9047
28
    resultElts[i] = Builder.CreateExtractValue(Ld, i);
9048
28
  }
9049
9050
  // status
9051
28
  UpdateStatus(Ld, status, Builder, OP);
9052
28
  return Ld;
9053
28
}
9054
9055
void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset,
9056
                         Type *EltTy, hlsl::OP *OP, IRBuilder<> &Builder,
9057
                         ArrayRef<Value *> vals, uint8_t mask,
9058
60
                         Constant *alignment) {
9059
60
  OP::OpCode opcode = OP::OpCode::RawBufferStore;
9060
60
  DXASSERT(vals.size() == 4, "buffer store need 4 values");
9061
9062
60
  Value *Args[] = {OP->GetU32Const((unsigned)opcode),
9063
60
                   handle,
9064
60
                   bufIdx,
9065
60
                   offset,
9066
60
                   vals[0],
9067
60
                   vals[1],
9068
60
                   vals[2],
9069
60
                   vals[3],
9070
60
                   OP->GetU8Const(mask),
9071
60
                   alignment};
9072
60
  Function *dxilF = OP->GetOpFunc(opcode, EltTy);
9073
60
  Builder.CreateCall(dxilF, Args);
9074
60
}
9075
9076
Value *TranslateStructBufMatLd(CallInst *CI, IRBuilder<> &Builder,
9077
                               Value *handle, HLResource::Kind RK, hlsl::OP *OP,
9078
                               Value *status, Value *bufIdx, Value *baseOffset,
9079
814
                               const DataLayout &DL) {
9080
9081
814
  ResLoadHelper helper(CI, RK, handle, bufIdx, baseOffset, status);
9082
#ifndef NDEBUG
9083
  Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
9084
  Type *matType = ptr->getType()->getPointerElementType();
9085
  HLMatrixType MatTy = HLMatrixType::cast(matType);
9086
  DXASSERT(MatTy.getLoweredVectorType(false /*MemRepr*/) ==
9087
               helper.retVal->getType(),
9088
           "helper type should match vectorized matrix");
9089
#endif
9090
814
  return TranslateBufLoad(helper, RK, Builder, OP, DL);
9091
814
}
9092
9093
void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle,
9094
                             hlsl::OP *OP, Value *bufIdx, Value *baseOffset,
9095
1.18k
                             Value *val, const DataLayout &DL) {
9096
1.18k
  [[maybe_unused]] HLMatrixType MatTy = HLMatrixType::cast(matType);
9097
1.18k
  DXASSERT(MatTy.getLoweredVectorType(false /*MemRepr*/) == val->getType(),
9098
1.18k
           "helper type should match vectorized matrix");
9099
1.18k
  TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx,
9100
1.18k
                 baseOffset, Builder, OP);
9101
1.18k
}
9102
9103
void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, HLResource::Kind RK,
9104
                               hlsl::OP *OP, Value *status, Value *bufIdx,
9105
2.00k
                               Value *baseOffset, const DataLayout &DL) {
9106
2.00k
  IRBuilder<> Builder(CI);
9107
2.00k
  HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
9108
2.00k
  unsigned opcode = GetHLOpcode(CI);
9109
2.00k
  DXASSERT_LOCALVAR(group, group == HLOpcodeGroup::HLMatLoadStore,
9110
2.00k
                    "only translate matrix loadStore here.");
9111
2.00k
  HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode);
9112
  // Due to the current way the initial codegen generates matrix
9113
  // orientation casts, the in-register vector matrix has already been
9114
  // reordered based on the destination's row or column-major packing
9115
  // orientation.
9116
2.00k
  switch (matOp) {
9117
242
  case HLMatLoadStoreOpcode::RowMatLoad:
9118
814
  case HLMatLoadStoreOpcode::ColMatLoad:
9119
814
    TranslateStructBufMatLd(CI, Builder, handle, RK, OP, status, bufIdx,
9120
814
                            baseOffset, DL);
9121
814
    break;
9122
194
  case HLMatLoadStoreOpcode::RowMatStore:
9123
1.18k
  case HLMatLoadStoreOpcode::ColMatStore: {
9124
1.18k
    Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
9125
1.18k
    Value *val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
9126
1.18k
    TranslateStructBufMatSt(ptr->getType()->getPointerElementType(), Builder,
9127
1.18k
                            handle, OP, bufIdx, baseOffset, val, DL);
9128
1.18k
  } break;
9129
2.00k
  }
9130
9131
2.00k
  CI->eraseFromParent();
9132
2.00k
}
9133
9134
void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
9135
                                     HLResource::Kind ResKind, Value *bufIdx,
9136
                                     Value *baseOffset, Value *status,
9137
                                     hlsl::OP *OP, const DataLayout &DL);
9138
9139
// For case like mat[i][j].
9140
// IdxList is [i][0], [i][1], [i][2],[i][3].
9141
// Idx is j.
9142
// return [i][j] not mat[i][j] because resource ptr and temp ptr need different
9143
// code gen.
9144
static Value *LowerGEPOnMatIndexListToIndex(llvm::GetElementPtrInst *GEP,
9145
24
                                            ArrayRef<Value *> IdxList) {
9146
24
  IRBuilder<> Builder(GEP);
9147
24
  Value *zero = Builder.getInt32(0);
9148
24
  DXASSERT(GEP->getNumIndices() == 2, "must have 2 level");
9149
24
  Value *baseIdx = (GEP->idx_begin())->get();
9150
24
  DXASSERT_LOCALVAR(baseIdx, baseIdx == zero, "base index must be 0");
9151
24
  Value *Idx = (GEP->idx_begin() + 1)->get();
9152
9153
24
  if (ConstantInt *immIdx = dyn_cast<ConstantInt>(Idx)) {
9154
16
    return IdxList[immIdx->getSExtValue()];
9155
16
  }
9156
9157
8
  IRBuilder<> AllocaBuilder(
9158
8
      GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
9159
8
  unsigned size = IdxList.size();
9160
  // Store idxList to temp array.
9161
8
  ArrayType *AT = ArrayType::get(IdxList[0]->getType(), size);
9162
8
  Value *tempArray = AllocaBuilder.CreateAlloca(AT);
9163
9164
40
  for (unsigned i = 0; i < size; 
i++32
) {
9165
32
    Value *EltPtr = Builder.CreateGEP(tempArray, {zero, Builder.getInt32(i)});
9166
32
    Builder.CreateStore(IdxList[i], EltPtr);
9167
32
  }
9168
  // Load the idx.
9169
8
  Value *GEPOffset = Builder.CreateGEP(tempArray, {zero, Idx});
9170
8
  return Builder.CreateLoad(GEPOffset);
9171
24
}
9172
9173
// subscript operator for matrix of struct element.
9174
void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
9175
                                    HLResource::Kind ResKind, Value *bufIdx,
9176
                                    Value *baseOffset, Value *status,
9177
146
                                    hlsl::OP *hlslOP, const DataLayout &DL) {
9178
146
  unsigned opcode = GetHLOpcode(CI);
9179
146
  IRBuilder<> subBuilder(CI);
9180
146
  HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
9181
146
  Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
9182
146
  HLMatrixType MatTy =
9183
146
      HLMatrixType::cast(basePtr->getType()->getPointerElementType());
9184
146
  Type *EltTy = MatTy.getElementTypeForReg();
9185
146
  Constant *alignment = hlslOP->GetI32Const(DL.getTypeAllocSize(EltTy));
9186
9187
146
  Value *EltByteSize = ConstantInt::get(
9188
146
      baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));
9189
9190
146
  Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);
9191
9192
146
  Type *resultType = CI->getType()->getPointerElementType();
9193
146
  unsigned resultSize = 1;
9194
146
  if (resultType->isVectorTy())
9195
90
    resultSize = resultType->getVectorNumElements();
9196
146
  DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
9197
146
  assert(resultSize <= 16);
9198
146
  std::vector<Value *> idxList(resultSize);
9199
9200
146
  switch (subOp) {
9201
90
  case HLSubscriptOpcode::ColMatSubscript:
9202
90
  case HLSubscriptOpcode::RowMatSubscript: {
9203
274
    for (unsigned i = 0; i < resultSize; 
i++184
) {
9204
184
      Value *offset =
9205
184
          CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
9206
184
      offset = subBuilder.CreateMul(offset, EltByteSize);
9207
184
      idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
9208
184
    }
9209
90
  } break;
9210
0
  case HLSubscriptOpcode::RowMatElement:
9211
56
  case HLSubscriptOpcode::ColMatElement: {
9212
56
    Constant *EltIdxs = cast<Constant>(idx);
9213
112
    for (unsigned i = 0; i < resultSize; 
i++56
) {
9214
56
      Value *offset =
9215
56
          subBuilder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize);
9216
56
      idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
9217
56
    }
9218
56
  } break;
9219
0
  default:
9220
0
    DXASSERT(0, "invalid operation on const buffer");
9221
0
    break;
9222
146
  }
9223
9224
146
  Value *undefElt = UndefValue::get(EltTy);
9225
9226
292
  for (auto U = CI->user_begin(); U != CI->user_end();) {
9227
146
    Value *subsUser = *(U++);
9228
146
    if (resultSize == 1) {
9229
88
      TranslateStructBufSubscriptUser(cast<Instruction>(subsUser), handle,
9230
88
                                      ResKind, bufIdx, idxList[0], status,
9231
88
                                      hlslOP, DL);
9232
88
      continue;
9233
88
    }
9234
58
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
9235
24
      Value *GEPOffset = LowerGEPOnMatIndexListToIndex(GEP, idxList);
9236
9237
48
      for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
9238
24
        Instruction *gepUserInst = cast<Instruction>(*(gepU++));
9239
24
        TranslateStructBufSubscriptUser(gepUserInst, handle, ResKind, bufIdx,
9240
24
                                        GEPOffset, status, hlslOP, DL);
9241
24
      }
9242
9243
24
      GEP->eraseFromParent();
9244
34
    } else if (StoreInst *stUser = dyn_cast<StoreInst>(subsUser)) {
9245
      // Store elements of matrix in a struct. Needs to be done one scalar at a
9246
      // time even for vectors in the case that matrix orientation spreads the
9247
      // indexed scalars throughout the matrix vector.
9248
22
      IRBuilder<> stBuilder(stUser);
9249
22
      Value *Val = stUser->getValueOperand();
9250
22
      if (Val->getType()->isVectorTy()) {
9251
82
        for (unsigned i = 0; i < resultSize; 
i++60
) {
9252
60
          Value *EltVal = stBuilder.CreateExtractElement(Val, i);
9253
60
          uint8_t mask = DXIL::kCompMask_X;
9254
60
          GenerateStructBufSt(handle, bufIdx, idxList[i], EltTy, hlslOP,
9255
60
                              stBuilder, {EltVal, undefElt, undefElt, undefElt},
9256
60
                              mask, alignment);
9257
60
        }
9258
22
      } else {
9259
0
        uint8_t mask = DXIL::kCompMask_X;
9260
0
        GenerateStructBufSt(handle, bufIdx, idxList[0], EltTy, hlslOP,
9261
0
                            stBuilder, {Val, undefElt, undefElt, undefElt},
9262
0
                            mask, alignment);
9263
0
      }
9264
9265
22
      stUser->eraseFromParent();
9266
22
    } else {
9267
      // Must be load here.
9268
12
      LoadInst *ldUser = cast<LoadInst>(subsUser);
9269
12
      IRBuilder<> ldBuilder(ldUser);
9270
12
      Value *ldData = UndefValue::get(resultType);
9271
      // Load elements of matrix in a struct. Needs to be done one scalar at a
9272
      // time even for vectors in the case that matrix orientation spreads the
9273
      // indexed scalars throughout the matrix vector.
9274
12
      if (resultType->isVectorTy()) {
9275
40
        for (unsigned i = 0; i < resultSize; 
i++28
) {
9276
28
          Value *ResultElt;
9277
          // TODO: This can be inefficient for row major matrix load
9278
28
          GenerateRawBufLd(handle, bufIdx, idxList[i],
9279
28
                           /*status*/ nullptr, EltTy, ResultElt, hlslOP,
9280
28
                           ldBuilder, 1, alignment);
9281
28
          ldData = ldBuilder.CreateInsertElement(ldData, ResultElt, i);
9282
28
        }
9283
12
      } else {
9284
0
        GenerateRawBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr, EltTy,
9285
0
                         ldData, hlslOP, ldBuilder, 4, alignment);
9286
0
      }
9287
12
      ldUser->replaceAllUsesWith(ldData);
9288
12
      ldUser->eraseFromParent();
9289
12
    }
9290
58
  }
9291
9292
146
  CI->eraseFromParent();
9293
146
}
9294
9295
void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
9296
                                     HLResource::Kind ResKind, Value *bufIdx,
9297
                                     Value *baseOffset, Value *status,
9298
37.3k
                                     hlsl::OP *OP, const DataLayout &DL) {
9299
37.3k
  IRBuilder<> Builder(user);
9300
37.3k
  if (CallInst *userCall = dyn_cast<CallInst>(user)) {
9301
3.68k
    HLOpcodeGroup group = // user call?
9302
3.68k
        hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
9303
3.68k
    unsigned opcode = GetHLOpcode(userCall);
9304
    // For case element type of structure buffer is not structure type.
9305
3.68k
    if (baseOffset == nullptr)
9306
0
      baseOffset = OP->GetU32Const(0);
9307
3.68k
    if (group == HLOpcodeGroup::HLIntrinsic) {
9308
1.53k
      IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
9309
1.53k
      switch (IOP) {
9310
0
      case IntrinsicOp::MOP_Load: {
9311
0
        if (userCall->getType()->isPointerTy()) {
9312
          // Struct will return pointers which like []
9313
9314
0
        } else {
9315
          // Use builtin types on structuredBuffer.
9316
0
        }
9317
0
        DXASSERT(0, "not implement yet");
9318
0
      } break;
9319
364
      case IntrinsicOp::IOP_InterlockedAdd: {
9320
364
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
9321
364
                            baseOffset);
9322
364
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Add,
9323
364
                                       Builder, OP);
9324
364
      } break;
9325
72
      case IntrinsicOp::IOP_InterlockedAnd: {
9326
72
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
9327
72
                            baseOffset);
9328
72
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::And,
9329
72
                                       Builder, OP);
9330
72
      } break;
9331
224
      case IntrinsicOp::IOP_InterlockedExchange: {
9332
224
        Type *opType = nullptr;
9333
224
        PointerType *ptrType = dyn_cast<PointerType>(
9334
224
            userCall->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex)
9335
224
                ->getType());
9336
224
        if (ptrType && ptrType->getElementType()->isFloatTy())
9337
12
          opType = Type::getInt32Ty(userCall->getContext());
9338
224
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
9339
224
                            baseOffset, opType);
9340
224
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange,
9341
224
                                       Builder, OP);
9342
224
      } break;
9343
40
      case IntrinsicOp::IOP_InterlockedMax: {
9344
40
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
9345
40
                            baseOffset);
9346
40
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMax,
9347
40
                                       Builder, OP);
9348
40
      } break;
9349
40
      case IntrinsicOp::IOP_InterlockedMin: {
9350
40
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
9351
40
                            baseOffset);
9352
40
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMin,
9353
40
                                       Builder, OP);
9354
40
      } break;
9355
52
      case IntrinsicOp::IOP_InterlockedUMax: {
9356
52
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
9357
52
                            baseOffset);
9358
52
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMax,
9359
52
                                       Builder, OP);
9360
52
      } break;
9361
40
      case IntrinsicOp::IOP_InterlockedUMin: {
9362
40
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
9363
40
                            baseOffset);
9364
40
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMin,
9365
40
                                       Builder, OP);
9366
40
      } break;
9367
96
      case IntrinsicOp::IOP_InterlockedOr: {
9368
96
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
9369
96
                            baseOffset);
9370
96
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Or,
9371
96
                                       Builder, OP);
9372
96
      } break;
9373
72
      case IntrinsicOp::IOP_InterlockedXor: {
9374
72
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
9375
72
                            baseOffset);
9376
72
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Xor,
9377
72
                                       Builder, OP);
9378
72
      } break;
9379
262
      case IntrinsicOp::IOP_InterlockedCompareStore:
9380
508
      case IntrinsicOp::IOP_InterlockedCompareExchange: {
9381
508
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicCompareExchange,
9382
508
                            handle, bufIdx, baseOffset);
9383
508
        TranslateAtomicCmpXChg(helper, Builder, OP);
9384
508
      } break;
9385
14
      case IntrinsicOp::IOP_InterlockedCompareStoreFloatBitwise:
9386
28
      case IntrinsicOp::IOP_InterlockedCompareExchangeFloatBitwise: {
9387
28
        Type *i32Ty = Type::getInt32Ty(userCall->getContext());
9388
28
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicCompareExchange,
9389
28
                            handle, bufIdx, baseOffset, i32Ty);
9390
28
        TranslateAtomicCmpXChg(helper, Builder, OP);
9391
28
      } break;
9392
0
      default:
9393
0
        DXASSERT(0, "invalid opcode");
9394
0
        break;
9395
1.53k
      }
9396
1.53k
      userCall->eraseFromParent();
9397
2.14k
    } else if (group == HLOpcodeGroup::HLMatLoadStore)
9398
      // Load/Store matrix within a struct
9399
2.00k
      TranslateStructBufMatLdSt(userCall, handle, ResKind, OP, status, bufIdx,
9400
2.00k
                                baseOffset, DL);
9401
146
    else if (group == HLOpcodeGroup::HLSubscript) {
9402
      // Subscript of matrix within a struct
9403
146
      TranslateStructBufMatSubscript(userCall, handle, ResKind, bufIdx,
9404
146
                                     baseOffset, status, OP, DL);
9405
146
    }
9406
33.7k
  } else if (LoadInst *LdInst = dyn_cast<LoadInst>(user)) {
9407
    // Load of scalar/vector within a struct or structured raw load.
9408
9.18k
    ResLoadHelper helper(LdInst, ResKind, handle, bufIdx, baseOffset, status);
9409
9.18k
    TranslateBufLoad(helper, ResKind, Builder, OP, DL);
9410
9411
9.18k
    LdInst->eraseFromParent();
9412
24.5k
  } else if (StoreInst *StInst = dyn_cast<StoreInst>(user)) {
9413
    // Store of scalar/vector within a struct or structured raw store.
9414
9.21k
    Value *val = StInst->getValueOperand();
9415
9.21k
    TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx,
9416
9.21k
                   baseOffset, Builder, OP);
9417
9.21k
    StInst->eraseFromParent();
9418
15.3k
  } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(user)) {
9419
    // Recurse users
9420
76
    for (auto U = BCI->user_begin(); U != BCI->user_end();) {
9421
46
      Value *BCIUser = *(U++);
9422
46
      TranslateStructBufSubscriptUser(cast<Instruction>(BCIUser), handle,
9423
46
                                      ResKind, bufIdx, baseOffset, status, OP,
9424
46
                                      DL);
9425
46
    }
9426
30
    BCI->eraseFromParent();
9427
15.2k
  } else if (PHINode *Phi = dyn_cast<PHINode>(user)) {
9428
4
    if (Phi->getNumIncomingValues() != 1) {
9429
0
      dxilutil::EmitErrorOnInstruction(
9430
0
          Phi, "Phi not supported for buffer subscript");
9431
0
      return;
9432
0
    }
9433
    // Since the phi only has a single value we can safely process its
9434
    // users to translate the subscript. These single-value phis are
9435
    // inserted by the lcssa pass.
9436
8
    
for (auto U = Phi->user_begin(); 4
U != Phi->user_end();) {
9437
4
      Value *PhiUser = *(U++);
9438
4
      TranslateStructBufSubscriptUser(cast<Instruction>(PhiUser), handle,
9439
4
                                      ResKind, bufIdx, baseOffset, status, OP,
9440
4
                                      DL);
9441
4
    }
9442
4
    Phi->eraseFromParent();
9443
15.2k
  } else {
9444
    // should only used by GEP
9445
15.2k
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
9446
15.2k
    Type *Ty = GEP->getType()->getPointerElementType();
9447
9448
15.2k
    Value *offset = dxilutil::GEPIdxToOffset(GEP, Builder, OP, DL);
9449
15.2k
    DXASSERT_LOCALVAR(Ty,
9450
15.2k
                      offset->getType() == Type::getInt32Ty(Ty->getContext()),
9451
15.2k
                      "else bitness is wrong");
9452
    // No offset into element for Raw buffers; byte offset is in bufIdx.
9453
15.2k
    if (DXIL::IsRawBuffer(ResKind))
9454
574
      bufIdx = Builder.CreateAdd(offset, bufIdx);
9455
14.7k
    else
9456
14.7k
      baseOffset = Builder.CreateAdd(offset, baseOffset);
9457
9458
37.3k
    for (auto U = GEP->user_begin(); U != GEP->user_end();) {
9459
22.1k
      Value *GEPUser = *(U++);
9460
9461
22.1k
      TranslateStructBufSubscriptUser(cast<Instruction>(GEPUser), handle,
9462
22.1k
                                      ResKind, bufIdx, baseOffset, status, OP,
9463
22.1k
                                      DL);
9464
22.1k
    }
9465
    // delete the inst
9466
15.2k
    GEP->eraseFromParent();
9467
15.2k
  }
9468
37.3k
}
9469
9470
void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status,
9471
                                 hlsl::OP *OP, HLResource::Kind ResKind,
9472
13.0k
                                 const DataLayout &DL) {
9473
13.0k
  Value *subscriptIndex =
9474
13.0k
      CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx);
9475
13.0k
  Value *bufIdx = nullptr;
9476
13.0k
  Value *offset = nullptr;
9477
13.0k
  bufIdx = subscriptIndex;
9478
13.0k
  if (ResKind == HLResource::Kind::RawBuffer)
9479
284
    offset = UndefValue::get(Type::getInt32Ty(CI->getContext()));
9480
12.7k
  else
9481
    // StructuredBuffer, TypedBuffer, etc.
9482
12.7k
    offset = OP->GetU32Const(0);
9483
9484
28.1k
  for (auto U = CI->user_begin(); U != CI->user_end();) {
9485
15.1k
    Value *user = *(U++);
9486
9487
15.1k
    TranslateStructBufSubscriptUser(cast<Instruction>(user), handle, ResKind,
9488
15.1k
                                    bufIdx, offset, status, OP, DL);
9489
15.1k
  }
9490
13.0k
}
9491
} // namespace
9492
9493
// HLSubscript.
9494
namespace {
9495
9496
Value *TranslateTypedBufSubscript(CallInst *CI, DXIL::ResourceKind RK,
9497
                                  DXIL::ResourceClass RC, Value *handle,
9498
                                  LoadInst *ldInst, IRBuilder<> &Builder,
9499
2.81k
                                  hlsl::OP *hlslOP, const DataLayout &DL) {
9500
  // The arguments to the call instruction are used to determine the access,
9501
  // the return value and type come from the load instruction.
9502
2.81k
  ResLoadHelper ldHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, ldInst);
9503
2.81k
  TranslateBufLoad(ldHelper, RK, Builder, hlslOP, DL);
9504
  // delete the ld
9505
2.81k
  ldInst->eraseFromParent();
9506
2.81k
  return ldHelper.retVal;
9507
2.81k
}
9508
9509
Value *UpdateVectorElt(Value *VecVal, Value *EltVal, Value *EltIdx,
9510
16
                       unsigned vectorSize, Instruction *InsertPt) {
9511
16
  IRBuilder<> Builder(InsertPt);
9512
16
  if (ConstantInt *CEltIdx = dyn_cast<ConstantInt>(EltIdx)) {
9513
8
    VecVal =
9514
8
        Builder.CreateInsertElement(VecVal, EltVal, CEltIdx->getLimitedValue());
9515
8
  } else {
9516
8
    BasicBlock *BB = InsertPt->getParent();
9517
8
    BasicBlock *EndBB = BB->splitBasicBlock(InsertPt);
9518
9519
8
    TerminatorInst *TI = BB->getTerminator();
9520
8
    IRBuilder<> SwitchBuilder(TI);
9521
8
    LLVMContext &Ctx = InsertPt->getContext();
9522
9523
8
    SwitchInst *Switch = SwitchBuilder.CreateSwitch(EltIdx, EndBB, vectorSize);
9524
8
    TI->eraseFromParent();
9525
9526
8
    Function *F = EndBB->getParent();
9527
8
    IRBuilder<> endSwitchBuilder(EndBB->begin());
9528
8
    Type *Ty = VecVal->getType();
9529
8
    PHINode *VecPhi = endSwitchBuilder.CreatePHI(Ty, vectorSize + 1);
9530
9531
40
    for (unsigned i = 0; i < vectorSize; 
i++32
) {
9532
32
      BasicBlock *CaseBB = BasicBlock::Create(Ctx, "case", F, EndBB);
9533
32
      Switch->addCase(SwitchBuilder.getInt32(i), CaseBB);
9534
32
      IRBuilder<> CaseBuilder(CaseBB);
9535
9536
32
      Value *CaseVal = CaseBuilder.CreateInsertElement(VecVal, EltVal, i);
9537
32
      VecPhi->addIncoming(CaseVal, CaseBB);
9538
32
      CaseBuilder.CreateBr(EndBB);
9539
32
    }
9540
8
    VecPhi->addIncoming(VecVal, BB);
9541
8
    VecVal = VecPhi;
9542
8
  }
9543
16
  return VecVal;
9544
16
}
9545
9546
void TranslateTypedBufferSubscript(CallInst *CI, HLOperationLowerHelper &helper,
9547
                                   HLObjectOperationLowerHelper *pObjHelper,
9548
8.46k
                                   bool &Translated) {
9549
8.46k
  Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
9550
9551
8.46k
  hlsl::OP *hlslOP = &helper.hlslOP;
9552
  // Resource ptr.
9553
8.46k
  Value *handle = ptr;
9554
8.46k
  DXIL::ResourceClass RC = pObjHelper->GetRC(handle);
9555
8.46k
  DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
9556
9557
8.46k
  Type *Ty = CI->getType()->getPointerElementType();
9558
9559
17.2k
  for (auto It = CI->user_begin(); It != CI->user_end();) {
9560
8.75k
    User *user = *(It++);
9561
8.75k
    Instruction *I = cast<Instruction>(user);
9562
8.75k
    IRBuilder<> Builder(I);
9563
8.75k
    Value *UndefI = UndefValue::get(Builder.getInt32Ty());
9564
8.75k
    if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) {
9565
2.77k
      TranslateTypedBufSubscript(CI, RK, RC, handle, ldInst, Builder, hlslOP,
9566
2.77k
                                 helper.dataLayout);
9567
5.97k
    } else if (StoreInst *stInst = dyn_cast<StoreInst>(user)) {
9568
3.27k
      Value *val = stInst->getValueOperand();
9569
3.27k
      TranslateStore(RK, handle, val,
9570
3.27k
                     CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx),
9571
3.27k
                     UndefI, Builder, hlslOP);
9572
      // delete the st
9573
3.27k
      stInst->eraseFromParent();
9574
3.27k
    } else 
if (GetElementPtrInst *2.70k
GEP2.70k
= dyn_cast<GetElementPtrInst>(user)) {
9575
      // Must be vector type here.
9576
56
      unsigned vectorSize = Ty->getVectorNumElements();
9577
56
      DXASSERT_NOMSG(GEP->getNumIndices() == 2);
9578
56
      Use *GEPIdx = GEP->idx_begin();
9579
56
      GEPIdx++;
9580
56
      Value *EltIdx = *GEPIdx;
9581
96
      for (auto GEPIt = GEP->user_begin(); GEPIt != GEP->user_end();) {
9582
56
        User *GEPUser = *(GEPIt++);
9583
56
        if (StoreInst *SI = dyn_cast<StoreInst>(GEPUser)) {
9584
16
          IRBuilder<> StBuilder(SI);
9585
          // Generate Ld.
9586
16
          LoadInst *tmpLd = StBuilder.CreateLoad(CI);
9587
9588
16
          Value *ldVal = TranslateTypedBufSubscript(
9589
16
              CI, RK, RC, handle, tmpLd, StBuilder, hlslOP, helper.dataLayout);
9590
          // Update vector.
9591
16
          ldVal = UpdateVectorElt(ldVal, SI->getValueOperand(), EltIdx,
9592
16
                                  vectorSize, SI);
9593
          // Generate St.
9594
          // Reset insert point, UpdateVectorElt may move SI to different block.
9595
16
          StBuilder.SetInsertPoint(SI);
9596
16
          TranslateStore(
9597
16
              RK, handle, ldVal,
9598
16
              CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), UndefI,
9599
16
              StBuilder, hlslOP);
9600
16
          SI->eraseFromParent();
9601
16
          continue;
9602
16
        }
9603
40
        if (LoadInst *LI = dyn_cast<LoadInst>(GEPUser)) {
9604
24
          IRBuilder<> LdBuilder(LI);
9605
9606
          // Generate tmp vector load with vector type & translate it
9607
24
          LoadInst *tmpLd = LdBuilder.CreateLoad(CI);
9608
9609
24
          Value *ldVal = TranslateTypedBufSubscript(
9610
24
              CI, RK, RC, handle, tmpLd, LdBuilder, hlslOP, helper.dataLayout);
9611
9612
          // get the single element
9613
24
          ldVal = GenerateVecEltFromGEP(ldVal, GEP, LdBuilder,
9614
24
                                        /*bInsertLdNextToGEP*/ false);
9615
9616
24
          LI->replaceAllUsesWith(ldVal);
9617
24
          LI->eraseFromParent();
9618
24
          continue;
9619
24
        }
9620
        // Invalid operations.
9621
16
        Translated = false;
9622
16
        dxilutil::EmitErrorOnInstruction(GEP,
9623
16
                                         "Invalid operation on typed buffer.");
9624
16
        return;
9625
40
      }
9626
40
      GEP->eraseFromParent();
9627
2.64k
    } else {
9628
2.64k
      CallInst *userCall = cast<CallInst>(user);
9629
2.64k
      HLOpcodeGroup group =
9630
2.64k
          hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
9631
2.64k
      unsigned opcode = hlsl::GetHLOpcode(userCall);
9632
2.64k
      if (group == HLOpcodeGroup::HLIntrinsic) {
9633
2.64k
        IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
9634
2.64k
        if (RC == DXIL::ResourceClass::SRV) {
9635
          // Invalid operations.
9636
0
          Translated = false;
9637
0
          dxilutil::EmitErrorOnInstruction(userCall,
9638
0
                                           "Invalid operation on SRV.");
9639
0
          return;
9640
0
        }
9641
2.64k
        switch (IOP) {
9642
370
        case IntrinsicOp::IOP_InterlockedAdd: {
9643
370
          ResLoadHelper helper(CI, RK, RC, handle,
9644
370
                               IntrinsicOp::IOP_InterlockedAdd);
9645
370
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
9646
370
                                  helper.addr, /*offset*/ nullptr);
9647
370
          TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Add,
9648
370
                                         Builder, hlslOP);
9649
370
        } break;
9650
192
        case IntrinsicOp::IOP_InterlockedAnd: {
9651
192
          ResLoadHelper helper(CI, RK, RC, handle,
9652
192
                               IntrinsicOp::IOP_InterlockedAnd);
9653
192
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
9654
192
                                  helper.addr, /*offset*/ nullptr);
9655
192
          TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::And,
9656
192
                                         Builder, hlslOP);
9657
192
        } break;
9658
356
        case IntrinsicOp::IOP_InterlockedExchange: {
9659
356
          ResLoadHelper helper(CI, RK, RC, handle,
9660
356
                               IntrinsicOp::IOP_InterlockedExchange);
9661
356
          Type *opType = nullptr;
9662
356
          PointerType *ptrType = dyn_cast<PointerType>(
9663
356
              userCall->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex)
9664
356
                  ->getType());
9665
356
          if (ptrType && ptrType->getElementType()->isFloatTy())
9666
12
            opType = Type::getInt32Ty(userCall->getContext());
9667
356
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
9668
356
                                  helper.addr, /*offset*/ nullptr, opType);
9669
356
          TranslateAtomicBinaryOperation(
9670
356
              atomHelper, DXIL::AtomicBinOpCode::Exchange, Builder, hlslOP);
9671
356
        } break;
9672
108
        case IntrinsicOp::IOP_InterlockedMax: {
9673
108
          ResLoadHelper helper(CI, RK, RC, handle,
9674
108
                               IntrinsicOp::IOP_InterlockedMax);
9675
108
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
9676
108
                                  helper.addr, /*offset*/ nullptr);
9677
108
          TranslateAtomicBinaryOperation(
9678
108
              atomHelper, DXIL::AtomicBinOpCode::IMax, Builder, hlslOP);
9679
108
        } break;
9680
108
        case IntrinsicOp::IOP_InterlockedMin: {
9681
108
          ResLoadHelper helper(CI, RK, RC, handle,
9682
108
                               IntrinsicOp::IOP_InterlockedMin);
9683
108
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
9684
108
                                  helper.addr, /*offset*/ nullptr);
9685
108
          TranslateAtomicBinaryOperation(
9686
108
              atomHelper, DXIL::AtomicBinOpCode::IMin, Builder, hlslOP);
9687
108
        } break;
9688
116
        case IntrinsicOp::IOP_InterlockedUMax: {
9689
116
          ResLoadHelper helper(CI, RK, RC, handle,
9690
116
                               IntrinsicOp::IOP_InterlockedUMax);
9691
116
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
9692
116
                                  helper.addr, /*offset*/ nullptr);
9693
116
          TranslateAtomicBinaryOperation(
9694
116
              atomHelper, DXIL::AtomicBinOpCode::UMax, Builder, hlslOP);
9695
116
        } break;
9696
116
        case IntrinsicOp::IOP_InterlockedUMin: {
9697
116
          ResLoadHelper helper(CI, RK, RC, handle,
9698
116
                               IntrinsicOp::IOP_InterlockedUMin);
9699
116
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
9700
116
                                  helper.addr, /*offset*/ nullptr);
9701
116
          TranslateAtomicBinaryOperation(
9702
116
              atomHelper, DXIL::AtomicBinOpCode::UMin, Builder, hlslOP);
9703
116
        } break;
9704
200
        case IntrinsicOp::IOP_InterlockedOr: {
9705
200
          ResLoadHelper helper(CI, RK, RC, handle,
9706
200
                               IntrinsicOp::IOP_InterlockedOr);
9707
200
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
9708
200
                                  helper.addr, /*offset*/ nullptr);
9709
200
          TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Or,
9710
200
                                         Builder, hlslOP);
9711
200
        } break;
9712
192
        case IntrinsicOp::IOP_InterlockedXor: {
9713
192
          ResLoadHelper helper(CI, RK, RC, handle,
9714
192
                               IntrinsicOp::IOP_InterlockedXor);
9715
192
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle,
9716
192
                                  helper.addr, /*offset*/ nullptr);
9717
192
          TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Xor,
9718
192
                                         Builder, hlslOP);
9719
192
        } break;
9720
442
        case IntrinsicOp::IOP_InterlockedCompareStore:
9721
860
        case IntrinsicOp::IOP_InterlockedCompareExchange: {
9722
860
          ResLoadHelper helper(CI, RK, RC, handle,
9723
860
                               IntrinsicOp::IOP_InterlockedCompareExchange);
9724
860
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicCompareExchange,
9725
860
                                  handle, helper.addr, /*offset*/ nullptr);
9726
860
          TranslateAtomicCmpXChg(atomHelper, Builder, hlslOP);
9727
860
        } break;
9728
14
        case IntrinsicOp::IOP_InterlockedCompareStoreFloatBitwise:
9729
28
        case IntrinsicOp::IOP_InterlockedCompareExchangeFloatBitwise: {
9730
28
          Type *i32Ty = Type::getInt32Ty(userCall->getContext());
9731
28
          ResLoadHelper helper(CI, RK, RC, handle,
9732
28
                               IntrinsicOp::IOP_InterlockedCompareExchange);
9733
28
          AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicCompareExchange,
9734
28
                                  handle, helper.addr, /*offset*/ nullptr,
9735
28
                                  i32Ty);
9736
28
          TranslateAtomicCmpXChg(atomHelper, Builder, hlslOP);
9737
28
        } break;
9738
0
        default:
9739
0
          DXASSERT(0, "invalid opcode");
9740
0
          break;
9741
2.64k
        }
9742
2.64k
      } else {
9743
0
        DXASSERT(0, "invalid group");
9744
0
      }
9745
2.64k
      userCall->eraseFromParent();
9746
2.64k
    }
9747
8.75k
  }
9748
8.46k
}
9749
} // namespace
9750
9751
void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode,
9752
                          HLOperationLowerHelper &helper,
9753
                          HLObjectOperationLowerHelper *pObjHelper,
9754
29.8k
                          bool &Translated) {
9755
29.8k
  if (CI->user_empty()) {
9756
0
    Translated = true;
9757
0
    return;
9758
0
  }
9759
29.8k
  hlsl::OP *hlslOP = &helper.hlslOP;
9760
9761
29.8k
  Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
9762
29.8k
  if (opcode == HLSubscriptOpcode::CBufferSubscript) {
9763
8.73k
    dxilutil::MergeGepUse(CI);
9764
    // Resource ptr.
9765
8.73k
    Value *handle = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
9766
8.73k
    TranslateCBOperationsLegacy(handle, CI, hlslOP, helper.dxilTypeSys,
9767
8.73k
                                helper.dataLayout, pObjHelper);
9768
8.73k
    Translated = true;
9769
8.73k
    return;
9770
8.73k
  }
9771
9772
21.1k
  if (opcode == HLSubscriptOpcode::DoubleSubscript) {
9773
    // Resource ptr.
9774
180
    Value *handle = ptr;
9775
180
    DXIL::ResourceKind RK = pObjHelper->GetRK(handle);
9776
180
    Value *coord = CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx);
9777
180
    Value *mipLevel =
9778
180
        CI->getArgOperand(HLOperandIndex::kDoubleSubscriptMipLevelOpIdx);
9779
9780
180
    auto U = CI->user_begin();
9781
180
    DXASSERT(CI->hasOneUse(), "subscript should only have one use");
9782
180
    IRBuilder<> Builder(CI);
9783
180
    if (LoadInst *ldInst = dyn_cast<LoadInst>(*U)) {
9784
140
      Value *Offset = UndefValue::get(Builder.getInt32Ty());
9785
140
      ResLoadHelper ldHelper(ldInst, RK, handle, coord, Offset,
9786
140
                             /*status*/ nullptr, mipLevel);
9787
140
      TranslateBufLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout);
9788
140
      ldInst->eraseFromParent();
9789
140
    } else {
9790
40
      StoreInst *stInst = cast<StoreInst>(*U);
9791
40
      Value *val = stInst->getValueOperand();
9792
40
      Value *UndefI = UndefValue::get(Builder.getInt32Ty());
9793
40
      TranslateStore(RK, handle, val,
9794
40
                     CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx),
9795
40
                     UndefI, Builder, hlslOP, mipLevel);
9796
40
      stInst->eraseFromParent();
9797
40
    }
9798
180
    Translated = true;
9799
180
    return;
9800
180
  }
9801
9802
20.9k
  Type *HandleTy = hlslOP->GetHandleType();
9803
20.9k
  if (ptr->getType() == hlslOP->GetNodeRecordHandleType()) {
9804
0
    DXASSERT(false, "Shouldn't get here, NodeRecord subscripts should have "
9805
0
                    "been lowered in LowerRecordAccessToGetNodeRecordPtr");
9806
0
    return;
9807
0
  }
9808
9809
20.9k
  if (ptr->getType() == HandleTy) {
9810
    // Resource ptr.
9811
20.4k
    Value *handle = ptr;
9812
20.4k
    DXIL::ResourceKind RK = DxilResource::Kind::Invalid;
9813
20.4k
    Type *ObjTy = nullptr;
9814
20.4k
    Type *RetTy = nullptr;
9815
20.4k
    RK = pObjHelper->GetRK(handle);
9816
20.4k
    if (RK == DxilResource::Kind::Invalid) {
9817
0
      Translated = false;
9818
0
      return;
9819
0
    }
9820
20.4k
    ObjTy = pObjHelper->GetResourceType(handle);
9821
20.4k
    RetTy = ObjTy->getStructElementType(0);
9822
20.4k
    Translated = true;
9823
9824
20.4k
    if (DXIL::IsStructuredBuffer(RK))
9825
11.9k
      TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP, RK,
9826
11.9k
                                  helper.dataLayout);
9827
8.46k
    else
9828
8.46k
      TranslateTypedBufferSubscript(CI, helper, pObjHelper, Translated);
9829
9830
20.4k
    return;
9831
20.4k
  }
9832
9833
498
  Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
9834
498
  if (IsLocalVariablePtr(basePtr) || IsSharedMemPtr(basePtr)) {
9835
    // Translate matrix into vector of array for share memory or local
9836
    // variable should be done in HLMatrixLowerPass
9837
0
    DXASSERT_NOMSG(0);
9838
0
    Translated = true;
9839
0
    return;
9840
0
  }
9841
9842
  // Other case should be take care in TranslateStructBufSubscript or
9843
  // TranslateCBOperations.
9844
498
  Translated = false;
9845
498
}
9846
9847
void TranslateSubscriptOperation(Function *F, HLOperationLowerHelper &helper,
9848
12.7k
                                 HLObjectOperationLowerHelper *pObjHelper) {
9849
42.6k
  for (auto U = F->user_begin(); U != F->user_end();) {
9850
29.8k
    Value *user = *(U++);
9851
29.8k
    if (!isa<Instruction>(user))
9852
0
      continue;
9853
    // must be call inst
9854
29.8k
    CallInst *CI = cast<CallInst>(user);
9855
29.8k
    unsigned opcode = GetHLOpcode(CI);
9856
29.8k
    bool Translated = true;
9857
29.8k
    TranslateHLSubscript(CI, static_cast<HLSubscriptOpcode>(opcode), helper,
9858
29.8k
                         pObjHelper, Translated);
9859
29.8k
    if (Translated) {
9860
      // delete the call
9861
29.3k
      DXASSERT(CI->use_empty(),
9862
29.3k
               "else TranslateHLSubscript didn't replace/erase uses");
9863
29.3k
      CI->eraseFromParent();
9864
29.3k
    }
9865
29.8k
  }
9866
12.7k
}
9867
9868
// Create BitCast if ptr, otherwise, create alloca of new type, write to bitcast
9869
// of alloca, and return load from alloca If bOrigAllocaTy is true: create
9870
// alloca of old type instead, write to alloca, and return load from bitcast of
9871
// alloca
9872
static Instruction *BitCastValueOrPtr(Value *V, Instruction *Insert, Type *Ty,
9873
                                      bool bOrigAllocaTy = false,
9874
164
                                      const Twine &Name = "") {
9875
164
  IRBuilder<> Builder(Insert);
9876
164
  if (Ty->isPointerTy()) {
9877
    // If pointer, we can bitcast directly
9878
0
    return cast<Instruction>(Builder.CreateBitCast(V, Ty, Name));
9879
0
  }
9880
9881
  // If value, we have to alloca, store to bitcast ptr, and load
9882
164
  IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Insert));
9883
164
  Type *allocaTy = bOrigAllocaTy ? 
V->getType()0
: Ty;
9884
164
  Type *otherTy = bOrigAllocaTy ? 
Ty0
: V->getType();
9885
164
  Instruction *allocaInst = AllocaBuilder.CreateAlloca(allocaTy);
9886
164
  Instruction *bitCast = cast<Instruction>(
9887
164
      Builder.CreateBitCast(allocaInst, otherTy->getPointerTo()));
9888
164
  Builder.CreateStore(V, bOrigAllocaTy ? 
allocaInst0
: bitCast);
9889
164
  return Builder.CreateLoad(bOrigAllocaTy ? 
bitCast0
: allocaInst, Name);
9890
164
}
9891
9892
static Instruction *CreateTransposeShuffle(IRBuilder<> &Builder, Value *vecVal,
9893
0
                                           unsigned toRows, unsigned toCols) {
9894
0
  SmallVector<int, 16> castMask(toCols * toRows);
9895
0
  unsigned idx = 0;
9896
0
  for (unsigned r = 0; r < toRows; r++)
9897
0
    for (unsigned c = 0; c < toCols; c++)
9898
0
      castMask[idx++] = c * toRows + r;
9899
0
  return cast<Instruction>(
9900
0
      Builder.CreateShuffleVector(vecVal, vecVal, castMask));
9901
0
}
9902
9903
void TranslateHLBuiltinOperation(Function *F, HLOperationLowerHelper &helper,
9904
                                 hlsl::HLOpcodeGroup group,
9905
86.5k
                                 HLObjectOperationLowerHelper *pObjHelper) {
9906
86.5k
  if (group == HLOpcodeGroup::HLIntrinsic) {
9907
    // map to dxil operations
9908
89.4k
    for (auto U = F->user_begin(); U != F->user_end();) {
9909
65.8k
      Value *User = *(U++);
9910
65.8k
      if (!isa<Instruction>(User))
9911
0
        continue;
9912
      // must be call inst
9913
65.8k
      CallInst *CI = cast<CallInst>(User);
9914
9915
      // Keep the instruction to lower by other function.
9916
65.8k
      bool Translated = true;
9917
9918
65.8k
      TranslateBuiltinIntrinsic(CI, helper, pObjHelper, Translated);
9919
9920
65.8k
      if (Translated) {
9921
        // delete the call
9922
64.8k
        DXASSERT(CI->use_empty(),
9923
64.8k
                 "else TranslateBuiltinIntrinsic didn't replace/erase uses");
9924
64.8k
        CI->eraseFromParent();
9925
64.8k
      }
9926
65.8k
    }
9927
63.0k
  } else {
9928
63.0k
    if (group == HLOpcodeGroup::HLMatLoadStore) {
9929
      // Both ld/st use arg1 for the pointer.
9930
0
      Type *PtrTy =
9931
0
          F->getFunctionType()->getParamType(HLOperandIndex::kMatLoadPtrOpIdx);
9932
9933
0
      if (PtrTy->getPointerAddressSpace() == DXIL::kTGSMAddrSpace) {
9934
        // Translate matrix into vector of array for shared memory
9935
        // variable should be done in HLMatrixLowerPass.
9936
0
        if (!F->user_empty())
9937
0
          F->getContext().emitError("Fail to lower matrix load/store.");
9938
0
      } else if (PtrTy->getPointerAddressSpace() == DXIL::kDefaultAddrSpace) {
9939
        // Default address space may be function argument in lib target
9940
0
        if (!F->user_empty()) {
9941
0
          for (auto U = F->user_begin(); U != F->user_end();) {
9942
0
            Value *User = *(U++);
9943
0
            if (!isa<Instruction>(User))
9944
0
              continue;
9945
            // must be call inst
9946
0
            CallInst *CI = cast<CallInst>(User);
9947
0
            IRBuilder<> Builder(CI);
9948
0
            HLMatLoadStoreOpcode opcode =
9949
0
                static_cast<HLMatLoadStoreOpcode>(hlsl::GetHLOpcode(CI));
9950
0
            switch (opcode) {
9951
0
            case HLMatLoadStoreOpcode::ColMatStore:
9952
0
            case HLMatLoadStoreOpcode::RowMatStore: {
9953
0
              Value *vecVal =
9954
0
                  CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx);
9955
0
              Value *matPtr =
9956
0
                  CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx);
9957
0
              matPtr = SkipAddrSpaceCast(matPtr);
9958
0
              unsigned addrSpace =
9959
0
                  cast<PointerType>(matPtr->getType())->getAddressSpace();
9960
9961
0
              Value *castPtr = Builder.CreateBitCast(
9962
0
                  matPtr, vecVal->getType()->getPointerTo(addrSpace));
9963
0
              Builder.CreateStore(vecVal, castPtr);
9964
0
              CI->eraseFromParent();
9965
0
            } break;
9966
0
            case HLMatLoadStoreOpcode::ColMatLoad:
9967
0
            case HLMatLoadStoreOpcode::RowMatLoad: {
9968
0
              Value *matPtr =
9969
0
                  CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx);
9970
0
              matPtr = SkipAddrSpaceCast(matPtr);
9971
0
              unsigned addrSpace =
9972
0
                  cast<PointerType>(matPtr->getType())->getAddressSpace();
9973
0
              Value *castPtr = Builder.CreateBitCast(
9974
0
                  matPtr, CI->getType()->getPointerTo(addrSpace));
9975
0
              Value *vecVal = Builder.CreateLoad(castPtr);
9976
0
              CI->replaceAllUsesWith(vecVal);
9977
0
              CI->eraseFromParent();
9978
0
            } break;
9979
0
            }
9980
0
          }
9981
0
        }
9982
0
      }
9983
63.0k
    } else if (group == HLOpcodeGroup::HLCast) {
9984
      // HLCast may be used on matrix value function argument in lib target
9985
2.03k
      if (!F->user_empty()) {
9986
5.46k
        for (auto U = F->user_begin(); U != F->user_end();) {
9987
3.42k
          Value *User = *(U++);
9988
3.42k
          if (!isa<Instruction>(User))
9989
0
            continue;
9990
          // must be call inst
9991
3.42k
          CallInst *CI = cast<CallInst>(User);
9992
3.42k
          IRBuilder<> Builder(CI);
9993
3.42k
          HLCastOpcode opcode =
9994
3.42k
              static_cast<HLCastOpcode>(hlsl::GetHLOpcode(CI));
9995
3.42k
          bool bTranspose = false;
9996
3.42k
          bool bColDest = false;
9997
3.42k
          switch (opcode) {
9998
0
          case HLCastOpcode::RowMatrixToColMatrix:
9999
0
            bColDest = true;
10000
0
            LLVM_FALLTHROUGH;
10001
0
          case HLCastOpcode::ColMatrixToRowMatrix:
10002
0
            bTranspose = true;
10003
0
            LLVM_FALLTHROUGH;
10004
78
          case HLCastOpcode::ColMatrixToVecCast:
10005
164
          case HLCastOpcode::RowMatrixToVecCast: {
10006
164
            Value *matVal =
10007
164
                CI->getArgOperand(HLOperandIndex::kInitFirstArgOpIdx);
10008
164
            Value *vecVal =
10009
164
                BitCastValueOrPtr(matVal, CI, CI->getType(),
10010
164
                                  /*bOrigAllocaTy*/ false, matVal->getName());
10011
164
            if (bTranspose) {
10012
0
              HLMatrixType MatTy = HLMatrixType::cast(matVal->getType());
10013
0
              unsigned row = MatTy.getNumRows();
10014
0
              unsigned col = MatTy.getNumColumns();
10015
0
              if (bColDest)
10016
0
                std::swap(row, col);
10017
0
              vecVal = CreateTransposeShuffle(Builder, vecVal, row, col);
10018
0
            }
10019
164
            CI->replaceAllUsesWith(vecVal);
10020
164
            CI->eraseFromParent();
10021
164
          } break;
10022
3.42k
          }
10023
3.42k
        }
10024
2.03k
      }
10025
60.9k
    } else if (group == HLOpcodeGroup::HLSubscript) {
10026
12.7k
      TranslateSubscriptOperation(F, helper, pObjHelper);
10027
12.7k
    }
10028
    // map to math function or llvm ir
10029
63.0k
  }
10030
86.5k
}
10031
10032
typedef std::unordered_map<llvm::Instruction *, llvm::Value *> HandleMap;
10033
static void TranslateHLExtension(Function *F,
10034
                                 HLSLExtensionsCodegenHelper *helper,
10035
                                 OP &hlslOp,
10036
68
                                 HLObjectOperationLowerHelper &objHelper) {
10037
  // Find all calls to the function F.
10038
  // Store the calls in a vector for now to be replaced the loop below.
10039
  // We use a two step "find then replace" to avoid removing uses while
10040
  // iterating.
10041
68
  SmallVector<CallInst *, 8> CallsToReplace;
10042
72
  for (User *U : F->users()) {
10043
72
    if (CallInst *CI = dyn_cast<CallInst>(U)) {
10044
72
      CallsToReplace.push_back(CI);
10045
72
    }
10046
72
  }
10047
10048
  // Get the lowering strategy to use for this intrinsic.
10049
68
  llvm::StringRef LowerStrategy = GetHLLowerStrategy(F);
10050
68
  HLObjectExtensionLowerHelper extObjHelper(objHelper);
10051
68
  ExtensionLowering lower(LowerStrategy, helper, hlslOp, extObjHelper);
10052
10053
  // Replace all calls that were successfully translated.
10054
72
  for (CallInst *CI : CallsToReplace) {
10055
72
    Value *Result = lower.Translate(CI);
10056
72
    if (Result && Result != CI) {
10057
72
      CI->replaceAllUsesWith(Result);
10058
72
      CI->eraseFromParent();
10059
72
    }
10060
72
  }
10061
68
}
10062
10063
namespace hlsl {
10064
10065
void TranslateBuiltinOperations(
10066
    HLModule &HLM, HLSLExtensionsCodegenHelper *extCodegenHelper,
10067
20.4k
    std::unordered_set<Instruction *> &UpdateCounterSet) {
10068
20.4k
  HLOperationLowerHelper helper(HLM);
10069
10070
20.4k
  HLObjectOperationLowerHelper objHelper = {HLM, UpdateCounterSet};
10071
10072
20.4k
  Module *M = HLM.GetModule();
10073
10074
20.4k
  SmallVector<Function *, 4> NonUniformResourceIndexIntrinsics;
10075
10076
  // generate dxil operation
10077
197k
  for (iplist<Function>::iterator F : M->getFunctionList()) {
10078
197k
    if (F->user_empty())
10079
34.8k
      continue;
10080
162k
    if (!F->isDeclaration()) {
10081
172
      continue;
10082
172
    }
10083
162k
    hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
10084
162k
    if (group == HLOpcodeGroup::NotHL) {
10085
      // Nothing to do.
10086
75.8k
      continue;
10087
75.8k
    }
10088
86.6k
    if (group == HLOpcodeGroup::HLExtIntrinsic) {
10089
68
      TranslateHLExtension(F, extCodegenHelper, helper.hlslOP, objHelper);
10090
68
      continue;
10091
68
    }
10092
86.5k
    if (group == HLOpcodeGroup::HLIntrinsic) {
10093
23.5k
      CallInst *CI = cast<CallInst>(*F->user_begin()); // must be call inst
10094
23.5k
      unsigned opcode = hlsl::GetHLOpcode(CI);
10095
23.5k
      if (opcode == (unsigned)IntrinsicOp::IOP_NonUniformResourceIndex) {
10096
116
        NonUniformResourceIndexIntrinsics.push_back(F);
10097
116
        continue;
10098
116
      }
10099
23.5k
    }
10100
86.4k
    TranslateHLBuiltinOperation(F, helper, group, &objHelper);
10101
86.4k
  }
10102
10103
  // Translate last so value placed in NonUniformSet is still valid.
10104
20.4k
  if (!NonUniformResourceIndexIntrinsics.empty()) {
10105
116
    for (auto F : NonUniformResourceIndexIntrinsics) {
10106
116
      TranslateHLBuiltinOperation(F, helper, HLOpcodeGroup::HLIntrinsic,
10107
116
                                  &objHelper);
10108
116
    }
10109
90
  }
10110
20.4k
}
10111
10112
void EmitGetNodeRecordPtrAndUpdateUsers(HLOperationLowerHelper &helper,
10113
656
                                        CallInst *CI, Value *ArrayIndex) {
10114
656
  IRBuilder<> Builder(CI);
10115
656
  Value *opArg = nullptr;
10116
656
  Value *Handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
10117
656
  opArg = Builder.getInt32((unsigned)DXIL::OpCode::GetNodeRecordPtr);
10118
656
  StructType *origRecordUDT =
10119
656
      cast<StructType>(cast<PointerType>(CI->getType())->getElementType());
10120
656
  Type *getNodeRecordPtrRT = origRecordUDT;
10121
  // Translate node record type here
10122
656
  auto findIt = helper.loweredTypes.find(origRecordUDT);
10123
656
  if (findIt != helper.loweredTypes.end()) {
10124
244
    getNodeRecordPtrRT = findIt->second;
10125
412
  } else {
10126
412
    getNodeRecordPtrRT = GetLoweredUDT(origRecordUDT, &helper.dxilTypeSys);
10127
412
    if (origRecordUDT != getNodeRecordPtrRT)
10128
112
      helper.loweredTypes[origRecordUDT] = getNodeRecordPtrRT;
10129
412
  }
10130
656
  getNodeRecordPtrRT =
10131
656
      getNodeRecordPtrRT->getPointerTo(DXIL::kNodeRecordAddrSpace);
10132
656
  Function *getNodeRecordPtr = helper.hlslOP.GetOpFunc(
10133
656
      DXIL::OpCode::GetNodeRecordPtr, getNodeRecordPtrRT);
10134
656
  Value *args[] = {opArg, Handle, ArrayIndex};
10135
656
  Value *NodeRecordPtr = Builder.CreateCall(getNodeRecordPtr, args);
10136
656
  ReplaceUsesForLoweredUDT(CI, NodeRecordPtr);
10137
656
}
10138
10139
20.4k
void LowerRecordAccessToGetNodeRecordPtr(HLModule &HLM) {
10140
20.4k
  Module *M = HLM.GetModule();
10141
20.4k
  HLOperationLowerHelper helper(HLM);
10142
164k
  for (iplist<Function>::iterator F : M->getFunctionList()) {
10143
164k
    if (F->user_empty())
10144
32.3k
      continue;
10145
132k
    hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F);
10146
132k
    if (group == HLOpcodeGroup::HLSubscript) {
10147
43.6k
      for (auto U = F->user_begin(); U != F->user_end();) {
10148
30.5k
        Value *User = *(U++);
10149
30.5k
        if (!isa<Instruction>(User))
10150
0
          continue;
10151
        // must be call inst
10152
30.5k
        CallInst *CI = cast<CallInst>(User);
10153
30.5k
        HLSubscriptOpcode opcode =
10154
30.5k
            static_cast<HLSubscriptOpcode>(hlsl::GetHLOpcode(CI));
10155
30.5k
        if (opcode != HLSubscriptOpcode::DefaultSubscript)
10156
9.47k
          continue;
10157
10158
21.0k
        hlsl::OP *OP = &helper.hlslOP;
10159
21.0k
        Value *Handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
10160
21.0k
        if (Handle->getType() != OP->GetNodeRecordHandleType()) {
10161
20.4k
          continue;
10162
20.4k
        }
10163
10164
656
        Value *Index = CI->getNumArgOperands() > 2
10165
656
                           ? 
CI->getArgOperand(2)328
10166
656
                           : 
ConstantInt::get(helper.i32Ty, 0)328
;
10167
656
        EmitGetNodeRecordPtrAndUpdateUsers(helper, CI, Index);
10168
656
        CI->eraseFromParent();
10169
656
      }
10170
13.0k
    }
10171
132k
  }
10172
20.4k
}
10173
} // namespace hlsl