/home/runner/work/DirectXShaderCompiler/DirectXShaderCompiler/lib/HLSL/HLOperationLower.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////////////// |
2 | | // // |
3 | | // HLOperationLower.cpp // |
4 | | // Copyright (C) Microsoft Corporation. All rights reserved. // |
5 | | // This file is distributed under the University of Illinois Open Source // |
6 | | // License. See LICENSE.TXT for details. // |
7 | | // // |
8 | | // Lower functions to lower HL operations to DXIL operations. // |
9 | | // // |
10 | | /////////////////////////////////////////////////////////////////////////////// |
11 | | |
12 | | #include "dxc/DXIL/DxilConstants.h" |
13 | | #define _USE_MATH_DEFINES |
14 | | #include <array> |
15 | | #include <cmath> |
16 | | #include <functional> |
17 | | #include <unordered_set> |
18 | | |
19 | | #include "dxc/DXIL/DxilConstants.h" |
20 | | #include "dxc/DXIL/DxilInstructions.h" |
21 | | #include "dxc/DXIL/DxilModule.h" |
22 | | #include "dxc/DXIL/DxilOperations.h" |
23 | | #include "dxc/DXIL/DxilResourceProperties.h" |
24 | | #include "dxc/DXIL/DxilUtil.h" |
25 | | #include "dxc/HLSL/DxilPoisonValues.h" |
26 | | #include "dxc/HLSL/HLLowerUDT.h" |
27 | | #include "dxc/HLSL/HLMatrixLowerHelper.h" |
28 | | #include "dxc/HLSL/HLMatrixType.h" |
29 | | #include "dxc/HLSL/HLModule.h" |
30 | | #include "dxc/HLSL/HLOperationLower.h" |
31 | | #include "dxc/HLSL/HLOperationLowerExtension.h" |
32 | | #include "dxc/HLSL/HLOperations.h" |
33 | | #include "dxc/HlslIntrinsicOp.h" |
34 | | |
35 | | #include "llvm/ADT/APSInt.h" |
36 | | #include "llvm/IR/GetElementPtrTypeIterator.h" |
37 | | #include "llvm/IR/IRBuilder.h" |
38 | | #include "llvm/IR/Instructions.h" |
39 | | #include "llvm/IR/IntrinsicInst.h" |
40 | | #include "llvm/IR/Module.h" |
41 | | |
42 | | using namespace llvm; |
43 | | using namespace hlsl; |
44 | | |
// Shared state handed to the lowering functions in this file: the HL module,
// its DXIL OP table, a handful of frequently used LLVM types, the DXIL type
// system, the entry function's props and a DataLayout.
45 | | struct HLOperationLowerHelper {
46 | | HLModule &M;
47 | | OP &hlslOP;
// Cached LLVM types; the constructor creates them from the module's context.
48 | | Type *voidTy;
49 | | Type *f32Ty;
50 | | Type *i32Ty;
51 | | Type *i16Ty;
52 | | llvm::Type *i1Ty;
53 | | Type *i8Ty;
54 | | DxilTypeSystem &dxilTypeSys;
// Props of the module's entry function; nullptr when the module has none.
55 | | DxilFunctionProps *functionProps;
// Built from the legacy or the new layout string, selected by the
// bUseMinPrecision HL option.
56 | | DataLayout dataLayout;
// NOTE(review): not referenced in the visible part of this file; presumably a
// cache of already lowered types - confirm against its users.
57 | | SmallDenseMap<Type *, Type *, 4> loweredTypes;
58 | | HLOperationLowerHelper(HLModule &HLM);
59 | | };
60 | | |
// Binds the helper to `HLM`: stores references to the module, its OP table and
// type system, picks the data layout string from the bUseMinPrecision option,
// caches the basic LLVM types and looks up the entry function's props.
61 | | HLOperationLowerHelper::HLOperationLowerHelper(HLModule &HLM)
62 | 40.9k | : M(HLM), hlslOP(*HLM.GetOP()), dxilTypeSys(HLM.GetTypeSystem()),
// Min-precision mode uses the legacy layout string, otherwise the new one.
63 | 40.9k | dataLayout(DataLayout(HLM.GetHLOptions().bUseMinPrecision
64 | 40.9k | ? hlsl::DXIL::kLegacyLayoutString38.6k
65 | 40.9k | : hlsl::DXIL::kNewLayoutString2.24k )) {
66 | 40.9k | llvm::LLVMContext &Ctx = HLM.GetCtx();
67 | 40.9k | voidTy = Type::getVoidTy(Ctx);
68 | 40.9k | f32Ty = Type::getFloatTy(Ctx);
69 | 40.9k | i32Ty = Type::getInt32Ty(Ctx);
70 | 40.9k | i16Ty = Type::getInt16Ty(Ctx);
71 | 40.9k | i1Ty = Type::getInt1Ty(Ctx);
72 | 40.9k | i8Ty = Type::getInt8Ty(Ctx);
// functionProps stays nullptr unless the module has props recorded for the
// entry function.
73 | 40.9k | Function *EntryFunc = HLM.GetEntryFunction();
74 | 40.9k | functionProps = nullptr;
75 | 40.9k | if (HLM.HasDxilFunctionProps(EntryFunc))
76 | 35.1k | functionProps = &HLM.GetDxilFunctionProps(EntryFunc);
77 | 40.9k | }
78 | | |
// Helper used while lowering object (resource) intrinsics. It caches the
// resource class, kind and type derived from annotate-handle calls, records
// instructions whose resource needs a counter, and creates global resources
// for resource members that live inside cbuffers.
79 | | struct HLObjectOperationLowerHelper {
80 | | private:
81 | | // For object intrinsics.
82 | | HLModule &HLM;
// Attributes cached per handle value.
83 | | struct ResAttribute {
84 | | DXIL::ResourceClass RC;
85 | | DXIL::ResourceKind RK;
86 | | Type *ResourceType;
87 | | };
// Memoized result of FindCreateHandleResourceBase, keyed by handle value.
88 | | std::unordered_map<Value *, ResAttribute> HandleMetaMap;
// Caller-owned set that MarkHasCounterOnCreateHandle inserts into.
89 | | std::unordered_set<Instruction *> &UpdateCounterSet;
90 | | // Map from pointer of cbuffer to pointer of resource.
91 | | // For cbuffer like this:
92 | | // cbuffer A {
93 | | // Texture2D T;
94 | | // };
95 | | // A global resource Texture2D T2 will be created for Texture2D T.
96 | | // CBPtrToResourceMap[T] will return T2.
97 | | std::unordered_map<Value *, Value *> CBPtrToResourceMap;
98 | | |
99 | | public:
100 | | HLObjectOperationLowerHelper(HLModule &HLM,
101 | | std::unordered_set<Instruction *> &UpdateCounter)
102 | 20.4k | : HLM(HLM), UpdateCounterSet(UpdateCounter) {}
// Returns the cached resource class for `Handle`.
103 | 18.6k | DXIL::ResourceClass GetRC(Value *Handle) {
104 | 18.6k | ResAttribute &Res = FindCreateHandleResourceBase(Handle);
105 | 18.6k | return Res.RC;
106 | 18.6k | }
// Returns the cached resource kind for `Handle`.
107 | 46.0k | DXIL::ResourceKind GetRK(Value *Handle) {
108 | 46.0k | ResAttribute &Res = FindCreateHandleResourceBase(Handle);
109 | 46.0k | return Res.RK;
110 | 46.0k | }
// Returns the cached resource type for `Handle`.
111 | 20.4k | Type *GetResourceType(Value *Handle) {
112 | 20.4k | ResAttribute &Res = FindCreateHandleResourceBase(Handle);
113 | 20.4k | return Res.ResourceType;
114 | 20.4k | }
115 | | |
// Flags the resource behind `handle` as having a counter. `handle` must be a
// call in the HLAnnotateHandle group (cast<> plus DXASSERT below). The props
// operand of that call is rewritten with SamplerCmpOrHasCounter set, and the
// underlying handle is walked to record what feeds it in UpdateCounterSet.
// `i8Ty` is not used in this body.
116 | 2.94k | void MarkHasCounter(Value *handle, Type *i8Ty) {
117 | 2.94k | CallInst *CIHandle = cast<CallInst>(handle);
118 | 2.94k | DXASSERT(hlsl::GetHLOpcodeGroup(CIHandle->getCalledFunction()) ==
119 | 2.94k | HLOpcodeGroup::HLAnnotateHandle,
120 | 2.94k | "else invalid handle");
121 | | // Mark has counter for the input handle.
122 | 2.94k | Value *counterHandle =
123 | 2.94k | CIHandle->getArgOperand(HLOperandIndex::kHandleOpIdx);
124 | | // Set the has-counter flag in a copy of the resource properties.
125 | 2.94k | Constant *Props = cast<Constant>(CIHandle->getArgOperand(
126 | 2.94k | HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx));
127 | 2.94k | DxilResourceProperties RP = resource_helper::loadPropsFromConstant(*Props);
128 | 2.94k | RP.Basic.SamplerCmpOrHasCounter = true;
129 | | |
// Write the updated properties back as the call's props operand.
130 | 2.94k | CIHandle->setArgOperand(
131 | 2.94k | HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx,
132 | 2.94k | resource_helper::getAsConstant(RP,
133 | 2.94k | HLM.GetOP()->GetResourcePropertiesType(),
134 | 2.94k | *HLM.GetShaderModel()));
135 | | |
// Only UAV resources are expected to carry a counter (assert only).
136 | 2.94k | DXIL::ResourceClass RC = GetRC(handle);
137 | 2.94k | DXASSERT_LOCALVAR(RC, RC == DXIL::ResourceClass::UAV,
138 | 2.94k | "must UAV for counter");
139 | 2.94k | std::unordered_set<Value *> resSet;
140 | 2.94k | MarkHasCounterOnCreateHandle(counterHandle, resSet);
141 | 2.94k | }
142 | | |
// Finds the cbuffer whose global symbol is the resource operand of the
// create-handle call behind `handle`. An annotate-handle call is unwrapped
// first. Returns nullptr when no constant symbol is found or no cbuffer in
// the module matches it.
143 | 28 | DxilResourceBase *FindCBufferResourceFromHandle(Value *handle) {
// Step through an annotate-handle call to the handle it wraps.
144 | 28 | if (CallInst *CI = dyn_cast<CallInst>(handle)) {
145 | 28 | hlsl::HLOpcodeGroup group =
146 | 28 | hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
147 | 28 | if (group == HLOpcodeGroup::HLAnnotateHandle) {
148 | 28 | handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
149 | 28 | }
150 | 28 | }
151 | | |
// Take the resource operand of a create-handle call, if it is a Constant.
152 | 28 | Constant *symbol = nullptr;
153 | 28 | if (CallInst *CI = dyn_cast<CallInst>(handle)) {
154 | 28 | hlsl::HLOpcodeGroup group =
155 | 28 | hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
156 | 28 | if (group == HLOpcodeGroup::HLCreateHandle) {
157 | 28 | symbol = dyn_cast<Constant>(
158 | 28 | CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx));
159 | 28 | }
160 | 28 | }
161 | | |
162 | 28 | if (!symbol)
163 | 0 | return nullptr;
164 | | |
// Linear search over the module's cbuffers for the matching global symbol.
165 | 28 | for (const std::unique_ptr<DxilCBuffer> &res : HLM.GetCBuffers()) {
166 | 28 | if (res->GetGlobalSymbol() == symbol)
167 | 28 | return res.get();
168 | 28 | }
169 | 0 | return nullptr;
170 | 28 | }
171 | | |
// Returns the resource created for the cbuffer member addressed by `CbPtr`,
// creating it on first use. Results are cached in CBPtrToResourceMap under
// the key produced by UniformCbPtr.
172 | | Value *GetOrCreateResourceForCbPtr(GetElementPtrInst *CbPtr,
173 | | GlobalVariable *CbGV,
174 | 314 | DxilResourceProperties &RP) {
175 | | // Change array idx to 0 to make sure all array ptr share same key.
176 | 314 | Value *Key = UniformCbPtr(CbPtr, CbGV);
177 | 314 | if (CBPtrToResourceMap.count(Key))
178 | 24 | return CBPtrToResourceMap[Key];
179 | 290 | Value *Resource = CreateResourceForCbPtr(CbPtr, CbGV, RP);
180 | 290 | CBPtrToResourceMap[Key] = Resource;
181 | 290 | return Resource;
182 | 314 | }
183 | | |
// Turns `ResPtr` into a pointer usable in place of `CbPtr`. When the pointer
// types already match, `ResPtr` is returned unchanged. Otherwise `ResPtr`
// must point to an array, and a GEP {0, idx} into it is emitted before
// `CbPtr`, with idx accumulated from the array levels of `CbPtr`.
184 | 314 | Value *LowerCbResourcePtr(GetElementPtrInst *CbPtr, Value *ResPtr) {
185 | | // Simple case.
186 | 314 | if (ResPtr->getType() == CbPtr->getType())
187 | 314 | return ResPtr;
188 | | |
189 | | // Array case.
190 | 0 | DXASSERT_NOMSG(ResPtr->getType()->getPointerElementType()->isArrayTy());
191 | 

192 | 0 | IRBuilder<> Builder(CbPtr);
193 | 0 | gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
194 | 

// Seed the running index with the first GEP index operand.
195 | 0 | Value *arrayIdx = GEPIt.getOperand();
196 | | |
197 | | // Only calc array idx and size.
198 | | // Ignore struct type part.
// For each array level: arrayIdx = arrayIdx * numElements + levelIndex.
199 | 0 | for (; GEPIt != E; ++GEPIt) {
200 | 0 | if (GEPIt->isArrayTy()) {
201 | 0 | arrayIdx = Builder.CreateMul(
202 | 0 | arrayIdx, Builder.getInt32(GEPIt->getArrayNumElements()));
203 | 0 | arrayIdx = Builder.CreateAdd(arrayIdx, GEPIt.getOperand());
204 | 0 | }
205 | 0 | }
206 | 

207 | 0 | return Builder.CreateGEP(ResPtr, {Builder.getInt32(0), arrayIdx});
208 | 314 | }
209 | | |
// Decodes the resource properties constant stored as an operand of the
// annotate-handle call `Anno`.
210 | 314 | DxilResourceProperties GetResPropsFromAnnotateHandle(CallInst *Anno) {
211 | 314 | Constant *Props = cast<Constant>(Anno->getArgOperand(
212 | 314 | HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx));
213 | 314 | DxilResourceProperties RP = resource_helper::loadPropsFromConstant(*Props);
214 | 314 | return RP;
215 | 314 | }
216 | | |
217 | | private:
// Returns the cached attributes for `Handle`, computing them on first use.
// Attributes come from an HLAnnotateHandle call: class and kind from its
// props operand, type from its resource-type operand. For any other value an
// error is emitted on the context and the Invalid placeholder entry is
// returned.
218 | 85.1k | ResAttribute &FindCreateHandleResourceBase(Value *Handle) {
219 | 85.1k | if (HandleMetaMap.count(Handle))
220 | 48.2k | return HandleMetaMap[Handle];
221 | | |
222 | | // Add invalid first to avoid dead loop.
223 | 36.9k | HandleMetaMap[Handle] = {
224 | 36.9k | DXIL::ResourceClass::Invalid, DXIL::ResourceKind::Invalid,
225 | 36.9k | StructType::get(Type::getVoidTy(HLM.GetCtx()), nullptr)};
226 | 36.9k | if (CallInst *CI = dyn_cast<CallInst>(Handle)) {
227 | 36.9k | hlsl::HLOpcodeGroup group =
228 | 36.9k | hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction());
229 | 36.9k | if (group == HLOpcodeGroup::HLAnnotateHandle) {
230 | 36.9k | Constant *Props = cast<Constant>(CI->getArgOperand(
231 | 36.9k | HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx));
232 | 36.9k | DxilResourceProperties RP =
233 | 36.9k | resource_helper::loadPropsFromConstant(*Props);
// Only the type of the resource-type operand is used, not its value.
234 | 36.9k | Type *ResTy =
235 | 36.9k | CI->getArgOperand(HLOperandIndex::kAnnotateHandleResourceTypeOpIdx)
236 | 36.9k | ->getType();
237 | | |
238 | 36.9k | ResAttribute Attrib = {RP.getResourceClass(), RP.getResourceKind(),
239 | 36.9k | ResTy};
240 | | |
241 | 36.9k | HandleMetaMap[Handle] = Attrib;
242 | 36.9k | return HandleMetaMap[Handle];
243 | 36.9k | }
244 | 36.9k | }
// Not an annotate-handle call: report it and hand back the placeholder.
245 | 6 | dxilutil::EmitErrorOnContext(Handle->getContext(),
246 | 6 | "cannot map resource to handle.");
247 | | |
248 | 6 | return HandleMetaMap[Handle];
249 | 36.9k | }
// Returns the first CallInst reachable from `handle` through select and phi
// operands, or nullptr if there is none. `resSet` holds values already
// visited so that cycles terminate.
250 | | CallInst *FindCreateHandle(Value *handle,
251 | 0 | std::unordered_set<Value *> &resSet) {
252 | 0 | // Already checked.
253 | 0 | if (resSet.count(handle))
254 | 0 | return nullptr;
255 | 0 | resSet.insert(handle);
256 | 0 |

257 | 0 | if (CallInst *CI = dyn_cast<CallInst>(handle))
258 | 0 | return CI;
259 | 0 | if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) {
260 | 0 | if (CallInst *CI = FindCreateHandle(Sel->getTrueValue(), resSet))
261 | 0 | return CI;
262 | 0 | if (CallInst *CI = FindCreateHandle(Sel->getFalseValue(), resSet))
263 | 0 | return CI;
264 | 0 | return nullptr;
265 | 0 | }
266 | 0 | if (PHINode *Phi = dyn_cast<PHINode>(handle)) {
267 | 0 | for (unsigned i = 0; i < Phi->getNumOperands(); i++) {
268 | 0 | if (CallInst *CI = FindCreateHandle(Phi->getOperand(i), resSet))
269 | 0 | return CI;
270 | 0 | }
271 | 0 | return nullptr;
272 | 0 | }
273 | 0 |

274 | 0 | return nullptr;
275 | 0 | }
// Walks `handle` and records in UpdateCounterSet the instruction that
// provides its resource. For a call, the resource operand is either a load
// (the load is recorded) or a HandleToResCast call (its source is recorded
// when it is an Instruction); anything else is reported as an error.
// Select and phi nodes are followed recursively; `resSet` holds the values
// already visited.
276 | | void MarkHasCounterOnCreateHandle(Value *handle,
277 | 2.94k | std::unordered_set<Value *> &resSet) {
278 | | // Already checked.
279 | 2.94k | if (resSet.count(handle))
280 | 0 | return;
281 | 2.94k | resSet.insert(handle);
282 | | |
283 | 2.94k | if (CallInst *CI = dyn_cast<CallInst>(handle)) {
284 | 2.94k | Value *Res =
285 | 2.94k | CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx);
286 | 2.94k | LoadInst *LdRes = dyn_cast<LoadInst>(Res);
287 | 2.94k | if (LdRes) {
288 | 2.93k | UpdateCounterSet.insert(LdRes);
289 | 2.93k | return;
290 | 2.93k | }
291 | 8 | if (CallInst *CallRes = dyn_cast<CallInst>(Res)) {
292 | 8 | hlsl::HLOpcodeGroup group =
293 | 8 | hlsl::GetHLOpcodeGroup(CallRes->getCalledFunction());
294 | 8 | if (group == HLOpcodeGroup::HLCast) {
295 | 8 | HLCastOpcode opcode =
296 | 8 | static_cast<HLCastOpcode>(hlsl::GetHLOpcode(CallRes));
297 | 8 | if (opcode == HLCastOpcode::HandleToResCast) {
// A non-instruction cast source is silently skipped.
298 | 8 | if (Instruction *Hdl = dyn_cast<Instruction>(
299 | 8 | CallRes->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx)))
300 | 8 | UpdateCounterSet.insert(Hdl);
301 | 8 | return;
302 | 8 | }
303 | 8 | }
304 | 8 | }
305 | 0 | dxilutil::EmitErrorOnInstruction(CI, "cannot map resource to handle.");
306 | 0 | return;
307 | 8 | }
308 | 0 | if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) {
309 | 0 | MarkHasCounterOnCreateHandle(Sel->getTrueValue(), resSet);
310 | 0 | MarkHasCounterOnCreateHandle(Sel->getFalseValue(), resSet);
311 | 0 | }
312 | 0 | if (PHINode *Phi = dyn_cast<PHINode>(handle)) {
313 | 0 | for (unsigned i = 0; i < Phi->getNumOperands(); i++) {
314 | 0 | MarkHasCounterOnCreateHandle(Phi->getOperand(i), resSet);
315 | 0 | }
316 | 0 | }
317 | 0 | }
318 | | |
// Builds the map key for `CbPtr`: a GEP on `CbGV` with the same index list,
// except that every non-constant index is replaced by zero.
// NOTE(review): the builder has no insertion point; the all-constant GEP is
// presumably folded to a constant expression - confirm.
319 | 314 | Value *UniformCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV) {
320 | 314 | gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
321 | 314 | std::vector<Value *> idxList(CbPtr->idx_begin(), CbPtr->idx_end());
322 | 314 | unsigned i = 0;
323 | 314 | IRBuilder<> Builder(HLM.GetCtx());
324 | 314 | Value *zero = Builder.getInt32(0);
325 | 1.29k | for (; GEPIt != E; ++GEPIt, ++i982 ) {
326 | 982 | ConstantInt *ImmIdx = dyn_cast<ConstantInt>(GEPIt.getOperand());
327 | 982 | if (!ImmIdx) {
328 | | // Remove dynamic indexing to avoid crash.
329 | 8 | idxList[i] = zero;
330 | 8 | }
331 | 982 | }
332 | | |
333 | 314 | Value *Key = Builder.CreateInBoundsGEP(CbGV, idxList);
334 | 314 | return Key;
335 | 314 | }
336 | | |
// Creates the global resource for the cbuffer member addressed by `CbPtr`.
// The name is assembled from the GEP path: struct levels contribute the
// annotated field name, array levels contribute the index when it is a
// constant, and parts are joined with ".". The binding comes from
// HLM.GetBindingForResourceInCB.
337 | | Value *CreateResourceForCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV,
338 | 290 | DxilResourceProperties &RP) {
339 | 290 | Type *CbTy = CbPtr->getPointerOperandType();
340 | 290 | DXASSERT_LOCALVAR(CbTy, CbTy == CbGV->getType(),
341 | 290 | "else arg not point to var");
342 | | |
343 | 290 | gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr);
344 | 290 | unsigned i = 0;
345 | 290 | IRBuilder<> Builder(HLM.GetCtx());
// arraySize is accumulated below but never read in this function.
346 | 290 | unsigned arraySize = 1;
347 | 290 | DxilTypeSystem &typeSys = HLM.GetTypeSystem();
348 | | |
349 | 290 | std::string Name;
350 | 1.19k | for (; GEPIt != E; ++GEPIt, ++i902 ) {
351 | 902 | if (GEPIt->isArrayTy()) {
352 | 72 | arraySize *= GEPIt->getArrayNumElements();
353 | 72 | if (!Name.empty())
354 | 72 | Name += ".";
// A dynamic array index adds nothing to the name after the separator.
355 | 72 | if (ConstantInt *ImmIdx = dyn_cast<ConstantInt>(GEPIt.getOperand())) {
356 | 64 | unsigned idx = ImmIdx->getLimitedValue();
357 | 64 | Name += std::to_string(idx);
358 | 64 | }
359 | 830 | } else if (GEPIt->isStructTy()) {
// Struct indices must be constants (cast<>); use the field's annotated name.
360 | 540 | DxilStructAnnotation *typeAnnot =
361 | 540 | typeSys.GetStructAnnotation(cast<StructType>(*GEPIt));
362 | 540 | DXASSERT_NOMSG(typeAnnot);
363 | 540 | unsigned idx = cast<ConstantInt>(GEPIt.getOperand())->getLimitedValue();
364 | 540 | DXASSERT_NOMSG(typeAnnot->GetNumFields() > idx);
365 | 540 | DxilFieldAnnotation &fieldAnnot = typeAnnot->GetFieldAnnotation(idx);
366 | 540 | if (!Name.empty())
367 | 250 | Name += ".";
368 | 540 | Name += fieldAnnot.GetFieldName();
369 | 540 | }
370 | 902 | }
371 | | |
372 | 290 | Type *Ty = CbPtr->getResultElementType();
373 | | // Not support resource array in cbuffer.
374 | 290 | unsigned ResBinding =
375 | 290 | HLM.GetBindingForResourceInCB(CbPtr, CbGV, RP.getResourceClass());
376 | 290 | return CreateResourceGV(Ty, Name, RP, ResBinding);
377 | 290 | }
378 | | |
// Gets or inserts a module global named `Name` of type `Ty`, registers it
// with the HL module as a resource with properties `RP`, sets the resource's
// lower bound to `ResBinding` and returns the global.
379 | | Value *CreateResourceGV(Type *Ty, StringRef Name, DxilResourceProperties &RP,
380 | 290 | unsigned ResBinding) {
381 | 290 | Module &M = *HLM.GetModule();
382 | 290 | Constant *GV = M.getOrInsertGlobal(Name, Ty);
383 | | // Create resource and set GV as globalSym.
384 | 290 | DxilResourceBase *Res = HLM.AddResourceWithGlobalVariableAndProps(GV, RP);
385 | 290 | DXASSERT(Res, "fail to create resource for global variable in cbuffer");
386 | 290 | Res->SetLowerBound(ResBinding);
387 | 290 | return GV;
388 | 290 | }
389 | | };
390 | | |
391 | | // Helper for lowering resource extension methods.
// Implements hlsl::HLResourceLookup by delegating to an
// HLObjectOperationLowerHelper, which is held by reference.
392 | | struct HLObjectExtensionLowerHelper : public hlsl::HLResourceLookup {
393 | | explicit HLObjectExtensionLowerHelper(HLObjectOperationLowerHelper &ObjHelper)
394 | 68 | : m_ObjHelper(ObjHelper) {}
395 | | |
// Looks up the resource kind of `HLHandle` and, when it is not Invalid,
// stores the kind's name in `*ppName`. Returns whether a name was stored;
// `*ppName` is left untouched on failure.
396 | 6 | virtual bool GetResourceKindName(Value *HLHandle, const char **ppName) {
397 | 6 | DXIL::ResourceKind K = m_ObjHelper.GetRK(HLHandle);
398 | 6 | bool Success = K != DXIL::ResourceKind::Invalid;
399 | 6 | if (Success) {
400 | 6 | *ppName = hlsl::GetResourceKindName(K);
401 | 6 | }
402 | 6 | return Success;
403 | 6 | }
404 | | |
405 | | private:
406 | | HLObjectOperationLowerHelper &m_ObjHelper;
407 | | };
408 | | |
// Signature shared by the intrinsic lowering functions in this file. Each one
// receives the HL call being lowered, its intrinsic opcode, a DXIL opcode,
// the shared helpers and a `Translated` flag, and returns a Value* (several
// of the functions below return nullptr after emitting their own code).
409 | | using IntrinsicLowerFuncTy = Value *(CallInst *CI, IntrinsicOp IOP,
410 | | DXIL::OpCode opcode,
411 | | HLOperationLowerHelper &helper,
412 | | HLObjectOperationLowerHelper *pObjHelper,
413 | | bool &Translated);
414 | | |
// Associates one HL intrinsic opcode with the function that lowers it and
// with the DXIL opcode handed to that function.
415 | | struct IntrinsicLower {
416 | | // Intrinsic opcode.
417 | | IntrinsicOp IntriOpcode;
418 | | // Lower function.
419 | | IntrinsicLowerFuncTy &LowerFunc;
420 | | // DXIL opcode if can direct map.
421 | | DXIL::OpCode DxilOpcode;
422 | | };
423 | | |
424 | | // IOP intrinsics. |
425 | | namespace { |
426 | | |
427 | | // Creates the necessary scalar calls for a "trivial" operation where only
428 | | // call instructions to a single function type are needed.
429 | | // The overload type `Ty` determines what scalarization might be required.
430 | | // Elements of any vectors in `refArgs` are extracted into scalars for each
431 | | // call generated while the same scalar values are used unaltered in each call.
432 | | // Utility objects `hlslOP` and `Builder` are used to generate calls to the
433 | | // given `dxilFunc` for each set of scalar arguments.
434 | | // The results are reconstructed into the given `RetTy` as needed.
435 | | Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode,
436 | | ArrayRef<Value *> refArgs, Type *Ty, Type *RetTy,
437 | 32.3k | OP *hlslOP, IRBuilder<> &Builder) {
438 | 32.3k | unsigned argNum = refArgs.size();
// Mutable copy of the arguments; vector operands are overwritten per element.
439 | 32.3k | std::vector<Value *> args = refArgs;
440 | | |
441 | 32.3k | if (Ty->isVectorTy()) {
// Emit one call per element and insert each result into a RetTy value.
442 | 8.37k | Value *retVal = llvm::UndefValue::get(RetTy);
443 | 8.37k | unsigned vecSize = Ty->getVectorNumElements();
444 | 35.0k | for (unsigned i = 0; i < vecSize; i++26.6k ) {
445 | | // Update vector args, skip known opcode arg.
446 | 67.1k | for (unsigned argIdx = HLOperandIndex::kUnaryOpSrc0Idx; argIdx < argNum;
447 | 40.5k | argIdx++) {
448 | 40.5k | if (refArgs[argIdx]->getType()->isVectorTy()) {
449 | 36.9k | Value *arg = refArgs[argIdx];
450 | 36.9k | args[argIdx] = Builder.CreateExtractElement(arg, i);
451 | 36.9k | }
452 | 40.5k | }
453 | 26.6k | Value *EltOP =
454 | 26.6k | Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
455 | 26.6k | retVal = Builder.CreateInsertElement(retVal, EltOP, i);
456 | 26.6k | }
457 | 8.37k | return retVal;
458 | 8.37k | }
459 | | |
460 | | // Cannot add name to void.
461 | 23.9k | if (RetTy->isVoidTy())
462 | 306 | return Builder.CreateCall(dxilFunc, args);
463 | | |
// Scalar, non-void case: a single call named after the opcode.
464 | 23.6k | return Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode));
465 | 23.9k | }
466 | | |
467 | | // Creates a native vector call for a "trivial" operation where only a single
468 | | // call instruction is needed. The overload and return types are the same vector
469 | | // type `Ty`.
470 | | // Utility objects `OP` and `Builder` are used to create a call to the given
471 | | // `Func` with `Args` arguments.
// The call is named after the opcode unless `Ty` is void.
472 | | Value *TrivialDxilVectorOperation(Function *Func, OP::OpCode Opcode,
473 | | ArrayRef<Value *> Args, Type *Ty, OP *OP,
474 | 1.09k | IRBuilder<> &Builder) {
475 | 1.09k | if (!Ty->isVoidTy())
476 | 1.09k | return Builder.CreateCall(Func, Args, OP->GetOpCodeName(Opcode));
477 | 0 | return Builder.CreateCall(Func, Args); // Cannot add name to void.
478 | 1.09k | }
479 | | |
480 | | // Generates a DXIL operation with the overloaded type based on `Ty` and return
481 | | // type `RetTy`. When Ty is a vector, it will either generate per-element calls
482 | | // for each vector element and reconstruct the vector type from those results or
483 | | // operate on and return native vectors depending on vector size and the
484 | | // legality of the vector overload.
// `refArgs` must already hold the opcode constant in slot 0.
485 | | Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs,
486 | | Type *Ty, Type *RetTy, OP *hlslOP,
487 | 32.7k | IRBuilder<> &Builder) {
488 | | |
489 | | // If supported and the overload type is a vector with more than 1 element,
490 | | // create a native vector operation.
// "Supported" means the shader model reports IsSM69Plus() and
// OP::IsOverloadLegal accepts the vector type for this opcode.
491 | 32.7k | if (Ty->isVectorTy() && Ty->getVectorNumElements() > 19.33k &&
492 | 32.7k | hlslOP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus()8.51k &&
493 | 32.7k | OP::IsOverloadLegal(opcode, Ty)1.08k ) {
494 | 1.04k | Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty);
495 | 1.04k | return TrivialDxilVectorOperation(dxilFunc, opcode, refArgs, Ty, hlslOP,
496 | 1.04k | Builder);
497 | 1.04k | }
498 | | |
499 | | // Set overload type to the scalar type of `Ty` and generate call(s).
500 | 31.6k | Type *EltTy = Ty->getScalarType();
501 | 31.6k | Function *dxilFunc = hlslOP->GetOpFunc(opcode, EltTy);
502 | | |
503 | 31.6k | return TrivialDxilOperation(dxilFunc, opcode, refArgs, Ty, RetTy, hlslOP,
504 | 31.6k | Builder);
505 | 32.7k | }
506 | | |
// Convenience overload that inserts before `Inst` and uses the type of `Inst`
// as the return type. The caller passes `refArgs` with slot 0 left null; this
// function writes the opcode constant into that slot in place, so `refArgs`
// must refer to writable storage owned by the caller.
507 | | Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs,
508 | 3.72k | Type *Ty, Instruction *Inst, OP *hlslOP) {
509 | 3.72k | DXASSERT(refArgs.size() > 0, "else opcode isn't in signature");
510 | 3.72k | DXASSERT(refArgs[0] == nullptr,
511 | 3.72k | "else caller has already filled the value in");
512 | 3.72k | IRBuilder<> B(Inst);
513 | 3.72k | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
// const_cast is needed because ArrayRef only exposes const element access.
514 | 3.72k | const_cast<llvm::Value **>(refArgs.data())[0] =
515 | 3.72k | opArg; // actually stack memory from caller
516 | 3.72k | return TrivialDxilOperation(opcode, refArgs, Ty, Inst->getType(), hlslOP, B);
517 | 3.72k | }
518 | | |
519 | | // Translate call that converts to a dxil unary operation with a different
520 | | // return type from the overload by passing the argument, explicit return type,
521 | | // and helper objects to the scalarizing unary dxil operation creation.
// The overload type is the type of the source operand; the return type is the
// type of `CI`. `IOP` and `Translated` are not used in this body.
522 | | Value *TrivialUnaryOperationRet(CallInst *CI, IntrinsicOp IOP,
523 | | OP::OpCode OpCode,
524 | | HLOperationLowerHelper &Helper,
525 | | HLObjectOperationLowerHelper *,
526 | 98 | bool &Translated) {
527 | 98 | Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
528 | 98 | Type *Ty = Src->getType();
529 | | |
530 | 98 | IRBuilder<> Builder(CI);
531 | 98 | hlsl::OP *OP = &Helper.hlslOP;
532 | 98 | Type *RetTy = CI->getType();
533 | 98 | Constant *OpArg = OP->GetU32Const((unsigned)OpCode);
534 | 98 | Value *Args[] = {OpArg, Src};
535 | | |
536 | 98 | return TrivialDxilOperation(OpCode, Args, Ty, RetTy, OP, Builder);
537 | 98 | }
538 | | |
// Emits the DXIL operation `OpCode` on `Src`. The type of `Src` is used both
// as the overload type and as the return type.
539 | | Value *TrivialDxilUnaryOperation(OP::OpCode OpCode, Value *Src, hlsl::OP *Op,
540 | 7.60k | IRBuilder<> &Builder) {
541 | 7.60k | Type *Ty = Src->getType();
542 | | |
// Argument list is {opcode constant, source}.
543 | 7.60k | Constant *OpArg = Op->GetU32Const((unsigned)OpCode);
544 | 7.60k | Value *Args[] = {OpArg, Src};
545 | | |
546 | 7.60k | return TrivialDxilOperation(OpCode, Args, Ty, Ty, Op, Builder);
547 | 7.60k | }
548 | | |
// Emits the DXIL operation `opcode` on `src0` and `src1`. The type of `src0`
// is used both as the overload type and as the return type.
549 | | Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1,
550 | 7.06k | hlsl::OP *hlslOP, IRBuilder<> &Builder) {
551 | 7.06k | Type *Ty = src0->getType();
552 | | |
// Argument list is {opcode constant, src0, src1}.
553 | 7.06k | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
554 | 7.06k | Value *args[] = {opArg, src0, src1};
555 | | |
556 | 7.06k | return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
557 | 7.06k | }
558 | | |
// Emits the DXIL operation `opcode` on `src0`, `src1` and `src2`. The type of
// `src0` is used both as the overload type and as the return type.
559 | | Value *TrivialDxilTrinaryOperation(OP::OpCode opcode, Value *src0, Value *src1,
560 | | Value *src2, hlsl::OP *hlslOP,
561 | 12.9k | IRBuilder<> &Builder) {
562 | 12.9k | Type *Ty = src0->getType();
563 | | |
// Argument list is {opcode constant, src0, src1, src2}.
564 | 12.9k | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
565 | 12.9k | Value *args[] = {opArg, src0, src1, src2};
566 | | |
567 | 12.9k | return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder);
568 | 12.9k | }
569 | | |
570 | | // Translate call that trivially converts to a dxil unary operation by passing
571 | | // argument, return type, and helper objects to either scalarizing or native
572 | | // vector dxil operation creation depending on version and vector size.
// New instructions are inserted before `CI`. `IOP`, `pObjHelper` and
// `Translated` are not used in this body.
573 | | Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
574 | | HLOperationLowerHelper &helper,
575 | | HLObjectOperationLowerHelper *pObjHelper,
576 | 4.34k | bool &Translated) {
577 | 4.34k | Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
578 | 4.34k | IRBuilder<> Builder(CI);
579 | 4.34k | hlsl::OP *hlslOP = &helper.hlslOP;
580 | | |
581 | 4.34k | return TrivialDxilUnaryOperation(opcode, src0, hlslOP, Builder);
582 | 4.34k | }
583 | | |
584 | | // Translate call that trivially converts to a dxil binary operation by passing
585 | | // arguments, return type, and helper objects to either scalarizing or native
586 | | // vector dxil operation creation depending on version and vector size.
// New instructions are inserted before `CI`. `IOP`, `pObjHelper` and
// `Translated` are not used in this body.
587 | | Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
588 | | HLOperationLowerHelper &helper,
589 | | HLObjectOperationLowerHelper *pObjHelper,
590 | 2.49k | bool &Translated) {
591 | 2.49k | hlsl::OP *hlslOP = &helper.hlslOP;
592 | 2.49k | Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
593 | 2.49k | Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
594 | 2.49k | IRBuilder<> Builder(CI);
595 | | |
596 | 2.49k | Value *binOp =
597 | 2.49k | TrivialDxilBinaryOperation(opcode, src0, src1, hlslOP, Builder);
598 | 2.49k | return binOp;
599 | 2.49k | }
600 | | |
601 | | // Translate call that trivially converts to a dxil trinary (aka tertiary)
602 | | // operation by passing arguments, return type, and helper objects to either
603 | | // scalarizing or native vector dxil operation creation depending on version
604 | | // and vector size.
// New instructions are inserted before `CI`. `IOP`, `pObjHelper` and
// `Translated` are not used in this body.
605 | | Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
606 | | HLOperationLowerHelper &helper,
607 | | HLObjectOperationLowerHelper *pObjHelper,
608 | 12.0k | bool &Translated) {
609 | 12.0k | hlsl::OP *hlslOP = &helper.hlslOP;
610 | 12.0k | Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx);
611 | 12.0k | Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx);
612 | 12.0k | Value *src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx);
613 | 12.0k | IRBuilder<> Builder(CI);
614 | | |
615 | 12.0k | Value *triOp =
616 | 12.0k | TrivialDxilTrinaryOperation(opcode, src0, src1, src2, hlslOP, Builder);
617 | 12.0k | return triOp;
618 | 12.0k | }
619 | | |
// Lowers a unary call whose result is boolean: the overload type is the type
// of the source operand, and the return type is i1, or a vector of i1 with
// the same element count when the source is a vector. `IOP`, `pObjHelper`
// and `Translated` are not used in this body.
620 | | Value *TrivialIsSpecialFloat(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
621 | | HLOperationLowerHelper &helper,
622 | | HLObjectOperationLowerHelper *pObjHelper,
623 | 174 | bool &Translated) {
624 | 174 | hlsl::OP *hlslOP = &helper.hlslOP;
625 | 174 | Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
626 | 174 | IRBuilder<> Builder(CI);
627 | | |
628 | 174 | Type *Ty = src->getType();
// Widen the i1 result type to match a vector source.
629 | 174 | Type *RetTy = Type::getInt1Ty(CI->getContext());
630 | 174 | if (Ty->isVectorTy())
631 | 142 | RetTy = VectorType::get(RetTy, Ty->getVectorNumElements());
632 | | |
633 | 174 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode);
634 | 174 | Value *args[] = {opArg, src};
635 | | |
636 | 174 | return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder);
637 | 174 | }
638 | | |
// Returns true when the GEP `I` points to an HLSL resource type. The pointee
// type is first passed through dxilutil::GetArrayEltTy, so a GEP that points
// to an array of resources is also accepted.
639 | 120 | bool IsResourceGEP(GetElementPtrInst *I) {
640 | 120 | Type *Ty = I->getType()->getPointerElementType();
641 | 120 | Ty = dxilutil::GetArrayEltTy(Ty);
642 | | // Only mark on GEP which point to resource.
643 | 120 | return dxilutil::IsHLSLResourceType(Ty);
644 | 120 | }
645 | | |
// Lowers the non-uniform resource index intrinsic. Users of `CI` are tagged
// through DxilMDHelper::MarkNonUniform when they are GEPs that point to a
// resource or calls that return the handle type; the same check is applied
// to users of any cast of `CI`. All uses of `CI` are then replaced by its
// operand and nullptr is returned. `IOP`, `opcode`, `pObjHelper` and
// `Translated` are not used in this body.
646 | | Value *TranslateNonUniformResourceIndex(
647 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
648 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper,
649 | 196 | bool &Translated) {
650 | 196 | Value *V = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
651 | 196 | Type *hdlTy = helper.hlslOP.GetHandleType();
652 | 212 | for (User *U : CI->users()) {
653 | 212 | if (GetElementPtrInst *I = dyn_cast<GetElementPtrInst>(U)) {
654 | | // Only mark on GEP which point to resource.
655 | 108 | if (IsResourceGEP(I))
656 | 100 | DxilMDHelper::MarkNonUniform(I);
657 | 108 | } else if (CastInst *104 castI104 = dyn_cast<CastInst>(U)) {
// The index went through a cast: inspect the users of the cast instead.
658 | 40 | for (User *castU : castI->users()) {
659 | 40 | if (GetElementPtrInst *I = dyn_cast<GetElementPtrInst>(castU)) {
660 | | // Only mark on GEP which point to resource.
661 | 12 | if (IsResourceGEP(I))
662 | 12 | DxilMDHelper::MarkNonUniform(I);
// This inner `CI` shadows the function parameter of the same name.
663 | 28 | } else if (CallInst *CI = dyn_cast<CallInst>(castU)) {
664 | 28 | if (CI->getType() == hdlTy)
665 | 28 | DxilMDHelper::MarkNonUniform(CI);
666 | 28 | }
667 | 40 | }
// This inner `CI` shadows the function parameter of the same name.
668 | 64 | } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
669 | 64 | if (CI->getType() == hdlTy)
670 | 44 | DxilMDHelper::MarkNonUniform(CI);
671 | 64 | }
672 | 212 | }
// The intrinsic produces no DXIL call; its uses read the operand directly.
673 | 196 | CI->replaceAllUsesWith(V);
674 | 196 | return nullptr;
675 | 196 | }
676 | | |
// Lowers the HLSL memory barrier intrinsics to a DXIL Barrier call. The
// barrier mode argument is a bitwise OR of DXIL::BarrierMode flags chosen
// from `IOP`. The call is inserted before `CI` and nullptr is returned. The
// `opcode` parameter is ignored: OP::OpCode::Barrier is always used.
677 | | Value *TrivialBarrier(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
678 | | HLOperationLowerHelper &helper,
679 | | HLObjectOperationLowerHelper *pObjHelper,
680 | 1.55k | bool &Translated) {
681 | 1.55k | hlsl::OP *OP = &helper.hlslOP;
682 | 1.55k | Function *dxilFunc = OP->GetOpFunc(OP::OpCode::Barrier, CI->getType());
683 | 1.55k | Constant *opArg = OP->GetU32Const((unsigned)OP::OpCode::Barrier);
684 | | |
// Individual mode flags that are combined per intrinsic below.
685 | 1.55k | unsigned uglobal = static_cast<unsigned>(DXIL::BarrierMode::UAVFenceGlobal);
686 | 1.55k | unsigned g = static_cast<unsigned>(DXIL::BarrierMode::TGSMFence);
687 | 1.55k | unsigned t = static_cast<unsigned>(DXIL::BarrierMode::SyncThreadGroup);
688 | | // unsigned ut =
689 | | // static_cast<unsigned>(DXIL::BarrierMode::UAVFenceThreadGroup);
690 | | |
691 | 1.55k | unsigned barrierMode = 0;
692 | 1.55k | switch (IOP) {
693 | 8 | case IntrinsicOp::IOP_AllMemoryBarrier:
694 | 8 | barrierMode = uglobal | g;
695 | 8 | break;
696 | 16 | case IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync:
697 | 16 | barrierMode = uglobal | g | t;
698 | 16 | break;
699 | 32 | case IntrinsicOp::IOP_GroupMemoryBarrier:
700 | 32 | barrierMode = g;
701 | 32 | break;
702 | 1.46k | case IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync:
703 | 1.46k | barrierMode = g | t;
704 | 1.46k | break;
705 | 24 | case IntrinsicOp::IOP_DeviceMemoryBarrier:
706 | 24 | barrierMode = uglobal;
707 | 24 | break;
708 | 8 | case IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync:
709 | 8 | barrierMode = uglobal | t;
710 | 8 | break;
// Any other intrinsic asserts and leaves barrierMode at 0.
711 | 0 | default:
712 | 0 | DXASSERT(0, "invalid opcode for barrier");
713 | 0 | break;
714 | 1.55k | }
715 | 1.55k | Value *src0 = OP->GetU32Const(static_cast<unsigned>(barrierMode));
716 | | |
717 | 1.55k | Value *args[] = {opArg, src0};
718 | | |
// The call is emitted unnamed and its result is not returned.
719 | 1.55k | IRBuilder<> Builder(CI);
720 | 1.55k | Builder.CreateCall(dxilFunc, args);
721 | 1.55k | return nullptr;
722 | 1.55k | }
723 | | |
724 | | Value *TranslateD3DColorToUByte4(CallInst *CI, IntrinsicOp IOP, |
725 | | OP::OpCode opcode, |
726 | | HLOperationLowerHelper &helper, |
727 | | HLObjectOperationLowerHelper *pObjHelper, |
728 | 32 | bool &Translated) { |
729 | 32 | IRBuilder<> Builder(CI); |
730 | 32 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
731 | 32 | Type *Ty = val->getType(); |
732 | | |
733 | | // Use the same scaling factor used by FXC (i.e., 255.001953) |
734 | | // Excerpt from stackoverflow discussion: |
735 | | // "Built-in rounding, necessary because of truncation. 0.001953 * 256 = 0.5" |
736 | 32 | Constant *toByteConst = ConstantFP::get(Ty->getScalarType(), 255.001953); |
737 | | |
738 | 32 | if (Ty->isVectorTy()) { |
739 | 32 | static constexpr int supportedVecElemCount = 4; |
740 | 32 | if (Ty->getVectorNumElements() != supportedVecElemCount) { |
741 | 0 | llvm_unreachable( |
742 | 0 | "Unsupported input type for intrinsic D3DColorToUByte4."); |
743 | 0 | return UndefValue::get(CI->getType()); |
744 | 0 | } |
745 | | |
746 | 32 | toByteConst = ConstantVector::getSplat(supportedVecElemCount, toByteConst); |
747 | | // Swizzle the input val -> val.zyxw |
748 | 32 | SmallVector<int, 4> mask{2, 1, 0, 3}; |
749 | 32 | val = Builder.CreateShuffleVector(val, val, mask); |
750 | 32 | } |
751 | | |
752 | 32 | Value *byte4 = Builder.CreateFMul(toByteConst, val); |
753 | 32 | return Builder.CreateCast(Instruction::CastOps::FPToSI, byte4, CI->getType()); |
754 | 32 | } |
755 | | |
756 | | // Returns true if pow can be implemented using Fxc's mul-only code gen pattern. |
757 | | // Fxc uses the below rules when choosing mul-only code gen pattern to implement |
758 | | // pow function. Rule 1: Applicable only to power values in the range |
759 | | // [INT32_MIN, INT32_MAX] Rule 2: The maximum number of mul ops needed shouldn't |
760 | | // exceed (2n+1) or (n+1) based on whether the power |
761 | | // is a positive or a negative value. Here "n" is the number of scalar |
762 | | // elements in power. |
763 | | // Rule 3: Power must be an exact value. |
764 | | // +----------+---------------------+------------------+ |
765 | | // | BaseType | IsExponentPositive | MaxMulOpsAllowed | |
766 | | // +----------+---------------------+------------------+ |
767 | | // | float4x4 | True | 33 | |
768 | | // | float4x4 | False | 17 | |
769 | | // | float4x2 | True | 17 | |
770 | | // | float4x2 | False | 9 | |
771 | | // | float2x4 | True | 17 | |
772 | | // | float2x4 | False | 9 | |
773 | | // | float4 | True | 9 | |
774 | | // | float4 | False | 5 | |
775 | | // | float2 | True | 5 | |
776 | | // | float2 | False | 3 | |
777 | | // | float | True | 3 | |
778 | | // | float | False | 2 | |
779 | | // +----------+---------------------+------------------+ |
780 | | |
781 | | bool CanUseFxcMulOnlyPatternForPow(IRBuilder<> &Builder, Value *x, Value *pow, |
782 | 1.45k | int32_t &powI) { |
783 | | // Applicable only when power is a literal. |
784 | 1.45k | if (!isa<ConstantDataVector>(pow) && !isa<ConstantFP>(pow)262 ) { |
785 | 74 | return false; |
786 | 74 | } |
787 | | |
788 | | // Only apply this code gen on splat values. |
789 | 1.38k | if (ConstantDataVector *cdv = dyn_cast<ConstantDataVector>(pow)) { |
790 | 1.19k | if (!hlsl::dxilutil::IsSplat(cdv)) { |
791 | 8 | return false; |
792 | 8 | } |
793 | 1.19k | } |
794 | | |
795 | | // Only apply on aggregates of 16 or fewer elements, |
796 | | // representing the max 4x4 matrix size. |
797 | 1.37k | Type *Ty = x->getType(); |
798 | 1.37k | if (Ty->isVectorTy() && Ty->getVectorNumElements() > 161.18k ) |
799 | 0 | return false; |
800 | | |
801 | 1.37k | APFloat powAPF = isa<ConstantDataVector>(pow) |
802 | 1.37k | ? cast<ConstantDataVector>(pow)->getElementAsAPFloat(0)1.18k |
803 | 1.37k | : // should be a splat value |
804 | 1.37k | cast<ConstantFP>(pow)->getValueAPF()188 ; |
805 | 1.37k | APSInt powAPS(32, false); |
806 | 1.37k | bool isExact = false; |
807 | | // Try converting float value of power to integer and also check if the float |
808 | | // value is exact. |
809 | 1.37k | APFloat::opStatus status = |
810 | 1.37k | powAPF.convertToInteger(powAPS, APFloat::rmTowardZero, &isExact); |
811 | 1.37k | if (status == APFloat::opStatus::opOK && isExact348 ) { |
812 | 340 | powI = powAPS.getExtValue(); |
813 | 340 | uint32_t powU = abs(powI); |
814 | 340 | int setBitCount = 0; |
815 | 340 | int maxBitSetPos = -1; |
816 | 11.2k | for (int i = 0; i < 32; i++10.8k ) { |
817 | 10.8k | if ((powU >> i) & 1) { |
818 | 548 | setBitCount++; |
819 | 548 | maxBitSetPos = i; |
820 | 548 | } |
821 | 10.8k | } |
822 | | |
823 | 340 | DXASSERT(maxBitSetPos <= 30, "msb should always be zero."); |
824 | 340 | unsigned numElem = |
825 | 340 | isa<ConstantDataVector>(pow) ? x->getType()->getVectorNumElements()152 : 1188 ; |
826 | 340 | int mulOpThreshold = powI < 0 ? numElem + 132 : 2 * numElem + 1308 ; |
827 | 340 | int mulOpNeeded = maxBitSetPos + setBitCount - 1; |
828 | 340 | return mulOpNeeded <= mulOpThreshold; |
829 | 340 | } |
830 | | |
831 | 1.03k | return false; |
832 | 1.37k | } |
833 | | |
834 | | Value *TranslatePowUsingFxcMulOnlyPattern(IRBuilder<> &Builder, Value *x, |
835 | 184 | const int32_t y) { |
836 | 184 | uint32_t absY = abs(y); |
837 | | // If y is zero then always return 1. |
838 | 184 | if (absY == 0) { |
839 | 8 | return ConstantFP::get(x->getType(), 1); |
840 | 8 | } |
841 | | |
842 | 176 | int lastSetPos = -1; |
843 | 176 | Value *result = nullptr; |
844 | 176 | Value *mul = nullptr; |
845 | 5.80k | for (int i = 0; i < 32; i++5.63k ) { |
846 | 5.63k | if ((absY >> i) & 1) { |
847 | 1.31k | for (int j = i; j > lastSetPos; j--1.00k ) { |
848 | 1.00k | if (!mul) { |
849 | 176 | mul = x; |
850 | 832 | } else { |
851 | 832 | mul = Builder.CreateFMul(mul, mul); |
852 | 832 | } |
853 | 1.00k | } |
854 | | |
855 | 304 | result = (result == nullptr) ? mul176 : Builder.CreateFMul(result, mul)128 ; |
856 | 304 | lastSetPos = i; |
857 | 304 | } |
858 | 5.63k | } |
859 | | |
860 | | // Compute reciprocal for negative power values. |
861 | 176 | if (y < 0) { |
862 | 32 | Value *constOne = ConstantFP::get(x->getType(), 1); |
863 | 32 | result = Builder.CreateFDiv(constOne, result); |
864 | 32 | } |
865 | | |
866 | 176 | return result; |
867 | 184 | } |
868 | | |
869 | | Value *TranslatePowImpl(hlsl::OP *hlslOP, IRBuilder<> &Builder, Value *x, |
870 | 1.45k | Value *y, bool isFXCCompatMode = false) { |
871 | | // As applicable implement pow using only mul ops as done by Fxc. |
872 | 1.45k | int32_t p = 0; |
873 | 1.45k | if (CanUseFxcMulOnlyPatternForPow(Builder, x, y, p)) { |
874 | 304 | if (isFXCCompatMode) |
875 | 184 | return TranslatePowUsingFxcMulOnlyPattern(Builder, x, p); |
876 | | // Only take care 2 for it will not affect register pressure. |
877 | 120 | if (p == 2) |
878 | 56 | return Builder.CreateFMul(x, x); |
879 | 120 | } |
880 | | |
881 | | // Default to log-mul-exp pattern if previous scenarios don't apply. |
882 | | // t = log(x); |
883 | 1.21k | Value *logX = |
884 | 1.21k | TrivialDxilUnaryOperation(DXIL::OpCode::Log, x, hlslOP, Builder); |
885 | | // t = y * t; |
886 | 1.21k | Value *mulY = Builder.CreateFMul(logX, y); |
887 | | // pow = exp(t); |
888 | 1.21k | return TrivialDxilUnaryOperation(DXIL::OpCode::Exp, mulY, hlslOP, Builder); |
889 | 1.45k | } |
890 | | |
891 | | Value *TranslateAddUint64(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
892 | | HLOperationLowerHelper &helper, |
893 | | HLObjectOperationLowerHelper *pObjHelper, |
894 | 32 | bool &Translated) { |
895 | 32 | hlsl::OP *hlslOP = &helper.hlslOP; |
896 | 32 | IRBuilder<> Builder(CI); |
897 | 32 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
898 | 32 | Type *Ty = val->getType(); |
899 | 32 | VectorType *VT = dyn_cast<VectorType>(Ty); |
900 | 32 | if (!VT) { |
901 | 0 | dxilutil::EmitErrorOnInstruction( |
902 | 0 | CI, "AddUint64 can only be applied to uint2 and uint4 operands."); |
903 | 0 | return UndefValue::get(Ty); |
904 | 0 | } |
905 | | |
906 | 32 | unsigned size = VT->getNumElements(); |
907 | 32 | if (size != 2 && size != 424 ) { |
908 | 16 | dxilutil::EmitErrorOnInstruction( |
909 | 16 | CI, "AddUint64 can only be applied to uint2 and uint4 operands."); |
910 | 16 | return UndefValue::get(Ty); |
911 | 16 | } |
912 | 16 | Value *op0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
913 | 16 | Value *op1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
914 | | |
915 | 16 | Value *RetVal = UndefValue::get(Ty); |
916 | | |
917 | 16 | Function *AddC = hlslOP->GetOpFunc(DXIL::OpCode::UAddc, helper.i32Ty); |
918 | 16 | Value *opArg = Builder.getInt32(static_cast<unsigned>(DXIL::OpCode::UAddc)); |
919 | 40 | for (unsigned i = 0; i < size; i += 224 ) { |
920 | 24 | Value *low0 = Builder.CreateExtractElement(op0, i); |
921 | 24 | Value *low1 = Builder.CreateExtractElement(op1, i); |
922 | 24 | Value *lowWithC = Builder.CreateCall(AddC, {opArg, low0, low1}); |
923 | 24 | Value *low = Builder.CreateExtractValue(lowWithC, 0); |
924 | 24 | RetVal = Builder.CreateInsertElement(RetVal, low, i); |
925 | | |
926 | 24 | Value *carry = Builder.CreateExtractValue(lowWithC, 1); |
927 | | // Ext i1 to i32 |
928 | 24 | carry = Builder.CreateZExt(carry, helper.i32Ty); |
929 | | |
930 | 24 | Value *hi0 = Builder.CreateExtractElement(op0, i + 1); |
931 | 24 | Value *hi1 = Builder.CreateExtractElement(op1, i + 1); |
932 | 24 | Value *hi = Builder.CreateAdd(hi0, hi1); |
933 | 24 | hi = Builder.CreateAdd(hi, carry); |
934 | 24 | RetVal = Builder.CreateInsertElement(RetVal, hi, i + 1); |
935 | 24 | } |
936 | 16 | return RetVal; |
937 | 32 | } |
938 | | |
939 | 936 | bool IsValidLoadInput(Value *V) { |
940 | | // Must be load input. |
941 | | // TODO: report this error on front-end |
942 | 936 | if (!V || !isa<CallInst>(V)) { |
943 | 12 | return false; |
944 | 12 | } |
945 | 924 | CallInst *CI = cast<CallInst>(V); |
946 | | // Must be immediate. |
947 | 924 | ConstantInt *opArg = |
948 | 924 | cast<ConstantInt>(CI->getArgOperand(DXIL::OperandIndex::kOpcodeIdx)); |
949 | 924 | DXIL::OpCode op = static_cast<DXIL::OpCode>(opArg->getLimitedValue()); |
950 | 924 | if (op != DXIL::OpCode::LoadInput) { |
951 | 0 | return false; |
952 | 0 | } |
953 | 924 | return true; |
954 | 924 | } |
955 | | |
956 | | // Tunnel through insert/extract element and shuffle to find original source |
957 | | // of scalar value, or specified element (vecIdx) of vector value. |
958 | 936 | Value *FindScalarSource(Value *src, unsigned vecIdx = 0) { |
959 | 936 | Type *srcTy = src->getType()->getScalarType(); |
960 | 6.16k | while (src && !isa<UndefValue>(src)) { |
961 | 6.16k | if (src->getType()->isVectorTy()) { |
962 | 5.10k | if (InsertElementInst *IE = dyn_cast<InsertElementInst>(src)) { |
963 | 4.18k | unsigned curIdx = (unsigned)cast<ConstantInt>(IE->getOperand(2)) |
964 | 4.18k | ->getUniqueInteger() |
965 | 4.18k | .getLimitedValue(); |
966 | 4.18k | src = IE->getOperand((curIdx == vecIdx) ? 1938 : 03.25k ); |
967 | 4.18k | } else if (ShuffleVectorInst *916 SV916 = dyn_cast<ShuffleVectorInst>(src)) { |
968 | 904 | int newIdx = SV->getMaskValue(vecIdx); |
969 | 904 | if (newIdx < 0) |
970 | 0 | return UndefValue::get(srcTy); |
971 | 904 | vecIdx = (unsigned)newIdx; |
972 | 904 | src = SV->getOperand(0); |
973 | 904 | unsigned numElt = src->getType()->getVectorNumElements(); |
974 | 904 | if (numElt <= vecIdx) { |
975 | 0 | vecIdx -= numElt; |
976 | 0 | src = SV->getOperand(1); |
977 | 0 | } |
978 | 904 | } else { |
979 | 12 | return UndefValue::get(srcTy); // Didn't find it. |
980 | 12 | } |
981 | 5.10k | } else { |
982 | 1.06k | if (ExtractElementInst *EE = dyn_cast<ExtractElementInst>(src)) { |
983 | 56 | vecIdx = (unsigned)cast<ConstantInt>(EE->getIndexOperand()) |
984 | 56 | ->getUniqueInteger() |
985 | 56 | .getLimitedValue(); |
986 | 56 | src = EE->getVectorOperand(); |
987 | 1.00k | } else if (hlsl::dxilutil::IsConvergentMarker(src)) { |
988 | 80 | src = hlsl::dxilutil::GetConvergentSource(src); |
989 | 924 | } else { |
990 | 924 | break; // Found it. |
991 | 924 | } |
992 | 1.06k | } |
993 | 6.16k | } |
994 | 924 | return src; |
995 | 936 | } |
996 | | |
997 | | // Finds corresponding inputs, calls translation for each, and returns |
998 | | // resulting vector or scalar. |
999 | | // Uses functor that takes (inputElemID, rowIdx, colIdx), and returns |
1000 | | // translation for one input scalar. |
1001 | | Value *TranslateEvalHelper( |
1002 | | CallInst *CI, Value *val, IRBuilder<> &Builder, |
1003 | 266 | std::function<Value *(Value *, Value *, Value *)> fnTranslateScalarInput) { |
1004 | 266 | Type *Ty = CI->getType(); |
1005 | 266 | Value *result = UndefValue::get(Ty); |
1006 | 266 | if (Ty->isVectorTy()) { |
1007 | 1.10k | for (unsigned i = 0; i < Ty->getVectorNumElements(); ++i882 ) { |
1008 | 894 | Value *InputEl = FindScalarSource(val, i); |
1009 | 894 | if (!IsValidLoadInput(InputEl)) { |
1010 | 12 | dxilutil::EmitErrorOnInstruction( |
1011 | 12 | CI, "attribute evaluation can only be done " |
1012 | 12 | "on values taken directly from inputs."); |
1013 | 12 | return result; |
1014 | 12 | } |
1015 | 882 | CallInst *loadInput = cast<CallInst>(InputEl); |
1016 | 882 | Value *inputElemID = |
1017 | 882 | loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); |
1018 | 882 | Value *rowIdx = |
1019 | 882 | loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); |
1020 | 882 | Value *colIdx = |
1021 | 882 | loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); |
1022 | 882 | Value *Elt = fnTranslateScalarInput(inputElemID, rowIdx, colIdx); |
1023 | 882 | result = Builder.CreateInsertElement(result, Elt, i); |
1024 | 882 | } |
1025 | 224 | } else { |
1026 | 42 | Value *InputEl = FindScalarSource(val); |
1027 | 42 | if (!IsValidLoadInput(InputEl)) { |
1028 | 0 | dxilutil::EmitErrorOnInstruction(CI, |
1029 | 0 | "attribute evaluation can only be done " |
1030 | 0 | "on values taken directly from inputs."); |
1031 | 0 | return result; |
1032 | 0 | } |
1033 | 42 | CallInst *loadInput = cast<CallInst>(InputEl); |
1034 | 42 | Value *inputElemID = |
1035 | 42 | loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); |
1036 | 42 | Value *rowIdx = |
1037 | 42 | loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); |
1038 | 42 | Value *colIdx = |
1039 | 42 | loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); |
1040 | 42 | result = fnTranslateScalarInput(inputElemID, rowIdx, colIdx); |
1041 | 42 | } |
1042 | 254 | return result; |
1043 | 266 | } |
1044 | | |
1045 | | Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1046 | | HLOperationLowerHelper &helper, |
1047 | | HLObjectOperationLowerHelper *pObjHelper, |
1048 | 80 | bool &Translated) { |
1049 | 80 | hlsl::OP *hlslOP = &helper.hlslOP; |
1050 | 80 | Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
1051 | 80 | Value *sampleIdx = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
1052 | 80 | IRBuilder<> Builder(CI); |
1053 | 80 | OP::OpCode opcode = OP::OpCode::EvalSampleIndex; |
1054 | 80 | Value *opArg = hlslOP->GetU32Const((unsigned)opcode); |
1055 | 80 | Function *evalFunc = |
1056 | 80 | hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType()); |
1057 | | |
1058 | 80 | return TranslateEvalHelper( |
1059 | 80 | CI, val, Builder, |
1060 | 160 | [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value * { |
1061 | 160 | return Builder.CreateCall( |
1062 | 160 | evalFunc, {opArg, inputElemID, rowIdx, colIdx, sampleIdx}); |
1063 | 160 | }); |
1064 | 80 | } |
1065 | | |
1066 | | Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1067 | | HLOperationLowerHelper &helper, |
1068 | | HLObjectOperationLowerHelper *pObjHelper, |
1069 | 16 | bool &Translated) { |
1070 | 16 | hlsl::OP *hlslOP = &helper.hlslOP; |
1071 | 16 | Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
1072 | 16 | Value *offset = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
1073 | 16 | IRBuilder<> Builder(CI); |
1074 | 16 | Value *offsetX = Builder.CreateExtractElement(offset, (uint64_t)0); |
1075 | 16 | Value *offsetY = Builder.CreateExtractElement(offset, 1); |
1076 | 16 | OP::OpCode opcode = OP::OpCode::EvalSnapped; |
1077 | 16 | Value *opArg = hlslOP->GetU32Const((unsigned)opcode); |
1078 | 16 | Function *evalFunc = |
1079 | 16 | hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType()); |
1080 | | |
1081 | 16 | return TranslateEvalHelper( |
1082 | 16 | CI, val, Builder, |
1083 | 64 | [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value * { |
1084 | 64 | return Builder.CreateCall( |
1085 | 64 | evalFunc, {opArg, inputElemID, rowIdx, colIdx, offsetX, offsetY}); |
1086 | 64 | }); |
1087 | 16 | } |
1088 | | |
1089 | | Value *TranslateEvalCentroid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1090 | | HLOperationLowerHelper &helper, |
1091 | | HLObjectOperationLowerHelper *pObjHelper, |
1092 | 88 | bool &Translated) { |
1093 | 88 | hlsl::OP *hlslOP = &helper.hlslOP; |
1094 | 88 | Value *val = CI->getArgOperand(DXIL::OperandIndex::kUnarySrc0OpIdx); |
1095 | 88 | IRBuilder<> Builder(CI); |
1096 | 88 | OP::OpCode opcode = OP::OpCode::EvalCentroid; |
1097 | 88 | Value *opArg = hlslOP->GetU32Const((unsigned)opcode); |
1098 | 88 | Function *evalFunc = |
1099 | 88 | hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType()); |
1100 | | |
1101 | 88 | return TranslateEvalHelper( |
1102 | 88 | CI, val, Builder, |
1103 | 410 | [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value * { |
1104 | 410 | return Builder.CreateCall(evalFunc, |
1105 | 410 | {opArg, inputElemID, rowIdx, colIdx}); |
1106 | 410 | }); |
1107 | 88 | } |
1108 | | |
1109 | | /* |
1110 | | HLSL: bool RWDispatchNodeInputRecord<recordType>::FinishedCrossGroupSharing() |
1111 | | DXIL: i1 @dx.op.finishedCrossGroupSharing(i32 %Opcode, |
1112 | | %dx.types.NodeRecordHandle %NodeInputRecordHandle) |
1113 | | */ |
1114 | | Value *TranslateNodeFinishedCrossGroupSharing( |
1115 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1116 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
1117 | 8 | bool &Translated) { |
1118 | 8 | hlsl::OP *OP = &helper.hlslOP; |
1119 | | |
1120 | 8 | Function *dxilFunc = OP->GetOpFunc(op, Type::getVoidTy(CI->getContext())); |
1121 | 8 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
1122 | 8 | DXASSERT_NOMSG(handle->getType() == OP->GetNodeRecordHandleType()); |
1123 | 8 | Value *opArg = OP->GetU32Const((unsigned)op); |
1124 | | |
1125 | 8 | IRBuilder<> Builder(CI); |
1126 | 8 | return Builder.CreateCall(dxilFunc, {opArg, handle}); |
1127 | 8 | } |
1128 | | |
1129 | | /* |
1130 | | HLSL: |
1131 | | bool NodeOutput<recordType>::IsValid() |
1132 | | bool EmptyNodeOutput::IsValid() |
1133 | | DXIL: |
1134 | | i1 @dx.op.nodeOutputIsValid(i32 %Opcode, %dx.types.NodeHandle |
1135 | | %NodeOutputHandle) |
1136 | | */ |
1137 | | Value *TranslateNodeOutputIsValid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1138 | | HLOperationLowerHelper &helper, |
1139 | | HLObjectOperationLowerHelper *pObjHelper, |
1140 | 48 | bool &Translated) { |
1141 | 48 | hlsl::OP *OP = &helper.hlslOP; |
1142 | 48 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
1143 | 48 | Function *dxilFunc = OP->GetOpFunc(op, Type::getVoidTy(CI->getContext())); |
1144 | 48 | Value *opArg = OP->GetU32Const((unsigned)op); |
1145 | | |
1146 | 48 | IRBuilder<> Builder(CI); |
1147 | 48 | return Builder.CreateCall(dxilFunc, {opArg, handle}); |
1148 | 48 | } |
1149 | | |
1150 | | Value *TranslateGetAttributeAtVertex(CallInst *CI, IntrinsicOp IOP, |
1151 | | OP::OpCode op, |
1152 | | HLOperationLowerHelper &helper, |
1153 | | HLObjectOperationLowerHelper *pObjHelper, |
1154 | 82 | bool &Translated) { |
1155 | 82 | DXASSERT(op == OP::OpCode::AttributeAtVertex, "Wrong opcode to translate"); |
1156 | 82 | hlsl::OP *hlslOP = &helper.hlslOP; |
1157 | 82 | IRBuilder<> Builder(CI); |
1158 | 82 | Value *val = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc0OpIdx); |
1159 | 82 | Value *vertexIdx = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc1OpIdx); |
1160 | 82 | Value *vertexI8Idx = |
1161 | 82 | Builder.CreateTrunc(vertexIdx, Type::getInt8Ty(CI->getContext())); |
1162 | 82 | Value *opArg = hlslOP->GetU32Const((unsigned)op); |
1163 | 82 | Function *evalFunc = hlslOP->GetOpFunc(op, val->getType()->getScalarType()); |
1164 | | |
1165 | 82 | return TranslateEvalHelper( |
1166 | 82 | CI, val, Builder, |
1167 | 290 | [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value * { |
1168 | 290 | return Builder.CreateCall( |
1169 | 290 | evalFunc, {opArg, inputElemID, rowIdx, colIdx, vertexI8Idx}); |
1170 | 290 | }); |
1171 | 82 | } |
1172 | | /* |
1173 | | |
1174 | | HLSL: |
1175 | | void Barrier(uint MemoryTypeFlags, uint SemanticFlags) |
1176 | | void Barrier(Object o, uint SemanticFlags) |
1177 | | |
1178 | | All UAVs and/or Node Records by types: |
1179 | | void @dx.op.barrierByMemoryType(i32 %Opcode, |
1180 | | i32 %MemoryTypeFlags, i32 %SemanticFlags) |
1181 | | |
1182 | | UAV by handle: |
1183 | | void @dx.op.barrierByMemoryHandle(i32 %Opcode, |
1184 | | %dx.types.Handle %Object, i32 %SemanticFlags) |
1185 | | |
1186 | | Node Record by handle: |
1187 | | void @dx.op.barrierByMemoryHandle(i32 %Opcode, |
1188 | | %dx.types.NodeRecordHandle %Object, i32 %SemanticFlags) |
1189 | | */ |
1190 | | |
1191 | | Value *TranslateBarrier(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1192 | | HLOperationLowerHelper &helper, |
1193 | | HLObjectOperationLowerHelper *pObjHelper, |
1194 | 242 | bool &Translated) { |
1195 | 242 | hlsl::OP *OP = &helper.hlslOP; |
1196 | 242 | Value *HandleOrMemoryFlags = |
1197 | 242 | CI->getArgOperand(HLOperandIndex::kBarrierMemoryTypeFlagsOpIdx); |
1198 | 242 | Value *SemanticFlags = |
1199 | 242 | CI->getArgOperand(HLOperandIndex::kBarrierSemanticFlagsOpIdx); |
1200 | 242 | IRBuilder<> Builder(CI); |
1201 | | |
1202 | 242 | if (HandleOrMemoryFlags->getType()->isIntegerTy()) { |
1203 | 86 | op = OP::OpCode::BarrierByMemoryType; |
1204 | 156 | } else if (HandleOrMemoryFlags->getType() == OP->GetHandleType()) { |
1205 | 80 | op = OP::OpCode::BarrierByMemoryHandle; |
1206 | 80 | } else if (76 HandleOrMemoryFlags->getType() == OP->GetNodeRecordHandleType()76 ) { |
1207 | 76 | op = OP::OpCode::BarrierByNodeRecordHandle; |
1208 | 76 | } else { |
1209 | 0 | DXASSERT(false, "Shouldn't get here"); |
1210 | 0 | } |
1211 | | |
1212 | 242 | Function *dxilFunc = OP->GetOpFunc(op, CI->getType()); |
1213 | 242 | Constant *opArg = OP->GetU32Const((unsigned)op); |
1214 | | |
1215 | 242 | Value *args[] = {opArg, HandleOrMemoryFlags, SemanticFlags}; |
1216 | | |
1217 | 242 | Builder.CreateCall(dxilFunc, args); |
1218 | 242 | return nullptr; |
1219 | 242 | } |
1220 | | |
1221 | | Value *TranslateGetGroupOrThreadNodeOutputRecords( |
1222 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1223 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
1224 | 276 | bool isPerThreadRecord, bool &Translated) { |
1225 | 276 | IRBuilder<> Builder(CI); |
1226 | 276 | hlsl::OP *OP = &helper.hlslOP; |
1227 | 276 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
1228 | 276 | Function *dxilFunc = OP->GetOpFunc(op, Builder.getVoidTy()); |
1229 | 276 | Value *opArg = OP->GetU32Const((unsigned)op); |
1230 | 276 | Value *count = |
1231 | 276 | CI->getArgOperand(HLOperandIndex::kAllocateRecordNumRecordsIdx); |
1232 | 276 | Value *perThread = OP->GetI1Const(isPerThreadRecord); |
1233 | | |
1234 | 276 | Value *args[] = {opArg, handle, count, perThread}; |
1235 | | |
1236 | 276 | return Builder.CreateCall(dxilFunc, args); |
1237 | 276 | } |
1238 | | |
1239 | | /* |
1240 | | HLSL: |
1241 | | GroupNodeOutputRecords<recordType> |
1242 | | NodeOutput<recordType>::GetGroupNodeOutputRecords(uint numRecords); DXIL: |
1243 | | %dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 %Opcode, |
1244 | | %dx.types.NodeHandle %NodeOutputHandle, i32 %NumRecords, i1 %PerThread) |
1245 | | */ |
1246 | | Value * |
1247 | | TranslateGetGroupNodeOutputRecords(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1248 | | HLOperationLowerHelper &helper, |
1249 | | HLObjectOperationLowerHelper *pObjHelper, |
1250 | 146 | bool &Translated) { |
1251 | 146 | return TranslateGetGroupOrThreadNodeOutputRecords( |
1252 | 146 | CI, IOP, op, helper, pObjHelper, /* isPerThreadRecord */ false, |
1253 | 146 | Translated); |
1254 | 146 | } |
1255 | | |
1256 | | /* |
1257 | | HLSL: |
1258 | | ThreadNodeOutputRecords<recordType> |
1259 | | NodeOutput<recordType>::GetThreadNodeOutputRecords(uint numRecords) DXIL: |
1260 | | %dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 %Opcode, |
1261 | | %dx.types.NodeHandle %NodeOutputHandle, i32 %NumRecords, i1 %PerThread) |
1262 | | */ |
1263 | | Value *TranslateGetThreadNodeOutputRecords( |
1264 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1265 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
1266 | 130 | bool &Translated) { |
1267 | 130 | return TranslateGetGroupOrThreadNodeOutputRecords( |
1268 | 130 | CI, IOP, op, helper, pObjHelper, /* isPerThreadRecord */ true, |
1269 | 130 | Translated); |
1270 | 130 | } |
1271 | | |
1272 | | /* |
1273 | | HLSL: |
1274 | | uint EmptyNodeInput::Count() |
1275 | | uint GroupNodeInputRecords<recordType>::Count() |
1276 | | uint RWGroupNodeInputRecords<recordType>::Count() |
1277 | | |
1278 | | DXIL: |
1279 | | i32 @dx.op.getInputRecordCount(i32 %Opcode, %dx.types.NodeRecordHandle |
1280 | | %NodeInputHandle) |
1281 | | */ |
1282 | | Value * |
1283 | | TranslateNodeGetInputRecordCount(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1284 | | HLOperationLowerHelper &helper, |
1285 | | HLObjectOperationLowerHelper *pObjHelper, |
1286 | 30 | bool &Translated) { |
1287 | 30 | hlsl::OP *OP = &helper.hlslOP; |
1288 | | |
1289 | 30 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
1290 | 30 | DXASSERT_NOMSG(handle->getType() == OP->GetNodeRecordHandleType()); |
1291 | 30 | Function *dxilFunc = OP->GetOpFunc(op, Type::getVoidTy(CI->getContext())); |
1292 | 30 | Value *opArg = OP->GetU32Const((unsigned)op); |
1293 | 30 | Value *args[] = {opArg, handle}; |
1294 | | |
1295 | 30 | IRBuilder<> Builder(CI); |
1296 | 30 | return Builder.CreateCall(dxilFunc, args); |
1297 | 30 | } |
1298 | | |
1299 | | Value *TrivialNoArgOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1300 | | HLOperationLowerHelper &helper, |
1301 | | HLObjectOperationLowerHelper *pObjHelper, |
1302 | 176 | bool &Translated) { |
1303 | 176 | hlsl::OP *hlslOP = &helper.hlslOP; |
1304 | 176 | Type *Ty = Type::getVoidTy(CI->getContext()); |
1305 | | |
1306 | 176 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
1307 | 176 | Value *args[] = {opArg}; |
1308 | 176 | IRBuilder<> Builder(CI); |
1309 | 176 | Value *dxilOp = TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); |
1310 | | |
1311 | 176 | return dxilOp; |
1312 | 176 | } |
1313 | | |
1314 | | Value *TrivialNoArgWithRetOperation(CallInst *CI, IntrinsicOp IOP, |
1315 | | OP::OpCode opcode, |
1316 | | HLOperationLowerHelper &helper, |
1317 | | HLObjectOperationLowerHelper *pObjHelper, |
1318 | 360 | bool &Translated) { |
1319 | 360 | hlsl::OP *hlslOP = &helper.hlslOP; |
1320 | 360 | Type *Ty = CI->getType(); |
1321 | | |
1322 | 360 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
1323 | 360 | Value *args[] = {opArg}; |
1324 | 360 | IRBuilder<> Builder(CI); |
1325 | 360 | Value *dxilOp = TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); |
1326 | | |
1327 | 360 | return dxilOp; |
1328 | 360 | } |
1329 | | |
1330 | | Value *TrivialNoArgWithRetNoOverloadOperation( |
1331 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1332 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
1333 | 20 | bool &Translated) { |
1334 | 20 | hlsl::OP *hlslOP = &helper.hlslOP; |
1335 | 20 | Type *Ty = CI->getType(); |
1336 | | |
1337 | 20 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
1338 | 20 | Value *args[] = {opArg}; |
1339 | 20 | IRBuilder<> Builder(CI); |
1340 | 20 | return TrivialDxilOperation(opcode, args, Builder.getVoidTy(), Ty, hlslOP, |
1341 | 20 | Builder); |
1342 | 20 | } |
1343 | | |
1344 | | Value *TranslateGetRTSamplePos(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1345 | | HLOperationLowerHelper &helper, |
1346 | | HLObjectOperationLowerHelper *pObjHelper, |
1347 | 16 | bool &Translated) { |
1348 | 16 | hlsl::OP *hlslOP = &helper.hlslOP; |
1349 | 16 | OP::OpCode opcode = OP::OpCode::RenderTargetGetSamplePosition; |
1350 | 16 | IRBuilder<> Builder(CI); |
1351 | | |
1352 | 16 | Type *Ty = Type::getVoidTy(CI->getContext()); |
1353 | 16 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
1354 | | |
1355 | 16 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
1356 | 16 | Value *args[] = {opArg, val}; |
1357 | | |
1358 | 16 | Value *samplePos = |
1359 | 16 | TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); |
1360 | | |
1361 | 16 | Value *result = UndefValue::get(CI->getType()); |
1362 | 16 | Value *samplePosX = Builder.CreateExtractValue(samplePos, 0); |
1363 | 16 | Value *samplePosY = Builder.CreateExtractValue(samplePos, 1); |
1364 | 16 | result = Builder.CreateInsertElement(result, samplePosX, (uint64_t)0); |
1365 | 16 | result = Builder.CreateInsertElement(result, samplePosY, 1); |
1366 | 16 | return result; |
1367 | 16 | } |
1368 | | |
1369 | | // val QuadReadLaneAt(val, uint); |
1370 | | Value *TranslateQuadReadLaneAt(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1371 | | HLOperationLowerHelper &helper, |
1372 | | HLObjectOperationLowerHelper *pObjHelper, |
1373 | 66 | bool &Translated) { |
1374 | 66 | hlsl::OP *hlslOP = &helper.hlslOP; |
1375 | 66 | Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)}; |
1376 | 66 | return TrivialDxilOperation(DXIL::OpCode::QuadReadLaneAt, refArgs, |
1377 | 66 | CI->getOperand(1)->getType(), CI, hlslOP); |
1378 | 66 | } |
1379 | | |
1380 | | // Quad intrinsics of the form fn(val,QuadOpKind)->val |
1381 | | Value *TranslateQuadAnyAll(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1382 | | HLOperationLowerHelper &helper, |
1383 | | HLObjectOperationLowerHelper *pObjHelper, |
1384 | 22 | bool &Translated) { |
1385 | 22 | hlsl::OP *hlslOP = &helper.hlslOP; |
1386 | 22 | DXIL::QuadVoteOpKind opKind; |
1387 | 22 | switch (IOP) { |
1388 | 10 | case IntrinsicOp::IOP_QuadAll: |
1389 | 10 | opKind = DXIL::QuadVoteOpKind::All; |
1390 | 10 | break; |
1391 | 12 | case IntrinsicOp::IOP_QuadAny: |
1392 | 12 | opKind = DXIL::QuadVoteOpKind::Any; |
1393 | 12 | break; |
1394 | 0 | default: |
1395 | 0 | llvm_unreachable( |
1396 | 22 | "QuadAny/QuadAll translation called with wrong isntruction"); |
1397 | 22 | } |
1398 | 22 | Constant *OpArg = hlslOP->GetI8Const((unsigned)opKind); |
1399 | 22 | Value *refArgs[] = {nullptr, CI->getOperand(1), OpArg}; |
1400 | 22 | return TrivialDxilOperation(DXIL::OpCode::QuadVote, refArgs, |
1401 | 22 | CI->getOperand(1)->getType(), CI, hlslOP); |
1402 | 22 | } |
1403 | | |
1404 | | // Wave intrinsics of the form fn(val,QuadOpKind)->val |
1405 | | Value *TranslateQuadReadAcross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1406 | | HLOperationLowerHelper &helper, |
1407 | | HLObjectOperationLowerHelper *pObjHelper, |
1408 | 102 | bool &Translated) { |
1409 | 102 | hlsl::OP *hlslOP = &helper.hlslOP; |
1410 | 102 | DXIL::QuadOpKind opKind; |
1411 | 102 | switch (IOP) { |
1412 | 34 | case IntrinsicOp::IOP_QuadReadAcrossX: |
1413 | 34 | opKind = DXIL::QuadOpKind::ReadAcrossX; |
1414 | 34 | break; |
1415 | 32 | case IntrinsicOp::IOP_QuadReadAcrossY: |
1416 | 32 | opKind = DXIL::QuadOpKind::ReadAcrossY; |
1417 | 32 | break; |
1418 | 0 | default: |
1419 | 0 | DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_QuadReadAcrossDiagonal); |
1420 | 0 | LLVM_FALLTHROUGH; |
1421 | 36 | case IntrinsicOp::IOP_QuadReadAcrossDiagonal: |
1422 | 36 | opKind = DXIL::QuadOpKind::ReadAcrossDiagonal; |
1423 | 36 | break; |
1424 | 102 | } |
1425 | 102 | Constant *OpArg = hlslOP->GetI8Const((unsigned)opKind); |
1426 | 102 | Value *refArgs[] = {nullptr, CI->getOperand(1), OpArg}; |
1427 | 102 | return TrivialDxilOperation(DXIL::OpCode::QuadOp, refArgs, |
1428 | 102 | CI->getOperand(1)->getType(), CI, hlslOP); |
1429 | 102 | } |
1430 | | |
1431 | | // WaveAllEqual(val<n>)->bool<n> |
1432 | | Value *TranslateWaveAllEqual(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1433 | | HLOperationLowerHelper &helper, |
1434 | | HLObjectOperationLowerHelper *pObjHelper, |
1435 | 80 | bool &Translated) { |
1436 | 80 | hlsl::OP *hlslOP = &helper.hlslOP; |
1437 | 80 | Value *src = CI->getArgOperand(HLOperandIndex::kWaveAllEqualValueOpIdx); |
1438 | 80 | IRBuilder<> Builder(CI); |
1439 | | |
1440 | 80 | Type *Ty = src->getType(); |
1441 | 80 | Type *RetTy = Type::getInt1Ty(CI->getContext()); |
1442 | 80 | if (Ty->isVectorTy()) |
1443 | 4 | RetTy = VectorType::get(RetTy, Ty->getVectorNumElements()); |
1444 | | |
1445 | 80 | Constant *opArg = |
1446 | 80 | hlslOP->GetU32Const((unsigned)DXIL::OpCode::WaveActiveAllEqual); |
1447 | 80 | Value *args[] = {opArg, src}; |
1448 | | |
1449 | 80 | return TrivialDxilOperation(DXIL::OpCode::WaveActiveAllEqual, args, Ty, RetTy, |
1450 | 80 | hlslOP, Builder); |
1451 | 80 | } |
1452 | | |
1453 | | static Value *TranslateWaveMatchFixReturn(IRBuilder<> &Builder, Type *TargetTy, |
1454 | 46 | Value *RetVal) { |
1455 | 46 | Value *ResVec = UndefValue::get(TargetTy); |
1456 | 230 | for (unsigned i = 0; i != 4; ++i184 ) { |
1457 | 184 | Value *Elt = Builder.CreateExtractValue(RetVal, i); |
1458 | 184 | ResVec = Builder.CreateInsertElement(ResVec, Elt, i); |
1459 | 184 | } |
1460 | | |
1461 | 46 | return ResVec; |
1462 | 46 | } |
1463 | | |
1464 | | // WaveMatch(val<n>)->uint4 |
1465 | | Value *TranslateWaveMatch(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opc, |
1466 | | HLOperationLowerHelper &Helper, |
1467 | | HLObjectOperationLowerHelper *ObjHelper, |
1468 | 46 | bool &Translated) { |
1469 | 46 | hlsl::OP *Op = &Helper.hlslOP; |
1470 | 46 | IRBuilder<> Builder(CI); |
1471 | | |
1472 | 46 | Value *Val = CI->getArgOperand(1); |
1473 | 46 | Type *ValTy = Val->getType(); |
1474 | 46 | Type *EltTy = ValTy->getScalarType(); |
1475 | 46 | Constant *OpcArg = Op->GetU32Const((unsigned)DXIL::OpCode::WaveMatch); |
1476 | | |
1477 | | // If we don't need to scalarize, just emit the call and exit |
1478 | 46 | const bool Scalarize = |
1479 | 46 | ValTy->isVectorTy() && |
1480 | 46 | !Op->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus()14 ; |
1481 | 46 | if (!Scalarize) { |
1482 | 36 | Value *Fn = Op->GetOpFunc(OP::OpCode::WaveMatch, ValTy); |
1483 | 36 | Value *Args[] = {OpcArg, Val}; |
1484 | 36 | Value *Ret = Builder.CreateCall(Fn, Args); |
1485 | 36 | return TranslateWaveMatchFixReturn(Builder, CI->getType(), Ret); |
1486 | 36 | } |
1487 | | |
1488 | | // Generate a dx.op.waveMatch call for each scalar in the input, and perform |
1489 | | // a bitwise AND between each result to derive the final bitmask |
1490 | | |
1491 | | // (1) Collect the list of all scalar inputs (e.g. decompose vectors) |
1492 | 10 | SmallVector<Value *, 4> ScalarInputs; |
1493 | | |
1494 | 50 | for (uint64_t I = 0, E = ValTy->getVectorNumElements(); I != E; ++I40 ) { |
1495 | 40 | Value *Elt = Builder.CreateExtractElement(Val, I); |
1496 | 40 | ScalarInputs.push_back(Elt); |
1497 | 40 | } |
1498 | | |
1499 | | // (2) For each scalar, emit a call to dx.op.waveMatch. If this is not the |
1500 | | // first scalar, then AND the result with the accumulator. |
1501 | 10 | Value *Fn = Op->GetOpFunc(OP::OpCode::WaveMatch, EltTy); |
1502 | 10 | Value *Args[] = {OpcArg, ScalarInputs[0]}; |
1503 | 10 | Value *Res = Builder.CreateCall(Fn, Args); |
1504 | | |
1505 | 40 | for (unsigned I = 1, E = ScalarInputs.size(); I != E; ++I30 ) { |
1506 | 30 | Value *Args[] = {OpcArg, ScalarInputs[I]}; |
1507 | 30 | Value *Call = Builder.CreateCall(Fn, Args); |
1508 | | |
1509 | | // Generate bitwise AND of the components |
1510 | 150 | for (unsigned J = 0; J != 4; ++J120 ) { |
1511 | 120 | Value *ResVal = Builder.CreateExtractValue(Res, J); |
1512 | 120 | Value *CallVal = Builder.CreateExtractValue(Call, J); |
1513 | 120 | Value *And = Builder.CreateAnd(ResVal, CallVal); |
1514 | 120 | Res = Builder.CreateInsertValue(Res, And, J); |
1515 | 120 | } |
1516 | 30 | } |
1517 | | |
1518 | | // (3) Convert the final aggregate into a vector to make the types match |
1519 | 10 | return TranslateWaveMatchFixReturn(Builder, CI->getType(), Res); |
1520 | 46 | } |
1521 | | |
1522 | | // Wave intrinsics of the form fn(valA)->valB, where no overloading takes place |
1523 | | Value *TranslateWaveA2B(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1524 | | HLOperationLowerHelper &helper, |
1525 | | HLObjectOperationLowerHelper *pObjHelper, |
1526 | 162 | bool &Translated) { |
1527 | 162 | hlsl::OP *hlslOP = &helper.hlslOP; |
1528 | 162 | Value *refArgs[] = {nullptr, CI->getOperand(1)}; |
1529 | 162 | return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP); |
1530 | 162 | } |
1531 | | // Wave ballot intrinsic. |
1532 | | Value *TranslateWaveBallot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1533 | | HLOperationLowerHelper &helper, |
1534 | | HLObjectOperationLowerHelper *pObjHelper, |
1535 | 32 | bool &Translated) { |
1536 | | // The high-level operation is uint4 ballot(i1). |
1537 | | // The DXIL operation is struct.u4 ballot(i1). |
1538 | | // To avoid updating users with more than a simple replace, we translate into |
1539 | | // a call into struct.u4, then reassemble the vector. |
1540 | | // Scalarization and constant propagation take care of cleanup. |
1541 | 32 | IRBuilder<> B(CI); |
1542 | | |
1543 | | // Make the DXIL call itself. |
1544 | 32 | hlsl::OP *hlslOP = &helper.hlslOP; |
1545 | 32 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
1546 | 32 | Value *refArgs[] = {opArg, CI->getOperand(1)}; |
1547 | 32 | Function *dxilFunc = |
1548 | 32 | hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext())); |
1549 | 32 | Value *dxilVal = |
1550 | 32 | B.CreateCall(dxilFunc, refArgs, hlslOP->GetOpCodeName(opcode)); |
1551 | | |
1552 | | // Assign from the call results into a vector. |
1553 | 32 | Type *ResTy = CI->getType(); |
1554 | 32 | DXASSERT_NOMSG(ResTy->isVectorTy() && ResTy->getVectorNumElements() == 4); |
1555 | 32 | DXASSERT_NOMSG(dxilVal->getType()->isStructTy() && |
1556 | 32 | dxilVal->getType()->getNumContainedTypes() == 4); |
1557 | | |
1558 | | // 'x' component is the first vector element, highest bits. |
1559 | 32 | Value *ResVal = llvm::UndefValue::get(ResTy); |
1560 | 160 | for (unsigned Idx = 0; Idx < 4; ++Idx128 ) { |
1561 | 128 | ResVal = B.CreateInsertElement( |
1562 | 128 | ResVal, B.CreateExtractValue(dxilVal, ArrayRef<unsigned>(Idx)), Idx); |
1563 | 128 | } |
1564 | | |
1565 | 32 | return ResVal; |
1566 | 32 | } |
1567 | | |
1568 | 670 | static bool WaveIntrinsicNeedsSign(OP::OpCode opcode) { |
1569 | 670 | return opcode == OP::OpCode::WaveActiveOp || |
1570 | 670 | opcode == OP::OpCode::WavePrefixOp288 ; |
1571 | 670 | } |
1572 | | |
1573 | 946 | static unsigned WaveIntrinsicToSignedOpKind(IntrinsicOp IOP) { |
1574 | 946 | if (IOP == IntrinsicOp::IOP_WaveActiveUMax || |
1575 | 946 | IOP == IntrinsicOp::IOP_WaveActiveUMin908 || |
1576 | 946 | IOP == IntrinsicOp::IOP_WaveActiveUSum870 || |
1577 | 946 | IOP == IntrinsicOp::IOP_WaveActiveUProduct840 || |
1578 | 946 | IOP == IntrinsicOp::IOP_WaveMultiPrefixUProduct834 || |
1579 | 946 | IOP == IntrinsicOp::IOP_WaveMultiPrefixUSum820 || |
1580 | 946 | IOP == IntrinsicOp::IOP_WavePrefixUSum806 || |
1581 | 946 | IOP == IntrinsicOp::IOP_WavePrefixUProduct776 ) |
1582 | 176 | return (unsigned)DXIL::SignedOpKind::Unsigned; |
1583 | 770 | return (unsigned)DXIL::SignedOpKind::Signed; |
1584 | 946 | } |
1585 | | |
1586 | 946 | static unsigned WaveIntrinsicToOpKind(IntrinsicOp IOP) { |
1587 | 946 | switch (IOP) { |
1588 | | // Bit operations. |
1589 | 28 | case IntrinsicOp::IOP_WaveActiveBitOr: |
1590 | 28 | return (unsigned)DXIL::WaveBitOpKind::Or; |
1591 | 62 | case IntrinsicOp::IOP_WaveActiveBitAnd: |
1592 | 62 | return (unsigned)DXIL::WaveBitOpKind::And; |
1593 | 44 | case IntrinsicOp::IOP_WaveActiveBitXor: |
1594 | 44 | return (unsigned)DXIL::WaveBitOpKind::Xor; |
1595 | | // Prefix operations. |
1596 | 44 | case IntrinsicOp::IOP_WavePrefixSum: |
1597 | 74 | case IntrinsicOp::IOP_WavePrefixUSum: |
1598 | 74 | return (unsigned)DXIL::WaveOpKind::Sum; |
1599 | 74 | case IntrinsicOp::IOP_WavePrefixProduct: |
1600 | 80 | case IntrinsicOp::IOP_WavePrefixUProduct: |
1601 | 80 | return (unsigned)DXIL::WaveOpKind::Product; |
1602 | | // Numeric operations. |
1603 | 46 | case IntrinsicOp::IOP_WaveActiveMax: |
1604 | 84 | case IntrinsicOp::IOP_WaveActiveUMax: |
1605 | 84 | return (unsigned)DXIL::WaveOpKind::Max; |
1606 | 60 | case IntrinsicOp::IOP_WaveActiveMin: |
1607 | 98 | case IntrinsicOp::IOP_WaveActiveUMin: |
1608 | 98 | return (unsigned)DXIL::WaveOpKind::Min; |
1609 | 90 | case IntrinsicOp::IOP_WaveActiveSum: |
1610 | 120 | case IntrinsicOp::IOP_WaveActiveUSum: |
1611 | 120 | return (unsigned)DXIL::WaveOpKind::Sum; |
1612 | 74 | case IntrinsicOp::IOP_WaveActiveProduct: |
1613 | 80 | case IntrinsicOp::IOP_WaveActiveUProduct: |
1614 | | // MultiPrefix operations |
1615 | 124 | case IntrinsicOp::IOP_WaveMultiPrefixBitAnd: |
1616 | 124 | return (unsigned)DXIL::WaveMultiPrefixOpKind::And; |
1617 | 44 | case IntrinsicOp::IOP_WaveMultiPrefixBitOr: |
1618 | 44 | return (unsigned)DXIL::WaveMultiPrefixOpKind::Or; |
1619 | 44 | case IntrinsicOp::IOP_WaveMultiPrefixBitXor: |
1620 | 44 | return (unsigned)DXIL::WaveMultiPrefixOpKind::Xor; |
1621 | 58 | case IntrinsicOp::IOP_WaveMultiPrefixProduct: |
1622 | 72 | case IntrinsicOp::IOP_WaveMultiPrefixUProduct: |
1623 | 72 | return (unsigned)DXIL::WaveMultiPrefixOpKind::Product; |
1624 | 58 | case IntrinsicOp::IOP_WaveMultiPrefixSum: |
1625 | 72 | case IntrinsicOp::IOP_WaveMultiPrefixUSum: |
1626 | 72 | return (unsigned)DXIL::WaveMultiPrefixOpKind::Sum; |
1627 | 0 | default: |
1628 | 0 | DXASSERT(IOP == IntrinsicOp::IOP_WaveActiveProduct || |
1629 | 0 | IOP == IntrinsicOp::IOP_WaveActiveUProduct, |
1630 | 0 | "else caller passed incorrect value"); |
1631 | 0 | return (unsigned)DXIL::WaveOpKind::Product; |
1632 | 946 | } |
1633 | 946 | } |
1634 | | |
1635 | | // Wave intrinsics of the form fn(valA)->valA |
1636 | | Value *TranslateWaveA2A(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1637 | | HLOperationLowerHelper &helper, |
1638 | | HLObjectOperationLowerHelper *pObjHelper, |
1639 | 670 | bool &Translated) { |
1640 | 670 | hlsl::OP *hlslOP = &helper.hlslOP; |
1641 | | |
1642 | 670 | Constant *kindValInt = hlslOP->GetI8Const(WaveIntrinsicToOpKind(IOP)); |
1643 | 670 | Constant *signValInt = hlslOP->GetI8Const(WaveIntrinsicToSignedOpKind(IOP)); |
1644 | 670 | Value *refArgs[] = {nullptr, CI->getOperand(1), kindValInt, signValInt}; |
1645 | 670 | unsigned refArgCount = _countof(refArgs); |
1646 | 670 | if (!WaveIntrinsicNeedsSign(opcode)) |
1647 | 134 | refArgCount--; |
1648 | 670 | return TrivialDxilOperation(opcode, |
1649 | 670 | llvm::ArrayRef<Value *>(refArgs, refArgCount), |
1650 | 670 | CI->getOperand(1)->getType(), CI, hlslOP); |
1651 | 670 | } |
1652 | | |
1653 | | // WaveMultiPrefixOP(val<n>, mask) -> val<n> |
1654 | | Value *TranslateWaveMultiPrefix(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opc, |
1655 | | HLOperationLowerHelper &Helper, |
1656 | | HLObjectOperationLowerHelper *ObjHelper, |
1657 | 276 | bool &Translated) { |
1658 | 276 | hlsl::OP *Op = &Helper.hlslOP; |
1659 | | |
1660 | 276 | Constant *KindValInt = Op->GetI8Const(WaveIntrinsicToOpKind(IOP)); |
1661 | 276 | Constant *SignValInt = Op->GetI8Const(WaveIntrinsicToSignedOpKind(IOP)); |
1662 | | |
1663 | | // Decompose mask into scalars |
1664 | 276 | IRBuilder<> Builder(CI); |
1665 | 276 | Value *Mask = CI->getArgOperand(2); |
1666 | 276 | Value *Mask0 = Builder.CreateExtractElement(Mask, (uint64_t)0); |
1667 | 276 | Value *Mask1 = Builder.CreateExtractElement(Mask, (uint64_t)1); |
1668 | 276 | Value *Mask2 = Builder.CreateExtractElement(Mask, (uint64_t)2); |
1669 | 276 | Value *Mask3 = Builder.CreateExtractElement(Mask, (uint64_t)3); |
1670 | | |
1671 | 276 | Value *Args[] = {nullptr, CI->getOperand(1), Mask0, Mask1, Mask2, |
1672 | 276 | Mask3, KindValInt, SignValInt}; |
1673 | | |
1674 | 276 | return TrivialDxilOperation(Opc, Args, CI->getOperand(1)->getType(), CI, Op); |
1675 | 276 | } |
1676 | | |
1677 | | // WaveMultiPrefixBitCount(i1, mask) -> i32 |
1678 | | Value *TranslateWaveMultiPrefixBitCount(CallInst *CI, IntrinsicOp IOP, |
1679 | | OP::OpCode Opc, |
1680 | | HLOperationLowerHelper &Helper, |
1681 | | HLObjectOperationLowerHelper *ObjHelper, |
1682 | 40 | bool &Translated) { |
1683 | 40 | hlsl::OP *Op = &Helper.hlslOP; |
1684 | | |
1685 | | // Decompose mask into scalars |
1686 | 40 | IRBuilder<> Builder(CI); |
1687 | 40 | Value *Mask = CI->getArgOperand(2); |
1688 | 40 | Value *Mask0 = Builder.CreateExtractElement(Mask, (uint64_t)0); |
1689 | 40 | Value *Mask1 = Builder.CreateExtractElement(Mask, (uint64_t)1); |
1690 | 40 | Value *Mask2 = Builder.CreateExtractElement(Mask, (uint64_t)2); |
1691 | 40 | Value *Mask3 = Builder.CreateExtractElement(Mask, (uint64_t)3); |
1692 | | |
1693 | 40 | Value *Args[] = {nullptr, CI->getOperand(1), Mask0, Mask1, Mask2, Mask3}; |
1694 | | |
1695 | 40 | return TrivialDxilOperation(Opc, Args, Helper.voidTy, CI, Op); |
1696 | 40 | } |
1697 | | |
1698 | | // Wave intrinsics of the form fn()->val |
1699 | | Value *TranslateWaveToVal(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1700 | | HLOperationLowerHelper &helper, |
1701 | | HLObjectOperationLowerHelper *pObjHelper, |
1702 | 164 | bool &Translated) { |
1703 | 164 | hlsl::OP *hlslOP = &helper.hlslOP; |
1704 | 164 | Value *refArgs[] = {nullptr}; |
1705 | 164 | return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP); |
1706 | 164 | } |
1707 | | |
1708 | | // Wave intrinsics of the form fn(val,lane)->val |
1709 | | Value *TranslateWaveReadLaneAt(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1710 | | HLOperationLowerHelper &helper, |
1711 | | HLObjectOperationLowerHelper *pObjHelper, |
1712 | 98 | bool &Translated) { |
1713 | 98 | hlsl::OP *hlslOP = &helper.hlslOP; |
1714 | 98 | Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)}; |
1715 | 98 | return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneAt, refArgs, |
1716 | 98 | CI->getOperand(1)->getType(), CI, hlslOP); |
1717 | 98 | } |
1718 | | |
1719 | | // Wave intrinsics of the form fn(val)->val |
1720 | | Value *TranslateWaveReadLaneFirst(CallInst *CI, IntrinsicOp IOP, |
1721 | | OP::OpCode opcode, |
1722 | | HLOperationLowerHelper &helper, |
1723 | | HLObjectOperationLowerHelper *pObjHelper, |
1724 | 274 | bool &Translated) { |
1725 | 274 | hlsl::OP *hlslOP = &helper.hlslOP; |
1726 | 274 | Value *refArgs[] = {nullptr, CI->getOperand(1)}; |
1727 | 274 | return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneFirst, refArgs, |
1728 | 274 | CI->getOperand(1)->getType(), CI, hlslOP); |
1729 | 274 | } |
1730 | | |
1731 | | Value *TranslateAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1732 | | HLOperationLowerHelper &helper, |
1733 | | HLObjectOperationLowerHelper *pObjHelper, |
1734 | 950 | bool &Translated) { |
1735 | 950 | hlsl::OP *hlslOP = &helper.hlslOP; |
1736 | 950 | Type *pOverloadTy = CI->getType()->getScalarType(); |
1737 | 950 | if (pOverloadTy->isFloatingPointTy()) { |
1738 | 804 | Value *refArgs[] = {nullptr, CI->getOperand(1)}; |
1739 | 804 | return TrivialDxilOperation(DXIL::OpCode::FAbs, refArgs, CI->getType(), CI, |
1740 | 804 | hlslOP); |
1741 | 804 | } |
1742 | | |
1743 | 146 | Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
1744 | 146 | IRBuilder<> Builder(CI); |
1745 | 146 | Value *neg = Builder.CreateNeg(src); |
1746 | 146 | return TrivialDxilBinaryOperation(DXIL::OpCode::IMax, src, neg, hlslOP, |
1747 | 146 | Builder); |
1748 | 950 | } |
1749 | | |
1750 | | Value *TranslateUAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1751 | | HLOperationLowerHelper &helper, |
1752 | | HLObjectOperationLowerHelper *pObjHelper, |
1753 | 24 | bool &Translated) { |
1754 | 24 | return CI->getOperand(HLOperandIndex::kUnaryOpSrc0Idx); // No-op |
1755 | 24 | } |
1756 | | |
1757 | 28 | Value *GenerateVectorCmpNEZero(Value *Val, IRBuilder<> Builder) { |
1758 | 28 | Type *Ty = Val->getType(); |
1759 | 28 | Type *EltTy = Ty->getScalarType(); |
1760 | | |
1761 | 28 | Value *ZeroInit = ConstantAggregateZero::get(Ty); |
1762 | | |
1763 | 28 | if (EltTy->isFloatingPointTy()) |
1764 | 4 | return Builder.CreateFCmpUNE(Val, ZeroInit); |
1765 | | |
1766 | 24 | return Builder.CreateICmpNE(Val, ZeroInit); |
1767 | 28 | } |
1768 | | |
1769 | 1.32k | Value *GenerateCmpNEZero(Value *val, IRBuilder<> Builder) { |
1770 | 1.32k | Type *Ty = val->getType(); |
1771 | 1.32k | Type *EltTy = Ty->getScalarType(); |
1772 | | |
1773 | 1.32k | Constant *zero = nullptr; |
1774 | 1.32k | if (EltTy->isFloatingPointTy()) |
1775 | 104 | zero = ConstantFP::get(EltTy, 0); |
1776 | 1.21k | else |
1777 | 1.21k | zero = ConstantInt::get(EltTy, 0); |
1778 | | |
1779 | 1.32k | if (Ty != EltTy) |
1780 | 0 | zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero); |
1781 | | |
1782 | 1.32k | if (EltTy->isFloatingPointTy()) |
1783 | 104 | return Builder.CreateFCmpUNE(val, zero); |
1784 | | |
1785 | 1.21k | return Builder.CreateICmpNE(val, zero); |
1786 | 1.32k | } |
1787 | | |
1788 | | Value *TranslateBitwisePredicate(CallInst *CI, IntrinsicOp IOP, |
1789 | 352 | hlsl::OP *HlslOP) { |
1790 | 352 | Value *Arg = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
1791 | 352 | IRBuilder<> Builder(CI); |
1792 | | |
1793 | 352 | Type *Ty = Arg->getType(); |
1794 | 352 | Type *EltTy = Ty->getScalarType(); |
1795 | | |
1796 | 352 | if (Ty == EltTy) |
1797 | 42 | return GenerateCmpNEZero(Arg, Builder); |
1798 | | |
1799 | 310 | if (HlslOP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus()) { |
1800 | 28 | DXIL::OpCode ReduceOp = DXIL::OpCode::VectorReduceAnd; |
1801 | 28 | switch (IOP) { |
1802 | 14 | case IntrinsicOp::IOP_all: |
1803 | 14 | ReduceOp = DXIL::OpCode::VectorReduceAnd; |
1804 | 14 | break; |
1805 | 14 | case IntrinsicOp::IOP_any: |
1806 | 14 | ReduceOp = DXIL::OpCode::VectorReduceOr; |
1807 | 14 | break; |
1808 | 0 | default: |
1809 | 0 | assert(false && "Unexpected reduction IOP"); |
1810 | 0 | break; |
1811 | 28 | } |
1812 | | |
1813 | | // Compare each element to zero |
1814 | 28 | Value *VecCmpZero = GenerateVectorCmpNEZero(Arg, Builder); |
1815 | 28 | Type *VecCmpTy = VecCmpZero->getType(); |
1816 | | |
1817 | | // Reduce the vector with the appropiate op |
1818 | 28 | Constant *OpArg = HlslOP->GetU32Const((unsigned)ReduceOp); |
1819 | 28 | Value *Args[] = {OpArg, VecCmpZero}; |
1820 | 28 | Function *DxilFunc = HlslOP->GetOpFunc(ReduceOp, VecCmpTy); |
1821 | 28 | return TrivialDxilVectorOperation(DxilFunc, ReduceOp, Args, VecCmpTy, |
1822 | 28 | HlslOP, Builder); |
1823 | 28 | } |
1824 | | |
1825 | 282 | SmallVector<Value *, 4> EltIsNEZero; |
1826 | 1.56k | for (unsigned I = 0; I < Ty->getVectorNumElements(); I++1.28k ) { |
1827 | 1.28k | Value *Elt = Builder.CreateExtractElement(Arg, I); |
1828 | 1.28k | Elt = GenerateCmpNEZero(Elt, Builder); |
1829 | 1.28k | EltIsNEZero.push_back(Elt); |
1830 | 1.28k | } |
1831 | | |
1832 | | // and/or the components together |
1833 | 282 | Value *Reduce = EltIsNEZero[0]; |
1834 | 1.28k | for (unsigned I = 1; I < EltIsNEZero.size(); I++998 ) { |
1835 | 998 | Value *Elt = EltIsNEZero[I]; |
1836 | 998 | switch (IOP) { |
1837 | 426 | case IntrinsicOp::IOP_all: |
1838 | 426 | Reduce = Builder.CreateAnd(Reduce, Elt); |
1839 | 426 | break; |
1840 | 572 | case IntrinsicOp::IOP_any: |
1841 | 572 | Reduce = Builder.CreateOr(Reduce, Elt); |
1842 | 572 | break; |
1843 | 0 | default: |
1844 | 0 | assert(false && "Unexpected reduction IOP"); |
1845 | 0 | break; |
1846 | 998 | } |
1847 | 998 | } |
1848 | | |
1849 | 282 | return Reduce; |
1850 | 282 | } |
1851 | | |
1852 | | Value *TranslateAll(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, |
1853 | | HLOperationLowerHelper &Helper, |
1854 | | HLObjectOperationLowerHelper *PObjHelper, |
1855 | 156 | bool &Translated) { |
1856 | 156 | return TranslateBitwisePredicate(CI, IOP, &Helper.hlslOP); |
1857 | 156 | } |
1858 | | |
1859 | | Value *TranslateAny(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, |
1860 | | HLOperationLowerHelper &Helper, |
1861 | | HLObjectOperationLowerHelper *PObjHelper, |
1862 | 196 | bool &Translated) { |
1863 | 196 | return TranslateBitwisePredicate(CI, IOP, &Helper.hlslOP); |
1864 | 196 | } |
1865 | | |
1866 | | Value *TranslateBitcast(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1867 | | HLOperationLowerHelper &helper, |
1868 | | HLObjectOperationLowerHelper *pObjHelper, |
1869 | 1.84k | bool &Translated) { |
1870 | 1.84k | Type *Ty = CI->getType(); |
1871 | 1.84k | Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
1872 | 1.84k | IRBuilder<> Builder(CI); |
1873 | 1.84k | return Builder.CreateBitCast(op, Ty); |
1874 | 1.84k | } |
1875 | | |
1876 | | Value *TranslateDoubleAsUint(Value *x, Value *lo, Value *hi, |
1877 | 32 | IRBuilder<> &Builder, hlsl::OP *hlslOP) { |
1878 | 32 | Type *Ty = x->getType(); |
1879 | 32 | Type *outTy = lo->getType()->getPointerElementType(); |
1880 | 32 | DXIL::OpCode opcode = DXIL::OpCode::SplitDouble; |
1881 | | |
1882 | 32 | Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType()); |
1883 | 32 | Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode)); |
1884 | | |
1885 | 32 | if (Ty->isVectorTy()) { |
1886 | 8 | Value *retValLo = llvm::UndefValue::get(outTy); |
1887 | 8 | Value *retValHi = llvm::UndefValue::get(outTy); |
1888 | 8 | unsigned vecSize = Ty->getVectorNumElements(); |
1889 | | |
1890 | 24 | for (unsigned i = 0; i < vecSize; i++16 ) { |
1891 | 16 | Value *Elt = Builder.CreateExtractElement(x, i); |
1892 | 16 | Value *EltOP = Builder.CreateCall(dxilFunc, {opArg, Elt}, |
1893 | 16 | hlslOP->GetOpCodeName(opcode)); |
1894 | 16 | Value *EltLo = Builder.CreateExtractValue(EltOP, 0); |
1895 | 16 | retValLo = Builder.CreateInsertElement(retValLo, EltLo, i); |
1896 | 16 | Value *EltHi = Builder.CreateExtractValue(EltOP, 1); |
1897 | 16 | retValHi = Builder.CreateInsertElement(retValHi, EltHi, i); |
1898 | 16 | } |
1899 | 8 | Builder.CreateStore(retValLo, lo); |
1900 | 8 | Builder.CreateStore(retValHi, hi); |
1901 | 24 | } else { |
1902 | 24 | Value *retVal = |
1903 | 24 | Builder.CreateCall(dxilFunc, {opArg, x}, hlslOP->GetOpCodeName(opcode)); |
1904 | 24 | Value *retValLo = Builder.CreateExtractValue(retVal, 0); |
1905 | 24 | Value *retValHi = Builder.CreateExtractValue(retVal, 1); |
1906 | 24 | Builder.CreateStore(retValLo, lo); |
1907 | 24 | Builder.CreateStore(retValHi, hi); |
1908 | 24 | } |
1909 | | |
1910 | 32 | return nullptr; |
1911 | 32 | } |
1912 | | |
1913 | | Value *TranslateAsUint(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1914 | | HLOperationLowerHelper &helper, |
1915 | | HLObjectOperationLowerHelper *pObjHelper, |
1916 | 608 | bool &Translated) { |
1917 | 608 | if (CI->getNumArgOperands() == 2) |
1918 | 576 | return TranslateBitcast(CI, IOP, opcode, helper, pObjHelper, Translated); |
1919 | | |
1920 | 32 | DXASSERT_NOMSG(CI->getNumArgOperands() == 4); |
1921 | 32 | hlsl::OP *hlslOP = &helper.hlslOP; |
1922 | 32 | Value *x = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); |
1923 | 32 | DXASSERT_NOMSG(x->getType()->getScalarType()->isDoubleTy()); |
1924 | 32 | Value *lo = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); |
1925 | 32 | Value *hi = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); |
1926 | 32 | IRBuilder<> Builder(CI); |
1927 | 32 | return TranslateDoubleAsUint(x, lo, hi, Builder, hlslOP); |
1928 | 608 | } |
1929 | | |
1930 | | Value *TranslateAsDouble(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1931 | | HLOperationLowerHelper &helper, |
1932 | | HLObjectOperationLowerHelper *pObjHelper, |
1933 | 66 | bool &Translated) { |
1934 | 66 | hlsl::OP *hlslOP = &helper.hlslOP; |
1935 | 66 | Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
1936 | 66 | Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
1937 | | |
1938 | 66 | Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode)); |
1939 | 66 | IRBuilder<> Builder(CI); |
1940 | 66 | return TrivialDxilOperation(opcode, {opArg, x, y}, CI->getType(), |
1941 | 66 | CI->getType(), hlslOP, Builder); |
1942 | 66 | } |
1943 | | |
1944 | | Value *TranslateAtan2(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1945 | | HLOperationLowerHelper &helper, |
1946 | | HLObjectOperationLowerHelper *pObjHelper, |
1947 | 56 | bool &Translated) { |
1948 | 56 | hlsl::OP *hlslOP = &helper.hlslOP; |
1949 | 56 | Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
1950 | 56 | Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
1951 | | |
1952 | 56 | IRBuilder<> Builder(CI); |
1953 | 56 | Value *tan = Builder.CreateFDiv(y, x); |
1954 | | |
1955 | 56 | Value *atan = |
1956 | 56 | TrivialDxilUnaryOperation(OP::OpCode::Atan, tan, hlslOP, Builder); |
1957 | | // Modify atan result based on https://en.wikipedia.org/wiki/Atan2. |
1958 | 56 | Type *Ty = x->getType(); |
1959 | 56 | Constant *pi = ConstantFP::get(Ty->getScalarType(), M_PI); |
1960 | 56 | Constant *halfPi = ConstantFP::get(Ty->getScalarType(), M_PI / 2); |
1961 | 56 | Constant *negHalfPi = ConstantFP::get(Ty->getScalarType(), -M_PI / 2); |
1962 | 56 | Constant *zero = ConstantFP::get(Ty->getScalarType(), 0); |
1963 | 56 | if (Ty->isVectorTy()) { |
1964 | 22 | unsigned vecSize = Ty->getVectorNumElements(); |
1965 | 22 | pi = ConstantVector::getSplat(vecSize, pi); |
1966 | 22 | halfPi = ConstantVector::getSplat(vecSize, halfPi); |
1967 | 22 | negHalfPi = ConstantVector::getSplat(vecSize, negHalfPi); |
1968 | 22 | zero = ConstantVector::getSplat(vecSize, zero); |
1969 | 22 | } |
1970 | 56 | Value *atanAddPi = Builder.CreateFAdd(atan, pi); |
1971 | 56 | Value *atanSubPi = Builder.CreateFSub(atan, pi); |
1972 | | |
1973 | | // x > 0 -> atan. |
1974 | 56 | Value *result = atan; |
1975 | 56 | Value *xLt0 = Builder.CreateFCmpOLT(x, zero); |
1976 | 56 | Value *xEq0 = Builder.CreateFCmpOEQ(x, zero); |
1977 | | |
1978 | 56 | Value *yGe0 = Builder.CreateFCmpOGE(y, zero); |
1979 | 56 | Value *yLt0 = Builder.CreateFCmpOLT(y, zero); |
1980 | | // x < 0, y >= 0 -> atan + pi. |
1981 | 56 | Value *xLt0AndyGe0 = Builder.CreateAnd(xLt0, yGe0); |
1982 | 56 | result = Builder.CreateSelect(xLt0AndyGe0, atanAddPi, result); |
1983 | | |
1984 | | // x < 0, y < 0 -> atan - pi. |
1985 | 56 | Value *xLt0AndYLt0 = Builder.CreateAnd(xLt0, yLt0); |
1986 | 56 | result = Builder.CreateSelect(xLt0AndYLt0, atanSubPi, result); |
1987 | | |
1988 | | // x == 0, y < 0 -> -pi/2 |
1989 | 56 | Value *xEq0AndYLt0 = Builder.CreateAnd(xEq0, yLt0); |
1990 | 56 | result = Builder.CreateSelect(xEq0AndYLt0, negHalfPi, result); |
1991 | | // x == 0, y > 0 -> pi/2 |
1992 | 56 | Value *xEq0AndYGe0 = Builder.CreateAnd(xEq0, yGe0); |
1993 | 56 | result = Builder.CreateSelect(xEq0AndYGe0, halfPi, result); |
1994 | | |
1995 | 56 | return result; |
1996 | 56 | } |
1997 | | |
1998 | | Value *TranslateClamp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1999 | | HLOperationLowerHelper &helper, |
2000 | | HLObjectOperationLowerHelper *pObjHelper, |
2001 | 764 | bool &Translated) { |
2002 | 764 | hlsl::OP *hlslOP = &helper.hlslOP; |
2003 | 764 | Type *Ty = CI->getType(); |
2004 | 764 | Type *EltTy = Ty->getScalarType(); |
2005 | 764 | DXIL::OpCode maxOp = DXIL::OpCode::FMax; |
2006 | 764 | DXIL::OpCode minOp = DXIL::OpCode::FMin; |
2007 | 764 | if (IOP == IntrinsicOp::IOP_uclamp) { |
2008 | 56 | maxOp = DXIL::OpCode::UMax; |
2009 | 56 | minOp = DXIL::OpCode::UMin; |
2010 | 708 | } else if (EltTy->isIntegerTy()) { |
2011 | 48 | maxOp = DXIL::OpCode::IMax; |
2012 | 48 | minOp = DXIL::OpCode::IMin; |
2013 | 48 | } |
2014 | | |
2015 | 764 | Value *x = CI->getArgOperand(HLOperandIndex::kClampOpXIdx); |
2016 | 764 | Value *maxVal = CI->getArgOperand(HLOperandIndex::kClampOpMaxIdx); |
2017 | 764 | Value *minVal = CI->getArgOperand(HLOperandIndex::kClampOpMinIdx); |
2018 | | |
2019 | 764 | IRBuilder<> Builder(CI); |
2020 | | // min(max(x, minVal), maxVal). |
2021 | 764 | Value *maxXMinVal = |
2022 | 764 | TrivialDxilBinaryOperation(maxOp, x, minVal, hlslOP, Builder); |
2023 | 764 | return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder); |
2024 | 764 | } |
2025 | | |
2026 | | Value *TranslateClip(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2027 | | HLOperationLowerHelper &helper, |
2028 | | HLObjectOperationLowerHelper *pObjHelper, |
2029 | 110 | bool &Translated) { |
2030 | 110 | hlsl::OP *hlslOP = &helper.hlslOP; |
2031 | 110 | Function *discard = |
2032 | 110 | hlslOP->GetOpFunc(OP::OpCode::Discard, Type::getVoidTy(CI->getContext())); |
2033 | 110 | IRBuilder<> Builder(CI); |
2034 | 110 | Value *cond = nullptr; |
2035 | 110 | Value *arg = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2036 | 110 | if (VectorType *VT = dyn_cast<VectorType>(arg->getType())) { |
2037 | 14 | Value *elt = Builder.CreateExtractElement(arg, (uint64_t)0); |
2038 | 14 | cond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0)); |
2039 | 50 | for (unsigned i = 1; i < VT->getNumElements(); i++36 ) { |
2040 | 36 | Value *elt = Builder.CreateExtractElement(arg, i); |
2041 | 36 | Value *eltCond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0)); |
2042 | 36 | cond = Builder.CreateOr(cond, eltCond); |
2043 | 36 | } |
2044 | 14 | } else |
2045 | 96 | cond = Builder.CreateFCmpOLT(arg, hlslOP->GetFloatConst(0)); |
2046 | | |
2047 | | /*If discard condition evaluates to false at compile-time, then |
2048 | | don't emit the discard instruction.*/ |
2049 | 110 | if (ConstantInt *constCond = dyn_cast<ConstantInt>(cond)) |
2050 | 78 | if (!constCond->getLimitedValue()) |
2051 | 10 | return nullptr; |
2052 | | |
2053 | 100 | Constant *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::Discard); |
2054 | 100 | Builder.CreateCall(discard, {opArg, cond}); |
2055 | 100 | return nullptr; |
2056 | 110 | } |
2057 | | |
2058 | | Value *TranslateCross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2059 | | HLOperationLowerHelper &helper, |
2060 | | HLObjectOperationLowerHelper *pObjHelper, |
2061 | 104 | bool &Translated) { |
2062 | 104 | VectorType *VT = cast<VectorType>(CI->getType()); |
2063 | 104 | DXASSERT_NOMSG(VT->getNumElements() == 3); |
2064 | | |
2065 | 104 | Value *op0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2066 | 104 | Value *op1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2067 | | |
2068 | 104 | IRBuilder<> Builder(CI); |
2069 | 104 | Value *op0_x = Builder.CreateExtractElement(op0, (uint64_t)0); |
2070 | 104 | Value *op0_y = Builder.CreateExtractElement(op0, 1); |
2071 | 104 | Value *op0_z = Builder.CreateExtractElement(op0, 2); |
2072 | | |
2073 | 104 | Value *op1_x = Builder.CreateExtractElement(op1, (uint64_t)0); |
2074 | 104 | Value *op1_y = Builder.CreateExtractElement(op1, 1); |
2075 | 104 | Value *op1_z = Builder.CreateExtractElement(op1, 2); |
2076 | | |
2077 | 312 | auto MulSub = [&](Value *x0, Value *y0, Value *x1, Value *y1) -> Value * { |
2078 | 312 | Value *xy = Builder.CreateFMul(x0, y1); |
2079 | 312 | Value *yx = Builder.CreateFMul(y0, x1); |
2080 | 312 | return Builder.CreateFSub(xy, yx); |
2081 | 312 | }; |
2082 | | |
2083 | 104 | Value *yz_zy = MulSub(op0_y, op0_z, op1_y, op1_z); |
2084 | 104 | Value *zx_xz = MulSub(op0_z, op0_x, op1_z, op1_x); |
2085 | 104 | Value *xy_yx = MulSub(op0_x, op0_y, op1_x, op1_y); |
2086 | | |
2087 | 104 | Value *cross = UndefValue::get(VT); |
2088 | 104 | cross = Builder.CreateInsertElement(cross, yz_zy, (uint64_t)0); |
2089 | 104 | cross = Builder.CreateInsertElement(cross, zx_xz, 1); |
2090 | 104 | cross = Builder.CreateInsertElement(cross, xy_yx, 2); |
2091 | 104 | return cross; |
2092 | 104 | } |
2093 | | |
2094 | | Value *TranslateDegrees(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2095 | | HLOperationLowerHelper &helper, |
2096 | | HLObjectOperationLowerHelper *pObjHelper, |
2097 | 32 | bool &Translated) { |
2098 | 32 | IRBuilder<> Builder(CI); |
2099 | 32 | Type *Ty = CI->getType(); |
2100 | 32 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2101 | | // 180/pi. |
2102 | 32 | Constant *toDegreeConst = ConstantFP::get(Ty->getScalarType(), 180 / M_PI); |
2103 | 32 | if (Ty != Ty->getScalarType()) { |
2104 | 16 | toDegreeConst = |
2105 | 16 | ConstantVector::getSplat(Ty->getVectorNumElements(), toDegreeConst); |
2106 | 16 | } |
2107 | 32 | return Builder.CreateFMul(toDegreeConst, val); |
2108 | 32 | } |
2109 | | |
2110 | | Value *TranslateDst(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2111 | | HLOperationLowerHelper &helper, |
2112 | | HLObjectOperationLowerHelper *pObjHelper, |
2113 | 16 | bool &Translated) { |
2114 | 16 | Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2115 | 16 | Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2116 | 16 | Type *Ty = src1->getType(); |
2117 | 16 | IRBuilder<> Builder(CI); |
2118 | 16 | Value *Result = UndefValue::get(Ty); |
2119 | 16 | Constant *oneConst = ConstantFP::get(Ty->getScalarType(), 1); |
2120 | | // dest.x = 1; |
2121 | 16 | Result = Builder.CreateInsertElement(Result, oneConst, (uint64_t)0); |
2122 | | // dest.y = src0.y * src1.y; |
2123 | 16 | Value *src0_y = Builder.CreateExtractElement(src0, 1); |
2124 | 16 | Value *src1_y = Builder.CreateExtractElement(src1, 1); |
2125 | 16 | Value *yMuly = Builder.CreateFMul(src0_y, src1_y); |
2126 | 16 | Result = Builder.CreateInsertElement(Result, yMuly, 1); |
2127 | | // dest.z = src0.z; |
2128 | 16 | Value *src0_z = Builder.CreateExtractElement(src0, 2); |
2129 | 16 | Result = Builder.CreateInsertElement(Result, src0_z, 2); |
2130 | | // dest.w = src1.w; |
2131 | 16 | Value *src1_w = Builder.CreateExtractElement(src1, 3); |
2132 | 16 | Result = Builder.CreateInsertElement(Result, src1_w, 3); |
2133 | 16 | return Result; |
2134 | 16 | } |
2135 | | |
2136 | | Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2137 | | HLOperationLowerHelper &helper, |
2138 | | HLObjectOperationLowerHelper *pObjHelper, |
2139 | 204 | bool &Translated) { |
2140 | 204 | hlsl::OP *OP = &helper.hlslOP; |
2141 | 204 | IRBuilder<> Builder(CI); |
2142 | 204 | Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2143 | | |
2144 | 204 | Type *Ty = Src->getType(); |
2145 | 204 | Type *RetTy = Type::getInt32Ty(CI->getContext()); |
2146 | 204 | unsigned NumElements = 0; |
2147 | 204 | if (Ty->isVectorTy()) { |
2148 | 38 | NumElements = Ty->getVectorNumElements(); |
2149 | 38 | RetTy = VectorType::get(RetTy, NumElements); |
2150 | 38 | } |
2151 | | |
2152 | 204 | Constant *OpArg = OP->GetU32Const((unsigned)opcode); |
2153 | 204 | Value *Args[] = {OpArg, Src}; |
2154 | | |
2155 | 204 | Value *FirstbitHi = |
2156 | 204 | TrivialDxilOperation(opcode, Args, Ty, RetTy, OP, Builder); |
2157 | | |
2158 | 204 | IntegerType *EltTy = cast<IntegerType>(Ty->getScalarType()); |
2159 | 204 | Constant *Neg1 = Builder.getInt32(-1); |
2160 | 204 | Constant *BitWidth = Builder.getInt32(EltTy->getBitWidth() - 1); |
2161 | | |
2162 | 204 | if (NumElements > 0) { |
2163 | 38 | Neg1 = ConstantVector::getSplat(NumElements, Neg1); |
2164 | 38 | BitWidth = ConstantVector::getSplat(NumElements, BitWidth); |
2165 | 38 | } |
2166 | | |
2167 | 204 | Value *Sub = Builder.CreateSub(BitWidth, FirstbitHi); |
2168 | 204 | Value *Cond = Builder.CreateICmpEQ(Neg1, FirstbitHi); |
2169 | 204 | return Builder.CreateSelect(Cond, Neg1, Sub); |
2170 | 204 | } |
2171 | | |
2172 | | Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2173 | | HLOperationLowerHelper &helper, |
2174 | | HLObjectOperationLowerHelper *pObjHelper, |
2175 | 178 | bool &Translated) { |
2176 | 178 | hlsl::OP *OP = &helper.hlslOP; |
2177 | 178 | IRBuilder<> Builder(CI); |
2178 | 178 | Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2179 | | |
2180 | 178 | Type *Ty = Src->getType(); |
2181 | 178 | Type *RetTy = Type::getInt32Ty(CI->getContext()); |
2182 | 178 | if (Ty->isVectorTy()) |
2183 | 40 | RetTy = VectorType::get(RetTy, Ty->getVectorNumElements()); |
2184 | | |
2185 | 178 | Constant *OpArg = OP->GetU32Const((unsigned)opcode); |
2186 | 178 | Value *Args[] = {OpArg, Src}; |
2187 | | |
2188 | 178 | Value *FirstbitLo = |
2189 | 178 | TrivialDxilOperation(opcode, Args, Ty, RetTy, OP, Builder); |
2190 | | |
2191 | 178 | return FirstbitLo; |
2192 | 178 | } |
2193 | | |
2194 | | Value *TranslateLit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2195 | | HLOperationLowerHelper &helper, |
2196 | | HLObjectOperationLowerHelper *pObjHelper, |
2197 | 24 | bool &Translated) { |
2198 | 24 | Value *n_dot_l = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); |
2199 | 24 | Value *n_dot_h = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); |
2200 | 24 | Value *m = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); |
2201 | 24 | IRBuilder<> Builder(CI); |
2202 | | |
2203 | 24 | Type *Ty = m->getType(); |
2204 | 24 | Value *Result = UndefValue::get(VectorType::get(Ty, 4)); |
2205 | | // Result = (ambient, diffuse, specular, 1) |
2206 | | // ambient = 1. |
2207 | 24 | Constant *oneConst = ConstantFP::get(Ty, 1); |
2208 | 24 | Result = Builder.CreateInsertElement(Result, oneConst, (uint64_t)0); |
2209 | | // Result.w = 1. |
2210 | 24 | Result = Builder.CreateInsertElement(Result, oneConst, 3); |
2211 | | // diffuse = (n_dot_l < 0) ? 0 : n_dot_l. |
2212 | 24 | Constant *zeroConst = ConstantFP::get(Ty, 0); |
2213 | 24 | Value *nlCmp = Builder.CreateFCmpOLT(n_dot_l, zeroConst); |
2214 | 24 | Value *diffuse = Builder.CreateSelect(nlCmp, zeroConst, n_dot_l); |
2215 | 24 | Result = Builder.CreateInsertElement(Result, diffuse, 1); |
2216 | | // specular = ((n_dot_l < 0) || (n_dot_h < 0)) ? 0: (n_dot_h ^ m). |
2217 | 24 | Value *nhCmp = Builder.CreateFCmpOLT(n_dot_h, zeroConst); |
2218 | 24 | Value *specCond = Builder.CreateOr(nlCmp, nhCmp); |
2219 | 24 | bool isFXCCompatMode = |
2220 | 24 | CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode; |
2221 | 24 | Value *nhPowM = |
2222 | 24 | TranslatePowImpl(&helper.hlslOP, Builder, n_dot_h, m, isFXCCompatMode); |
2223 | 24 | Value *spec = Builder.CreateSelect(specCond, zeroConst, nhPowM); |
2224 | 24 | Result = Builder.CreateInsertElement(Result, spec, 2); |
2225 | 24 | return Result; |
2226 | 24 | } |
2227 | | |
2228 | | Value *TranslateRadians(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2229 | | HLOperationLowerHelper &helper, |
2230 | | HLObjectOperationLowerHelper *pObjHelper, |
2231 | 36 | bool &Translated) { |
2232 | 36 | IRBuilder<> Builder(CI); |
2233 | 36 | Type *Ty = CI->getType(); |
2234 | 36 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2235 | | // pi/180. |
2236 | 36 | Constant *toRadianConst = ConstantFP::get(Ty->getScalarType(), M_PI / 180); |
2237 | 36 | if (Ty != Ty->getScalarType()) { |
2238 | 20 | toRadianConst = |
2239 | 20 | ConstantVector::getSplat(Ty->getVectorNumElements(), toRadianConst); |
2240 | 20 | } |
2241 | 36 | return Builder.CreateFMul(toRadianConst, val); |
2242 | 36 | } |
2243 | | |
2244 | | Value *TranslateF16ToF32(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2245 | | HLOperationLowerHelper &helper, |
2246 | | HLObjectOperationLowerHelper *pObjHelper, |
2247 | 384 | bool &Translated) { |
2248 | 384 | IRBuilder<> Builder(CI); |
2249 | | |
2250 | 384 | Value *x = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2251 | 384 | Type *Ty = CI->getType(); |
2252 | | |
2253 | 384 | Function *f16tof32 = helper.hlslOP.GetOpFunc(opcode, helper.voidTy); |
2254 | 384 | return TrivialDxilOperation( |
2255 | 384 | f16tof32, opcode, {Builder.getInt32(static_cast<unsigned>(opcode)), x}, |
2256 | 384 | x->getType(), Ty, &helper.hlslOP, Builder); |
2257 | 384 | } |
2258 | | |
2259 | | Value *TranslateF32ToF16(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2260 | | HLOperationLowerHelper &helper, |
2261 | | HLObjectOperationLowerHelper *pObjHelper, |
2262 | 304 | bool &Translated) { |
2263 | 304 | IRBuilder<> Builder(CI); |
2264 | | |
2265 | 304 | Value *x = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2266 | 304 | Type *Ty = CI->getType(); |
2267 | | |
2268 | 304 | Function *f32tof16 = helper.hlslOP.GetOpFunc(opcode, helper.voidTy); |
2269 | 304 | return TrivialDxilOperation( |
2270 | 304 | f32tof16, opcode, {Builder.getInt32(static_cast<unsigned>(opcode)), x}, |
2271 | 304 | x->getType(), Ty, &helper.hlslOP, Builder); |
2272 | 304 | } |
2273 | | |
2274 | 282 | Value *TranslateLength(CallInst *CI, Value *val, hlsl::OP *hlslOP) { |
2275 | 282 | IRBuilder<> Builder(CI); |
2276 | 282 | if (VectorType *VT = dyn_cast<VectorType>(val->getType())) { |
2277 | 282 | Value *Elt = Builder.CreateExtractElement(val, (uint64_t)0); |
2278 | 282 | unsigned size = VT->getNumElements(); |
2279 | 282 | if (size > 1) { |
2280 | 282 | Value *Sum = Builder.CreateFMul(Elt, Elt); |
2281 | 710 | for (unsigned i = 1; i < size; i++428 ) { |
2282 | 428 | Elt = Builder.CreateExtractElement(val, i); |
2283 | 428 | Value *Mul = Builder.CreateFMul(Elt, Elt); |
2284 | 428 | Sum = Builder.CreateFAdd(Sum, Mul); |
2285 | 428 | } |
2286 | 282 | DXIL::OpCode sqrt = DXIL::OpCode::Sqrt; |
2287 | 282 | Function *dxilSqrt = hlslOP->GetOpFunc(sqrt, VT->getElementType()); |
2288 | 282 | Value *opArg = hlslOP->GetI32Const((unsigned)sqrt); |
2289 | 282 | return Builder.CreateCall(dxilSqrt, {opArg, Sum}, |
2290 | 282 | hlslOP->GetOpCodeName(sqrt)); |
2291 | 282 | } else { |
2292 | 0 | val = Elt; |
2293 | 0 | } |
2294 | 282 | } |
2295 | 0 | DXIL::OpCode fabs = DXIL::OpCode::FAbs; |
2296 | 0 | Function *dxilFAbs = hlslOP->GetOpFunc(fabs, val->getType()); |
2297 | 0 | Value *opArg = hlslOP->GetI32Const((unsigned)fabs); |
2298 | 0 | return Builder.CreateCall(dxilFAbs, {opArg, val}, |
2299 | 0 | hlslOP->GetOpCodeName(fabs)); |
2300 | 282 | } |
2301 | | |
2302 | | Value *TranslateLength(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2303 | | HLOperationLowerHelper &helper, |
2304 | | HLObjectOperationLowerHelper *pObjHelper, |
2305 | 226 | bool &Translated) { |
2306 | 226 | hlsl::OP *hlslOP = &helper.hlslOP; |
2307 | 226 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2308 | 226 | return TranslateLength(CI, val, hlslOP); |
2309 | 226 | } |
2310 | | |
2311 | | Value *TranslateModF(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2312 | | HLOperationLowerHelper &helper, |
2313 | | HLObjectOperationLowerHelper *pObjHelper, |
2314 | 64 | bool &Translated) { |
2315 | 64 | hlsl::OP *hlslOP = &helper.hlslOP; |
2316 | 64 | Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2317 | 64 | Value *outIntPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2318 | 64 | IRBuilder<> Builder(CI); |
2319 | 64 | Value *intP = |
2320 | 64 | TrivialDxilUnaryOperation(OP::OpCode::Round_z, val, hlslOP, Builder); |
2321 | 64 | Value *fracP = Builder.CreateFSub(val, intP); |
2322 | 64 | Builder.CreateStore(intP, outIntPtr); |
2323 | 64 | return fracP; |
2324 | 64 | } |
2325 | | |
2326 | | Value *TranslateDistance(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2327 | | HLOperationLowerHelper &helper, |
2328 | | HLObjectOperationLowerHelper *pObjHelper, |
2329 | 56 | bool &Translated) { |
2330 | 56 | hlsl::OP *hlslOP = &helper.hlslOP; |
2331 | 56 | Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2332 | 56 | Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2333 | 56 | IRBuilder<> Builder(CI); |
2334 | 56 | Value *sub = Builder.CreateFSub(src0, src1); |
2335 | 56 | return TranslateLength(CI, sub, hlslOP); |
2336 | 56 | } |
2337 | | |
2338 | | Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2339 | | HLOperationLowerHelper &helper, |
2340 | | HLObjectOperationLowerHelper *pObjHelper, |
2341 | 44 | bool &Translated) { |
2342 | 44 | hlsl::OP *hlslOP = &helper.hlslOP; |
2343 | 44 | IRBuilder<> Builder(CI); |
2344 | 44 | Type *Ty = CI->getType(); |
2345 | 44 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2346 | 44 | Constant *log2eConst = ConstantFP::get(Ty->getScalarType(), M_LOG2E); |
2347 | 44 | if (Ty != Ty->getScalarType()) { |
2348 | 20 | log2eConst = |
2349 | 20 | ConstantVector::getSplat(Ty->getVectorNumElements(), log2eConst); |
2350 | 20 | } |
2351 | 44 | val = Builder.CreateFMul(log2eConst, val); |
2352 | 44 | Value *exp = TrivialDxilUnaryOperation(OP::OpCode::Exp, val, hlslOP, Builder); |
2353 | 44 | return exp; |
2354 | 44 | } |
2355 | | |
2356 | | Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2357 | | HLOperationLowerHelper &helper, |
2358 | | HLObjectOperationLowerHelper *pObjHelper, |
2359 | 56 | bool &Translated) { |
2360 | 56 | hlsl::OP *hlslOP = &helper.hlslOP; |
2361 | 56 | IRBuilder<> Builder(CI); |
2362 | 56 | Type *Ty = CI->getType(); |
2363 | 56 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2364 | 56 | Constant *ln2Const = ConstantFP::get(Ty->getScalarType(), M_LN2); |
2365 | 56 | if (Ty != Ty->getScalarType()) { |
2366 | 20 | ln2Const = ConstantVector::getSplat(Ty->getVectorNumElements(), ln2Const); |
2367 | 20 | } |
2368 | 56 | Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder); |
2369 | | |
2370 | 56 | return Builder.CreateFMul(ln2Const, log); |
2371 | 56 | } |
2372 | | |
2373 | | Value *TranslateLog10(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2374 | | HLOperationLowerHelper &helper, |
2375 | | HLObjectOperationLowerHelper *pObjHelper, |
2376 | 24 | bool &Translated) { |
2377 | 24 | hlsl::OP *hlslOP = &helper.hlslOP; |
2378 | 24 | IRBuilder<> Builder(CI); |
2379 | 24 | Type *Ty = CI->getType(); |
2380 | 24 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2381 | 24 | Constant *log2_10Const = ConstantFP::get(Ty->getScalarType(), M_LN2 / M_LN10); |
2382 | 24 | if (Ty != Ty->getScalarType()) { |
2383 | 8 | log2_10Const = |
2384 | 8 | ConstantVector::getSplat(Ty->getVectorNumElements(), log2_10Const); |
2385 | 8 | } |
2386 | 24 | Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder); |
2387 | | |
2388 | 24 | return Builder.CreateFMul(log2_10Const, log); |
2389 | 24 | } |
2390 | | |
2391 | | Value *TranslateFMod(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2392 | | HLOperationLowerHelper &helper, |
2393 | | HLObjectOperationLowerHelper *pObjHelper, |
2394 | 72 | bool &Translated) { |
2395 | 72 | hlsl::OP *hlslOP = &helper.hlslOP; |
2396 | 72 | Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2397 | 72 | Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2398 | 72 | IRBuilder<> Builder(CI); |
2399 | 72 | Value *div = Builder.CreateFDiv(src0, src1); |
2400 | 72 | Value *negDiv = Builder.CreateFNeg(div); |
2401 | 72 | Value *ge = Builder.CreateFCmpOGE(div, negDiv); |
2402 | 72 | Value *absDiv = |
2403 | 72 | TrivialDxilUnaryOperation(OP::OpCode::FAbs, div, hlslOP, Builder); |
2404 | 72 | Value *frc = |
2405 | 72 | TrivialDxilUnaryOperation(OP::OpCode::Frc, absDiv, hlslOP, Builder); |
2406 | 72 | Value *negFrc = Builder.CreateFNeg(frc); |
2407 | 72 | Value *realFrc = Builder.CreateSelect(ge, frc, negFrc); |
2408 | 72 | return Builder.CreateFMul(realFrc, src1); |
2409 | 72 | } |
2410 | | |
2411 | | Value *TranslateFUIBinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2412 | | HLOperationLowerHelper &helper, |
2413 | | HLObjectOperationLowerHelper *pObjHelper, |
2414 | 2.49k | bool &Translated) { |
2415 | 2.49k | bool isFloat = CI->getType()->getScalarType()->isFloatingPointTy(); |
2416 | 2.49k | if (isFloat) { |
2417 | 1.33k | switch (IOP) { |
2418 | 832 | case IntrinsicOp::IOP_max: |
2419 | 832 | opcode = OP::OpCode::FMax; |
2420 | 832 | break; |
2421 | 498 | case IntrinsicOp::IOP_min: |
2422 | 498 | default: |
2423 | 498 | DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_min); |
2424 | 498 | opcode = OP::OpCode::FMin; |
2425 | 498 | break; |
2426 | 1.33k | } |
2427 | 1.33k | } |
2428 | 2.49k | return TrivialBinaryOperation(CI, IOP, opcode, helper, pObjHelper, |
2429 | 2.49k | Translated); |
2430 | 2.49k | } |
2431 | | |
2432 | | Value *TranslateFUITrinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2433 | | HLOperationLowerHelper &helper, |
2434 | | HLObjectOperationLowerHelper *pObjHelper, |
2435 | 11.9k | bool &Translated) { |
2436 | 11.9k | bool isFloat = CI->getType()->getScalarType()->isFloatingPointTy(); |
2437 | 11.9k | if (isFloat) { |
2438 | 11.3k | switch (IOP) { |
2439 | 11.3k | case IntrinsicOp::IOP_mad: |
2440 | 11.3k | default: |
2441 | 11.3k | DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_mad); |
2442 | 11.3k | opcode = OP::OpCode::FMad; |
2443 | 11.3k | break; |
2444 | 11.3k | } |
2445 | 11.3k | } |
2446 | 11.9k | return TrivialTrinaryOperation(CI, IOP, opcode, helper, pObjHelper, |
2447 | 11.9k | Translated); |
2448 | 11.9k | } |
2449 | | |
2450 | | Value *TranslateFrexp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2451 | | HLOperationLowerHelper &helper, |
2452 | | HLObjectOperationLowerHelper *pObjHelper, |
2453 | 60 | bool &Translated) { |
2454 | 60 | hlsl::OP *hlslOP = &helper.hlslOP; |
2455 | 60 | Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2456 | 60 | Value *expPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2457 | 60 | IRBuilder<> Builder(CI); |
2458 | 60 | Type *i32Ty = Type::getInt32Ty(CI->getContext()); |
2459 | 60 | Constant *exponentMaskConst = ConstantInt::get(i32Ty, 0x7f800000); |
2460 | 60 | Constant *mantisaMaskConst = ConstantInt::get(i32Ty, 0x007fffff); |
2461 | 60 | Constant *exponentShiftConst = ConstantInt::get(i32Ty, 23); |
2462 | 60 | Constant *mantisaOrConst = ConstantInt::get(i32Ty, 0x3f000000); |
2463 | 60 | Constant *exponentBiasConst = ConstantInt::get(i32Ty, -(int)0x3f000000); |
2464 | 60 | Constant *zeroVal = hlslOP->GetFloatConst(0); |
2465 | | // int iVal = asint(val); |
2466 | 60 | Type *dstTy = i32Ty; |
2467 | 60 | Type *Ty = val->getType(); |
2468 | 60 | if (Ty->isVectorTy()) { |
2469 | 28 | unsigned vecSize = Ty->getVectorNumElements(); |
2470 | 28 | dstTy = VectorType::get(i32Ty, vecSize); |
2471 | 28 | exponentMaskConst = ConstantVector::getSplat(vecSize, exponentMaskConst); |
2472 | 28 | mantisaMaskConst = ConstantVector::getSplat(vecSize, mantisaMaskConst); |
2473 | 28 | exponentShiftConst = ConstantVector::getSplat(vecSize, exponentShiftConst); |
2474 | 28 | mantisaOrConst = ConstantVector::getSplat(vecSize, mantisaOrConst); |
2475 | 28 | exponentBiasConst = ConstantVector::getSplat(vecSize, exponentBiasConst); |
2476 | 28 | zeroVal = ConstantVector::getSplat(vecSize, zeroVal); |
2477 | 28 | } |
2478 | | |
2479 | | // bool ne = val != 0; |
2480 | 60 | Value *notZero = Builder.CreateFCmpUNE(val, zeroVal); |
2481 | 60 | notZero = Builder.CreateSExt(notZero, dstTy); |
2482 | | |
2483 | 60 | Value *intVal = Builder.CreateBitCast(val, dstTy); |
2484 | | // temp = intVal & exponentMask; |
2485 | 60 | Value *temp = Builder.CreateAnd(intVal, exponentMaskConst); |
2486 | | // temp = temp + exponentBias; |
2487 | 60 | temp = Builder.CreateAdd(temp, exponentBiasConst); |
2488 | | // temp = temp & ne; |
2489 | 60 | temp = Builder.CreateAnd(temp, notZero); |
2490 | | // temp = temp >> exponentShift; |
2491 | 60 | temp = Builder.CreateAShr(temp, exponentShiftConst); |
2492 | | // exp = float(temp); |
2493 | 60 | Value *exp = Builder.CreateSIToFP(temp, Ty); |
2494 | 60 | Builder.CreateStore(exp, expPtr); |
2495 | | // temp = iVal & mantisaMask; |
2496 | 60 | temp = Builder.CreateAnd(intVal, mantisaMaskConst); |
2497 | | // temp = temp | mantisaOr; |
2498 | 60 | temp = Builder.CreateOr(temp, mantisaOrConst); |
2499 | | // mantisa = temp & ne; |
2500 | 60 | Value *mantisa = Builder.CreateAnd(temp, notZero); |
2501 | 60 | return Builder.CreateBitCast(mantisa, Ty); |
2502 | 60 | } |
2503 | | |
2504 | | Value *TranslateLdExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2505 | | HLOperationLowerHelper &helper, |
2506 | | HLObjectOperationLowerHelper *pObjHelper, |
2507 | 38 | bool &Translated) { |
2508 | 38 | hlsl::OP *hlslOP = &helper.hlslOP; |
2509 | 38 | Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2510 | 38 | Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2511 | 38 | IRBuilder<> Builder(CI); |
2512 | 38 | Value *exp = |
2513 | 38 | TrivialDxilUnaryOperation(OP::OpCode::Exp, src1, hlslOP, Builder); |
2514 | 38 | return Builder.CreateFMul(exp, src0); |
2515 | 38 | } |
2516 | | |
2517 | | Value *TranslateFWidth(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2518 | | HLOperationLowerHelper &helper, |
2519 | | HLObjectOperationLowerHelper *pObjHelper, |
2520 | 36 | bool &Translated) { |
2521 | 36 | hlsl::OP *hlslOP = &helper.hlslOP; |
2522 | 36 | Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2523 | 36 | IRBuilder<> Builder(CI); |
2524 | 36 | Value *ddx = |
2525 | 36 | TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseX, src, hlslOP, Builder); |
2526 | 36 | Value *absDdx = |
2527 | 36 | TrivialDxilUnaryOperation(OP::OpCode::FAbs, ddx, hlslOP, Builder); |
2528 | 36 | Value *ddy = |
2529 | 36 | TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseY, src, hlslOP, Builder); |
2530 | 36 | Value *absDdy = |
2531 | 36 | TrivialDxilUnaryOperation(OP::OpCode::FAbs, ddy, hlslOP, Builder); |
2532 | 36 | return Builder.CreateFAdd(absDdx, absDdy); |
2533 | 36 | } |
2534 | | |
2535 | | Value *TranslateLerp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2536 | | HLOperationLowerHelper &helper, |
2537 | | HLObjectOperationLowerHelper *pObjHelper, |
2538 | 348 | bool &Translated) { |
2539 | | // x + s(y-x) |
2540 | 348 | Value *x = CI->getArgOperand(HLOperandIndex::kLerpOpXIdx); |
2541 | 348 | Value *y = CI->getArgOperand(HLOperandIndex::kLerpOpYIdx); |
2542 | 348 | IRBuilder<> Builder(CI); |
2543 | 348 | Value *ySubx = Builder.CreateFSub(y, x); |
2544 | 348 | Value *s = CI->getArgOperand(HLOperandIndex::kLerpOpSIdx); |
2545 | 348 | Value *sMulSub = Builder.CreateFMul(s, ySubx); |
2546 | 348 | return Builder.CreateFAdd(x, sMulSub); |
2547 | 348 | } |
2548 | | |
2549 | | Value *TrivialDotOperation(OP::OpCode opcode, Value *src0, Value *src1, |
2550 | 2.38k | hlsl::OP *hlslOP, IRBuilder<> &Builder) { |
2551 | 2.38k | Type *Ty = src0->getType()->getScalarType(); |
2552 | 2.38k | Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty); |
2553 | 2.38k | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
2554 | | |
2555 | 2.38k | SmallVector<Value *, 9> args; |
2556 | 2.38k | args.emplace_back(opArg); |
2557 | | |
2558 | 2.38k | unsigned vecSize = src0->getType()->getVectorNumElements(); |
2559 | 9.76k | for (unsigned i = 0; i < vecSize; i++7.38k ) |
2560 | 7.38k | args.emplace_back(Builder.CreateExtractElement(src0, i)); |
2561 | | |
2562 | 9.76k | for (unsigned i = 0; i < vecSize; i++7.38k ) |
2563 | 7.38k | args.emplace_back(Builder.CreateExtractElement(src1, i)); |
2564 | 2.38k | Value *dotOP = Builder.CreateCall(dxilFunc, args); |
2565 | | |
2566 | 2.38k | return dotOP; |
2567 | 2.38k | } |
2568 | | |
2569 | | // Instead of using a DXIL intrinsic, implement a dot product operation using |
2570 | | // multiply and add operations. Used for integer dots and long vectors. |
2571 | | Value *ExpandDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, |
2572 | | IRBuilder<> &Builder, |
2573 | 366 | DXIL::OpCode MadOpCode = DXIL::OpCode::IMad) { |
2574 | 366 | Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0); |
2575 | 366 | Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0); |
2576 | 366 | Value *Result; |
2577 | 366 | if (Elt0->getType()->isFloatingPointTy()) |
2578 | 0 | Result = Builder.CreateFMul(Elt0, Elt1); |
2579 | 366 | else |
2580 | 366 | Result = Builder.CreateMul(Elt0, Elt1); |
2581 | 1.24k | for (unsigned Elt = 1; Elt < vecSize; ++Elt876 ) { |
2582 | 876 | Elt0 = Builder.CreateExtractElement(arg0, Elt); |
2583 | 876 | Elt1 = Builder.CreateExtractElement(arg1, Elt); |
2584 | 876 | Result = TrivialDxilTrinaryOperation(MadOpCode, Elt0, Elt1, Result, hlslOP, |
2585 | 876 | Builder); |
2586 | 876 | } |
2587 | | |
2588 | 366 | return Result; |
2589 | 366 | } |
2590 | | |
2591 | | Value *TranslateFDot(Value *arg0, Value *arg1, unsigned vecSize, |
2592 | 2.41k | hlsl::OP *hlslOP, IRBuilder<> &Builder) { |
2593 | 2.41k | switch (vecSize) { |
2594 | 198 | case 2: |
2595 | 198 | return TrivialDotOperation(OP::OpCode::Dot2, arg0, arg1, hlslOP, Builder); |
2596 | 0 | break; |
2597 | 1.76k | case 3: |
2598 | 1.76k | return TrivialDotOperation(OP::OpCode::Dot3, arg0, arg1, hlslOP, Builder); |
2599 | 0 | break; |
2600 | 426 | case 4: |
2601 | 426 | return TrivialDotOperation(OP::OpCode::Dot4, arg0, arg1, hlslOP, Builder); |
2602 | 0 | break; |
2603 | 28 | default: |
2604 | 28 | DXASSERT(vecSize == 1, "wrong vector size"); |
2605 | 28 | { |
2606 | 28 | Value *vecMul = Builder.CreateFMul(arg0, arg1); |
2607 | 28 | return Builder.CreateExtractElement(vecMul, (uint64_t)0); |
2608 | 0 | } |
2609 | 0 | break; |
2610 | 2.41k | } |
2611 | 2.41k | } |
2612 | | |
2613 | | Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2614 | | HLOperationLowerHelper &helper, |
2615 | | HLObjectOperationLowerHelper *pObjHelper, |
2616 | 1.98k | bool &Translated) { |
2617 | 1.98k | hlsl::OP *hlslOP = &helper.hlslOP; |
2618 | 1.98k | Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2619 | 1.98k | Type *Ty = arg0->getType(); |
2620 | 1.98k | Type *EltTy = Ty->getScalarType(); |
2621 | | |
2622 | | // SM6.9 introduced a DXIL operation for vectorized dot product |
2623 | | // The operation is only advantageous for vect size>1, vec1s will be |
2624 | | // lowered to a single Mul. |
2625 | 1.98k | if (hlslOP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus() && |
2626 | 1.98k | EltTy->isFloatingPointTy()20 && Ty->getVectorNumElements() > 120 ) { |
2627 | 18 | Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2628 | 18 | IRBuilder<> Builder(CI); |
2629 | 18 | Constant *opArg = hlslOP->GetU32Const((unsigned)DXIL::OpCode::FDot); |
2630 | 18 | Value *args[] = {opArg, arg0, arg1}; |
2631 | 18 | Function *dxilFunc = hlslOP->GetOpFunc(DXIL::OpCode::FDot, Ty); |
2632 | 18 | return TrivialDxilVectorOperation(dxilFunc, DXIL::OpCode::FDot, args, Ty, |
2633 | 18 | hlslOP, Builder); |
2634 | 18 | } |
2635 | | |
2636 | 1.96k | unsigned vecSize = Ty->getVectorNumElements(); |
2637 | 1.96k | Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2638 | 1.96k | IRBuilder<> Builder(CI); |
2639 | 1.96k | if (EltTy->isFloatingPointTy() && Ty->getVectorNumElements() <= 41.64k ) |
2640 | 1.64k | return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); |
2641 | | |
2642 | 320 | DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; |
2643 | 320 | if (IOP == IntrinsicOp::IOP_udot) |
2644 | 224 | MadOpCode = DXIL::OpCode::UMad; |
2645 | 96 | else if (EltTy->isFloatingPointTy()) |
2646 | 0 | MadOpCode = DXIL::OpCode::FMad; |
2647 | 320 | return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode); |
2648 | 1.96k | } |
2649 | | |
2650 | | Value *TranslateNormalize(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2651 | | HLOperationLowerHelper &helper, |
2652 | | HLObjectOperationLowerHelper *pObjHelper, |
2653 | 648 | bool &Translated) { |
2654 | 648 | hlsl::OP *hlslOP = &helper.hlslOP; |
2655 | 648 | Type *Ty = CI->getType(); |
2656 | 648 | Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2657 | 648 | VectorType *VT = cast<VectorType>(Ty); |
2658 | 648 | unsigned vecSize = VT->getNumElements(); |
2659 | | |
2660 | 648 | IRBuilder<> Builder(CI); |
2661 | 648 | Value *dot = TranslateFDot(op, op, vecSize, hlslOP, Builder); |
2662 | 648 | DXIL::OpCode rsqrtOp = DXIL::OpCode::Rsqrt; |
2663 | 648 | Function *dxilRsqrt = hlslOP->GetOpFunc(rsqrtOp, VT->getElementType()); |
2664 | 648 | Value *rsqrt = Builder.CreateCall( |
2665 | 648 | dxilRsqrt, {hlslOP->GetI32Const((unsigned)rsqrtOp), dot}, |
2666 | 648 | hlslOP->GetOpCodeName(rsqrtOp)); |
2667 | 648 | Value *vecRsqrt = UndefValue::get(VT); |
2668 | 2.60k | for (unsigned i = 0; i < VT->getNumElements(); i++1.95k ) |
2669 | 1.95k | vecRsqrt = Builder.CreateInsertElement(vecRsqrt, rsqrt, i); |
2670 | | |
2671 | 648 | return Builder.CreateFMul(op, vecRsqrt); |
2672 | 648 | } |
2673 | | |
2674 | | Value *TranslateReflect(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
2675 | | HLOperationLowerHelper &helper, |
2676 | | HLObjectOperationLowerHelper *pObjHelper, |
2677 | 16 | bool &Translated) { |
2678 | 16 | hlsl::OP *hlslOP = &helper.hlslOP; |
2679 | | // v = i - 2 * n * dot(i, n). |
2680 | 16 | IRBuilder<> Builder(CI); |
2681 | 16 | Value *i = CI->getArgOperand(HLOperandIndex::kReflectOpIIdx); |
2682 | 16 | Value *n = CI->getArgOperand(HLOperandIndex::kReflectOpNIdx); |
2683 | | |
2684 | 16 | VectorType *VT = cast<VectorType>(i->getType()); |
2685 | 16 | unsigned vecSize = VT->getNumElements(); |
2686 | 16 | Value *dot = TranslateFDot(i, n, vecSize, hlslOP, Builder); |
2687 | | // 2 * dot (i, n). |
2688 | 16 | dot = Builder.CreateFMul(ConstantFP::get(dot->getType(), 2.0), dot); |
2689 | | // 2 * n * dot(i, n). |
2690 | 16 | Value *vecDot = Builder.CreateVectorSplat(vecSize, dot); |
2691 | 16 | Value *nMulDot = Builder.CreateFMul(vecDot, n); |
2692 | | // i - 2 * n * dot(i, n). |
2693 | 16 | return Builder.CreateFSub(i, nMulDot); |
2694 | 16 | } |
2695 | | |
2696 | | Value *TranslateRefract(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
2697 | | HLOperationLowerHelper &helper, |
2698 | | HLObjectOperationLowerHelper *pObjHelper, |
2699 | 46 | bool &Translated) { |
2700 | 46 | hlsl::OP *hlslOP = &helper.hlslOP; |
2701 | | // d = dot(i, n); |
2702 | | // t = 1 - eta * eta * ( 1 - d*d); |
2703 | | // cond = t >= 1; |
2704 | | // r = eta * i - (eta * d + sqrt(t)) * n; |
2705 | | // return cond ? r : 0; |
2706 | 46 | IRBuilder<> Builder(CI); |
2707 | 46 | Value *i = CI->getArgOperand(HLOperandIndex::kRefractOpIIdx); |
2708 | 46 | Value *n = CI->getArgOperand(HLOperandIndex::kRefractOpNIdx); |
2709 | 46 | Value *eta = CI->getArgOperand(HLOperandIndex::kRefractOpEtaIdx); |
2710 | | |
2711 | 46 | VectorType *VT = cast<VectorType>(i->getType()); |
2712 | 46 | unsigned vecSize = VT->getNumElements(); |
2713 | 46 | Value *dot = TranslateFDot(i, n, vecSize, hlslOP, Builder); |
2714 | | // eta * eta; |
2715 | 46 | Value *eta2 = Builder.CreateFMul(eta, eta); |
2716 | | // d*d; |
2717 | 46 | Value *dot2 = Builder.CreateFMul(dot, dot); |
2718 | 46 | Constant *one = ConstantFP::get(eta->getType(), 1); |
2719 | 46 | Constant *zero = ConstantFP::get(eta->getType(), 0); |
2720 | | // 1- d*d; |
2721 | 46 | dot2 = Builder.CreateFSub(one, dot2); |
2722 | | // eta * eta * (1-d*d); |
2723 | 46 | eta2 = Builder.CreateFMul(dot2, eta2); |
2724 | | // t = 1 - eta * eta * ( 1 - d*d); |
2725 | 46 | Value *t = Builder.CreateFSub(one, eta2); |
2726 | | // cond = t >= 0; |
2727 | 46 | Value *cond = Builder.CreateFCmpOGE(t, zero); |
2728 | | // eta * i; |
2729 | 46 | Value *vecEta = UndefValue::get(VT); |
2730 | 176 | for (unsigned i = 0; i < vecSize; i++130 ) |
2731 | 130 | vecEta = Builder.CreateInsertElement(vecEta, eta, i); |
2732 | 46 | Value *etaMulI = Builder.CreateFMul(i, vecEta); |
2733 | | // sqrt(t); |
2734 | 46 | Value *sqrt = TrivialDxilUnaryOperation(OP::OpCode::Sqrt, t, hlslOP, Builder); |
2735 | | // eta * d; |
2736 | 46 | Value *etaMulD = Builder.CreateFMul(eta, dot); |
2737 | | // eta * d + sqrt(t); |
2738 | 46 | Value *etaSqrt = Builder.CreateFAdd(etaMulD, sqrt); |
2739 | | // (eta * d + sqrt(t)) * n; |
2740 | 46 | Value *vecEtaSqrt = Builder.CreateVectorSplat(vecSize, etaSqrt); |
2741 | 46 | Value *r = Builder.CreateFMul(vecEtaSqrt, n); |
2742 | | // r = eta * i - (eta * d + sqrt(t)) * n; |
2743 | 46 | r = Builder.CreateFSub(etaMulI, r); |
2744 | 46 | Value *refract = |
2745 | 46 | Builder.CreateSelect(cond, r, ConstantVector::getSplat(vecSize, zero)); |
2746 | 46 | return refract; |
2747 | 46 | } |
2748 | | |
2749 | | Value *TranslateSmoothStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2750 | | HLOperationLowerHelper &helper, |
2751 | | HLObjectOperationLowerHelper *pObjHelper, |
2752 | 60 | bool &Translated) { |
2753 | 60 | hlsl::OP *hlslOP = &helper.hlslOP; |
2754 | | // s = saturate((x-min)/(max-min)). |
2755 | 60 | IRBuilder<> Builder(CI); |
2756 | 60 | Value *minVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMinIdx); |
2757 | 60 | Value *maxVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMaxIdx); |
2758 | 60 | Value *maxSubMin = Builder.CreateFSub(maxVal, minVal); |
2759 | 60 | Value *x = CI->getArgOperand(HLOperandIndex::kSmoothStepOpXIdx); |
2760 | 60 | Value *xSubMin = Builder.CreateFSub(x, minVal); |
2761 | 60 | Value *satVal = Builder.CreateFDiv(xSubMin, maxSubMin); |
2762 | | |
2763 | 60 | Value *s = TrivialDxilUnaryOperation(DXIL::OpCode::Saturate, satVal, hlslOP, |
2764 | 60 | Builder); |
2765 | | // return s * s *(3-2*s). |
2766 | 60 | Constant *c2 = ConstantFP::get(CI->getType(), 2); |
2767 | 60 | Constant *c3 = ConstantFP::get(CI->getType(), 3); |
2768 | | |
2769 | 60 | Value *sMul2 = Builder.CreateFMul(s, c2); |
2770 | 60 | Value *result = Builder.CreateFSub(c3, sMul2); |
2771 | 60 | result = Builder.CreateFMul(s, result); |
2772 | 60 | result = Builder.CreateFMul(s, result); |
2773 | 60 | return result; |
2774 | 60 | } |
2775 | | |
2776 | | Value *TranslateMSad4(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2777 | | HLOperationLowerHelper &helper, |
2778 | | HLObjectOperationLowerHelper *pObjHelper, |
2779 | 16 | bool &Translated) { |
2780 | 16 | hlsl::OP *hlslOP = &helper.hlslOP; |
2781 | 16 | Value *ref = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); |
2782 | 16 | Value *src = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); |
2783 | 16 | Value *accum = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); |
2784 | 16 | Type *Ty = CI->getType(); |
2785 | 16 | IRBuilder<> Builder(CI); |
2786 | 16 | Value *vecRef = UndefValue::get(Ty); |
2787 | 80 | for (unsigned i = 0; i < 4; i++64 ) |
2788 | 64 | vecRef = Builder.CreateInsertElement(vecRef, ref, i); |
2789 | | |
2790 | 16 | Value *srcX = Builder.CreateExtractElement(src, (uint64_t)0); |
2791 | 16 | Value *srcY = Builder.CreateExtractElement(src, 1); |
2792 | | |
2793 | 16 | Value *byteSrc = UndefValue::get(Ty); |
2794 | 16 | byteSrc = Builder.CreateInsertElement(byteSrc, srcX, (uint64_t)0); |
2795 | | |
2796 | | // ushr r0.yzw, srcX, l(0, 8, 16, 24) |
2797 | | // bfi r1.yzw, l(0, 8, 16, 24), l(0, 24, 16, 8), srcX, r0.yyzw |
2798 | 16 | Value *bfiOpArg = |
2799 | 16 | hlslOP->GetU32Const(static_cast<unsigned>(DXIL::OpCode::Bfi)); |
2800 | | |
2801 | 16 | Value *imm8 = hlslOP->GetU32Const(8); |
2802 | 16 | Value *imm16 = hlslOP->GetU32Const(16); |
2803 | 16 | Value *imm24 = hlslOP->GetU32Const(24); |
2804 | | |
2805 | 16 | Ty = ref->getType(); |
2806 | | // Get x[31:8]. |
2807 | 16 | Value *srcXShift = Builder.CreateLShr(srcX, imm8); |
2808 | | // y[0~7] x[31:8]. |
2809 | 16 | Value *byteSrcElt = TrivialDxilOperation( |
2810 | 16 | DXIL::OpCode::Bfi, {bfiOpArg, imm8, imm24, srcY, srcXShift}, Ty, Ty, |
2811 | 16 | hlslOP, Builder); |
2812 | 16 | byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 1); |
2813 | | // Get x[31:16]. |
2814 | 16 | srcXShift = Builder.CreateLShr(srcXShift, imm8); |
2815 | | // y[0~15] x[31:16]. |
2816 | 16 | byteSrcElt = TrivialDxilOperation(DXIL::OpCode::Bfi, |
2817 | 16 | {bfiOpArg, imm16, imm16, srcY, srcXShift}, |
2818 | 16 | Ty, Ty, hlslOP, Builder); |
2819 | 16 | byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 2); |
2820 | | // Get x[31:24]. |
2821 | 16 | srcXShift = Builder.CreateLShr(srcXShift, imm8); |
2822 | | // y[0~23] x[31:24]. |
2823 | 16 | byteSrcElt = TrivialDxilOperation(DXIL::OpCode::Bfi, |
2824 | 16 | {bfiOpArg, imm24, imm8, srcY, srcXShift}, |
2825 | 16 | Ty, Ty, hlslOP, Builder); |
2826 | 16 | byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 3); |
2827 | | |
2828 | | // Msad on vecref and byteSrc. |
2829 | 16 | return TrivialDxilTrinaryOperation(DXIL::OpCode::Msad, vecRef, byteSrc, accum, |
2830 | 16 | hlslOP, Builder); |
2831 | 16 | } |
2832 | | |
2833 | | Value *TranslateRCP(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2834 | | HLOperationLowerHelper &helper, |
2835 | | HLObjectOperationLowerHelper *pObjHelper, |
2836 | 76 | bool &Translated) { |
2837 | 76 | Type *Ty = CI->getType(); |
2838 | 76 | Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2839 | 76 | IRBuilder<> Builder(CI); |
2840 | 76 | Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0); |
2841 | 76 | if (Ty != Ty->getScalarType()) { |
2842 | 56 | one = ConstantVector::getSplat(Ty->getVectorNumElements(), one); |
2843 | 56 | } |
2844 | 76 | return Builder.CreateFDiv(one, op); |
2845 | 76 | } |
2846 | | |
2847 | | Value *TranslateSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2848 | | HLOperationLowerHelper &helper, |
2849 | | HLObjectOperationLowerHelper *pObjHelper, |
2850 | 180 | bool &Translated) { |
2851 | 180 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2852 | 180 | Type *Ty = val->getType(); |
2853 | 180 | bool IsInt = Ty->getScalarType()->isIntegerTy(); |
2854 | | |
2855 | 180 | IRBuilder<> Builder(CI); |
2856 | 180 | Constant *zero = Constant::getNullValue(Ty); |
2857 | 180 | Value *zeroLtVal = IsInt ? Builder.CreateICmpSLT(zero, val)44 |
2858 | 180 | : Builder.CreateFCmpOLT(zero, val)136 ; |
2859 | 180 | Value *valLtZero = IsInt ? Builder.CreateICmpSLT(val, zero)44 |
2860 | 180 | : Builder.CreateFCmpOLT(val, zero)136 ; |
2861 | 180 | zeroLtVal = Builder.CreateZExt(zeroLtVal, CI->getType()); |
2862 | 180 | valLtZero = Builder.CreateZExt(valLtZero, CI->getType()); |
2863 | 180 | return Builder.CreateSub(zeroLtVal, valLtZero); |
2864 | 180 | } |
2865 | | |
2866 | | Value *TranslateUSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2867 | | HLOperationLowerHelper &helper, |
2868 | | HLObjectOperationLowerHelper *pObjHelper, |
2869 | 36 | bool &Translated) { |
2870 | 36 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2871 | 36 | Type *Ty = val->getType(); |
2872 | | |
2873 | 36 | IRBuilder<> Builder(CI); |
2874 | 36 | Constant *zero = Constant::getNullValue(Ty); |
2875 | 36 | Value *nonZero = Builder.CreateICmpNE(val, zero); |
2876 | 36 | return Builder.CreateZExt(nonZero, CI->getType()); |
2877 | 36 | } |
2878 | | |
2879 | | Value *TranslateStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2880 | | HLOperationLowerHelper &helper, |
2881 | | HLObjectOperationLowerHelper *pObjHelper, |
2882 | 36 | bool &Translated) { |
2883 | 36 | Value *edge = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2884 | 36 | Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2885 | 36 | Type *Ty = CI->getType(); |
2886 | 36 | IRBuilder<> Builder(CI); |
2887 | | |
2888 | 36 | Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0); |
2889 | 36 | Constant *zero = ConstantFP::get(Ty->getScalarType(), 0); |
2890 | 36 | Value *cond = Builder.CreateFCmpOLT(x, edge); |
2891 | | |
2892 | 36 | if (Ty != Ty->getScalarType()) { |
2893 | 20 | one = ConstantVector::getSplat(Ty->getVectorNumElements(), one); |
2894 | 20 | zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero); |
2895 | 20 | } |
2896 | | |
2897 | 36 | return Builder.CreateSelect(cond, zero, one); |
2898 | 36 | } |
2899 | | |
2900 | | Value *TranslatePow(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2901 | | HLOperationLowerHelper &helper, |
2902 | | HLObjectOperationLowerHelper *pObjHelper, |
2903 | 1.43k | bool &Translated) { |
2904 | 1.43k | hlsl::OP *hlslOP = &helper.hlslOP; |
2905 | 1.43k | Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2906 | 1.43k | Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2907 | 1.43k | bool isFXCCompatMode = |
2908 | 1.43k | CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode; |
2909 | 1.43k | IRBuilder<> Builder(CI); |
2910 | 1.43k | return TranslatePowImpl(hlslOP, Builder, x, y, isFXCCompatMode); |
2911 | 1.43k | } |
2912 | | |
2913 | | Value *TranslatePrintf(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, |
2914 | | HLOperationLowerHelper &helper, |
2915 | | HLObjectOperationLowerHelper *pObjHelper, |
2916 | 2 | bool &Translated) { |
2917 | 2 | Translated = false; |
2918 | 2 | dxilutil::EmitErrorOnInstruction(CI, |
2919 | 2 | "use of unsupported identifier 'printf'"); |
2920 | 2 | return nullptr; |
2921 | 2 | } |
2922 | | |
2923 | | Value *TranslateFaceforward(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
2924 | | HLOperationLowerHelper &helper, |
2925 | | HLObjectOperationLowerHelper *pObjHelper, |
2926 | 16 | bool &Translated) { |
2927 | 16 | hlsl::OP *hlslOP = &helper.hlslOP; |
2928 | 16 | Type *Ty = CI->getType(); |
2929 | | |
2930 | 16 | Value *n = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); |
2931 | 16 | Value *i = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); |
2932 | 16 | Value *ng = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); |
2933 | 16 | IRBuilder<> Builder(CI); |
2934 | | |
2935 | 16 | unsigned vecSize = Ty->getVectorNumElements(); |
2936 | | // -n x sign(dot(i, ng)). |
2937 | 16 | Value *dotOp = TranslateFDot(i, ng, vecSize, hlslOP, Builder); |
2938 | | |
2939 | 16 | Constant *zero = ConstantFP::get(Ty->getScalarType(), 0); |
2940 | 16 | Value *dotLtZero = Builder.CreateFCmpOLT(dotOp, zero); |
2941 | | |
2942 | 16 | Value *negN = Builder.CreateFNeg(n); |
2943 | 16 | Value *faceforward = Builder.CreateSelect(dotLtZero, n, negN); |
2944 | 16 | return faceforward; |
2945 | 16 | } |
2946 | | |
2947 | | Value *TrivialSetMeshOutputCounts(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
2948 | | HLOperationLowerHelper &helper, |
2949 | | HLObjectOperationLowerHelper *pObjHelper, |
2950 | 258 | bool &Translated) { |
2951 | 258 | hlsl::OP *hlslOP = &helper.hlslOP; |
2952 | 258 | Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2953 | 258 | Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2954 | 258 | IRBuilder<> Builder(CI); |
2955 | 258 | Constant *opArg = hlslOP->GetU32Const((unsigned)op); |
2956 | 258 | Value *args[] = {opArg, src0, src1}; |
2957 | 258 | Function *dxilFunc = hlslOP->GetOpFunc(op, Type::getVoidTy(CI->getContext())); |
2958 | | |
2959 | 258 | Builder.CreateCall(dxilFunc, args); |
2960 | 258 | return nullptr; |
2961 | 258 | } |
2962 | | |
2963 | | Value *TrivialDispatchMesh(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
2964 | | HLOperationLowerHelper &helper, |
2965 | | HLObjectOperationLowerHelper *pObjHelper, |
2966 | 260 | bool &Translated) { |
2967 | 260 | hlsl::OP *hlslOP = &helper.hlslOP; |
2968 | 260 | Value *src0 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadX); |
2969 | 260 | Value *src1 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadY); |
2970 | 260 | Value *src2 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadZ); |
2971 | 260 | Value *src3 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpPayload); |
2972 | 260 | IRBuilder<> Builder(CI); |
2973 | 260 | Constant *opArg = hlslOP->GetU32Const((unsigned)op); |
2974 | 260 | Value *args[] = {opArg, src0, src1, src2, src3}; |
2975 | 260 | Function *dxilFunc = hlslOP->GetOpFunc(op, src3->getType()); |
2976 | | |
2977 | 260 | Builder.CreateCall(dxilFunc, args); |
2978 | 260 | return nullptr; |
2979 | 260 | } |
2980 | | } // namespace |
2981 | | |
2982 | | // MOP intrinsics |
2983 | | namespace { |
2984 | | |
2985 | | Value *TranslateGetSamplePosition(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
2986 | | HLOperationLowerHelper &helper, |
2987 | | HLObjectOperationLowerHelper *pObjHelper, |
2988 | 48 | bool &Translated) { |
2989 | 48 | hlsl::OP *hlslOP = &helper.hlslOP; |
2990 | 48 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
2991 | | |
2992 | 48 | IRBuilder<> Builder(CI); |
2993 | 48 | Value *sampleIdx = |
2994 | 48 | CI->getArgOperand(HLOperandIndex::kGetSamplePositionSampleIdxOpIndex); |
2995 | | |
2996 | 48 | OP::OpCode opcode = OP::OpCode::Texture2DMSGetSamplePosition; |
2997 | 48 | llvm::Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
2998 | 48 | Function *dxilFunc = |
2999 | 48 | hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext())); |
3000 | | |
3001 | 48 | Value *args[] = {opArg, handle, sampleIdx}; |
3002 | 48 | Value *samplePos = Builder.CreateCall(dxilFunc, args); |
3003 | | |
3004 | 48 | Value *result = UndefValue::get(CI->getType()); |
3005 | 48 | Value *samplePosX = Builder.CreateExtractValue(samplePos, 0); |
3006 | 48 | Value *samplePosY = Builder.CreateExtractValue(samplePos, 1); |
3007 | 48 | result = Builder.CreateInsertElement(result, samplePosX, (uint64_t)0); |
3008 | 48 | result = Builder.CreateInsertElement(result, samplePosY, 1); |
3009 | 48 | return result; |
3010 | 48 | } |
3011 | | |
3012 | | Value *TranslateGetDimensions(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
3013 | | HLOperationLowerHelper &helper, |
3014 | | HLObjectOperationLowerHelper *pObjHelper, |
3015 | 226 | bool &Translated) { |
3016 | 226 | hlsl::OP *hlslOP = &helper.hlslOP; |
3017 | | |
3018 | 226 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
3019 | 226 | DxilResource::Kind RK = pObjHelper->GetRK(handle); |
3020 | | |
3021 | 226 | IRBuilder<> Builder(CI); |
3022 | 226 | OP::OpCode opcode = OP::OpCode::GetDimensions; |
3023 | 226 | llvm::Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
3024 | 226 | Function *dxilFunc = |
3025 | 226 | hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext())); |
3026 | | |
3027 | 226 | Type *i32Ty = Type::getInt32Ty(CI->getContext()); |
3028 | 226 | Value *mipLevel = UndefValue::get(i32Ty); |
3029 | 226 | unsigned widthOpIdx = HLOperandIndex::kGetDimensionsMipWidthOpIndex; |
3030 | 226 | switch (RK) { |
3031 | 0 | case DxilResource::Kind::Texture1D: |
3032 | 0 | case DxilResource::Kind::Texture1DArray: |
3033 | 56 | case DxilResource::Kind::Texture2D: |
3034 | 56 | case DxilResource::Kind::Texture2DArray: |
3035 | 66 | case DxilResource::Kind::TextureCube: |
3036 | 66 | case DxilResource::Kind::TextureCubeArray: |
3037 | 66 | case DxilResource::Kind::Texture3D: { |
3038 | 66 | Value *opMipLevel = |
3039 | 66 | CI->getArgOperand(HLOperandIndex::kGetDimensionsMipLevelOpIndex); |
3040 | | // mipLevel is in parameter, should not be pointer. |
3041 | 66 | if (!opMipLevel->getType()->isPointerTy()) |
3042 | 24 | mipLevel = opMipLevel; |
3043 | 42 | else { |
3044 | | // No mip level. |
3045 | 42 | widthOpIdx = HLOperandIndex::kGetDimensionsNoMipWidthOpIndex; |
3046 | 42 | mipLevel = ConstantInt::get(i32Ty, 0); |
3047 | 42 | } |
3048 | 66 | } break; |
3049 | 160 | default: |
3050 | 160 | widthOpIdx = HLOperandIndex::kGetDimensionsNoMipWidthOpIndex; |
3051 | 160 | break; |
3052 | 226 | } |
3053 | 226 | Value *args[] = {opArg, handle, mipLevel}; |
3054 | 226 | Value *dims = Builder.CreateCall(dxilFunc, args); |
3055 | | |
3056 | 226 | unsigned dimensionIdx = 0; |
3057 | | |
3058 | 226 | Value *width = Builder.CreateExtractValue(dims, dimensionIdx++); |
3059 | 226 | Value *widthPtr = CI->getArgOperand(widthOpIdx); |
3060 | 226 | if (widthPtr->getType()->getPointerElementType()->isFloatingPointTy()) |
3061 | 8 | width = Builder.CreateSIToFP(width, |
3062 | 8 | widthPtr->getType()->getPointerElementType()); |
3063 | | |
3064 | 226 | Builder.CreateStore(width, widthPtr); |
3065 | | |
3066 | 226 | if (DXIL::IsStructuredBuffer(RK)) { |
3067 | | // Set stride. |
3068 | 52 | Value *stridePtr = CI->getArgOperand(widthOpIdx + 1); |
3069 | 52 | const DataLayout &DL = helper.dataLayout; |
3070 | 52 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
3071 | 52 | Type *bufTy = pObjHelper->GetResourceType(handle); |
3072 | 52 | Type *bufRetTy = bufTy->getStructElementType(0); |
3073 | 52 | unsigned stride = DL.getTypeAllocSize(bufRetTy); |
3074 | 52 | Builder.CreateStore(hlslOP->GetU32Const(stride), stridePtr); |
3075 | 174 | } else { |
3076 | 174 | if (widthOpIdx == HLOperandIndex::kGetDimensionsMipWidthOpIndex || |
3077 | | // Samples is in w channel too. |
3078 | 174 | RK == DXIL::ResourceKind::Texture2DMS150 ) { |
3079 | | // Has mip. |
3080 | 68 | for (unsigned argIdx = widthOpIdx + 1; |
3081 | 136 | argIdx < CI->getNumArgOperands() - 1; argIdx++68 ) { |
3082 | 68 | Value *dim = Builder.CreateExtractValue(dims, dimensionIdx++); |
3083 | 68 | Value *ptr = CI->getArgOperand(argIdx); |
3084 | 68 | if (ptr->getType()->getPointerElementType()->isFloatingPointTy()) |
3085 | 0 | dim = Builder.CreateSIToFP(dim, |
3086 | 0 | ptr->getType()->getPointerElementType()); |
3087 | 68 | Builder.CreateStore(dim, ptr); |
3088 | 68 | } |
3089 | | // NumOfLevel is in w channel. |
3090 | 68 | dimensionIdx = 3; |
3091 | 68 | Value *dim = Builder.CreateExtractValue(dims, dimensionIdx); |
3092 | 68 | Value *ptr = CI->getArgOperand(CI->getNumArgOperands() - 1); |
3093 | 68 | if (ptr->getType()->getPointerElementType()->isFloatingPointTy()) |
3094 | 0 | dim = |
3095 | 0 | Builder.CreateSIToFP(dim, ptr->getType()->getPointerElementType()); |
3096 | 68 | Builder.CreateStore(dim, ptr); |
3097 | 106 | } else { |
3098 | 292 | for (unsigned argIdx = widthOpIdx + 1; argIdx < CI->getNumArgOperands(); |
3099 | 186 | argIdx++) { |
3100 | 186 | Value *dim = Builder.CreateExtractValue(dims, dimensionIdx++); |
3101 | 186 | Value *ptr = CI->getArgOperand(argIdx); |
3102 | 186 | if (ptr->getType()->getPointerElementType()->isFloatingPointTy()) |
3103 | 8 | dim = Builder.CreateSIToFP(dim, |
3104 | 8 | ptr->getType()->getPointerElementType()); |
3105 | 186 | Builder.CreateStore(dim, ptr); |
3106 | 186 | } |
3107 | 106 | } |
3108 | 174 | } |
3109 | 226 | return nullptr; |
3110 | 226 | } |
3111 | | |
3112 | | Value *GenerateUpdateCounter(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
3113 | | HLOperationLowerHelper &helper, |
3114 | | HLObjectOperationLowerHelper *pObjHelper, |
3115 | 2.94k | bool &Translated) { |
3116 | 2.94k | hlsl::OP *hlslOP = &helper.hlslOP; |
3117 | 2.94k | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
3118 | | |
3119 | 2.94k | pObjHelper->MarkHasCounter(handle, helper.i8Ty); |
3120 | | |
3121 | 2.94k | bool bInc = IOP == IntrinsicOp::MOP_IncrementCounter; |
3122 | 2.94k | IRBuilder<> Builder(CI); |
3123 | | |
3124 | 2.94k | OP::OpCode OpCode = OP::OpCode::BufferUpdateCounter; |
3125 | 2.94k | Value *OpCodeArg = hlslOP->GetU32Const((unsigned)OpCode); |
3126 | 2.94k | Value *IncVal = hlslOP->GetI8Const(bInc ? 12.65k : -1286 ); |
3127 | | // Create BufferUpdateCounter call. |
3128 | 2.94k | Value *Args[] = {OpCodeArg, handle, IncVal}; |
3129 | | |
3130 | 2.94k | Function *F = |
3131 | 2.94k | hlslOP->GetOpFunc(OpCode, Type::getVoidTy(handle->getContext())); |
3132 | 2.94k | return Builder.CreateCall(F, Args); |
3133 | 2.94k | } |
3134 | | |
3135 | | static Value *ScalarizeResRet(Type *RetTy, Value *ResRet, |
3136 | 5.78k | IRBuilder<> &Builder) { |
3137 | | // Extract value part. |
3138 | 5.78k | Value *retVal = llvm::UndefValue::get(RetTy); |
3139 | 5.78k | if (RetTy->isVectorTy()) { |
3140 | 24.1k | for (unsigned i = 0; i < RetTy->getVectorNumElements(); i++19.2k ) { |
3141 | 19.2k | Value *retComp = Builder.CreateExtractValue(ResRet, i); |
3142 | 19.2k | retVal = Builder.CreateInsertElement(retVal, retComp, i); |
3143 | 19.2k | } |
3144 | 4.90k | } else { |
3145 | 886 | retVal = Builder.CreateExtractValue(ResRet, 0); |
3146 | 886 | } |
3147 | 5.78k | return retVal; |
3148 | 5.78k | } |
3149 | | |
3150 | | void UpdateStatus(Value *ResRet, Value *status, IRBuilder<> &Builder, |
3151 | | hlsl::OP *hlslOp, |
3152 | 19.8k | unsigned StatusIndex = DXIL::kResRetStatusIndex) { |
3153 | 19.8k | if (status && !isa<UndefValue>(status)2.25k ) { |
3154 | 2.25k | Value *statusVal = Builder.CreateExtractValue(ResRet, StatusIndex); |
3155 | 2.25k | Value *checkAccessOp = hlslOp->GetI32Const( |
3156 | 2.25k | static_cast<unsigned>(DXIL::OpCode::CheckAccessFullyMapped)); |
3157 | 2.25k | Function *checkAccessFn = hlslOp->GetOpFunc( |
3158 | 2.25k | DXIL::OpCode::CheckAccessFullyMapped, statusVal->getType()); |
3159 | | // CheckAccess on status. |
3160 | 2.25k | Value *bStatus = |
3161 | 2.25k | Builder.CreateCall(checkAccessFn, {checkAccessOp, statusVal}); |
3162 | 2.25k | Value *extStatus = |
3163 | 2.25k | Builder.CreateZExt(bStatus, Type::getInt32Ty(status->getContext())); |
3164 | 2.25k | Builder.CreateStore(extStatus, status); |
3165 | 2.25k | } |
3166 | 19.8k | } |
3167 | | |
3168 | 3.15k | Value *SplatToVector(Value *Elt, Type *DstTy, IRBuilder<> &Builder) { |
3169 | 3.15k | Value *Result = UndefValue::get(DstTy); |
3170 | 10.1k | for (unsigned i = 0; i < DstTy->getVectorNumElements(); i++6.97k ) |
3171 | 6.97k | Result = Builder.CreateInsertElement(Result, Elt, i); |
3172 | 3.15k | return Result; |
3173 | 3.15k | } |
3174 | | |
3175 | | Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
3176 | | HLOperationLowerHelper &helper, |
3177 | | HLObjectOperationLowerHelper *pObjHelper, |
3178 | 140 | bool &Translated) { |
3179 | | |
3180 | 140 | hlsl::OP *hlslOP = &helper.hlslOP; |
3181 | 140 | Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
3182 | 140 | Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
3183 | 140 | Type *arg0Ty = arg0->getType(); |
3184 | 140 | Type *arg1Ty = arg1->getType(); |
3185 | 140 | IRBuilder<> Builder(CI); |
3186 | | |
3187 | 140 | if (arg0Ty->isVectorTy()) { |
3188 | 104 | if (arg1Ty->isVectorTy()) { |
3189 | | // mul(vector, vector) == dot(vector, vector) |
3190 | 84 | unsigned vecSize = arg0Ty->getVectorNumElements(); |
3191 | 84 | if (arg0Ty->getScalarType()->isFloatingPointTy()) { |
3192 | 38 | return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); |
3193 | 38 | } |
3194 | | |
3195 | 46 | DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; |
3196 | 46 | if (IOP == IntrinsicOp::IOP_umul) |
3197 | 20 | MadOpCode = DXIL::OpCode::UMad; |
3198 | 46 | return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode); |
3199 | 84 | } else { |
3200 | | // mul(vector, scalar) == vector * scalar-splat |
3201 | 20 | arg1 = SplatToVector(arg1, arg0Ty, Builder); |
3202 | 20 | } |
3203 | 104 | } else { |
3204 | 36 | if (arg1Ty->isVectorTy()) { |
3205 | | // mul(scalar, vector) == scalar-splat * vector |
3206 | 24 | arg0 = SplatToVector(arg0, arg1Ty, Builder); |
3207 | 24 | } |
3208 | | // else mul(scalar, scalar) == scalar * scalar; |
3209 | 36 | } |
3210 | | |
3211 | | // create fmul/mul for the pair of vectors or scalars |
3212 | 56 | if (arg0Ty->getScalarType()->isFloatingPointTy()) { |
3213 | 26 | return Builder.CreateFMul(arg0, arg1); |
3214 | 26 | } |
3215 | 30 | return Builder.CreateMul(arg0, arg1); |
3216 | 56 | } |
3217 | | |
3218 | | // Sample intrinsics. |
3219 | | struct SampleHelper { |
3220 | | SampleHelper(CallInst *CI, OP::OpCode op, |
3221 | | HLObjectOperationLowerHelper *pObjHelper); |
3222 | | |
3223 | | OP::OpCode opcode = OP::OpCode::NumOpCodes; |
3224 | | DXIL::ResourceKind resourceKind = DXIL::ResourceKind::Invalid; |
3225 | | Value *sampledTexHandle = nullptr; |
3226 | | Value *texHandle = nullptr; |
3227 | | Value *samplerHandle = nullptr; |
3228 | | static const unsigned kMaxCoordDimensions = 4; |
3229 | | unsigned coordDimensions = 0; |
3230 | | Value *coord[kMaxCoordDimensions]; |
3231 | | Value *compareValue = nullptr; |
3232 | | Value *bias = nullptr; |
3233 | | Value *lod = nullptr; |
3234 | | // SampleGrad only. |
3235 | | static const unsigned kMaxDDXYDimensions = 3; |
3236 | | Value *ddx[kMaxDDXYDimensions]; |
3237 | | Value *ddy[kMaxDDXYDimensions]; |
3238 | | // Optional. |
3239 | | static const unsigned kMaxOffsetDimensions = 3; |
3240 | | unsigned offsetDimensions = 0; |
3241 | | Value *offset[kMaxOffsetDimensions]; |
3242 | | Value *clamp = nullptr; |
3243 | | Value *status = nullptr; |
3244 | | unsigned maxHLOperandRead = 0; |
3245 | 19.5k | Value *ReadHLOperand(CallInst *CI, unsigned opIdx) { |
3246 | 19.5k | if (CI->getNumArgOperands() > opIdx) { |
3247 | 9.01k | maxHLOperandRead = std::max(maxHLOperandRead, opIdx); |
3248 | 9.01k | return CI->getArgOperand(opIdx); |
3249 | 9.01k | } |
3250 | 10.5k | return nullptr; |
3251 | 19.5k | } |
3252 | 4.86k | void TranslateCoord(CallInst *CI, unsigned coordIdx) { |
3253 | 4.86k | Value *coordArg = ReadHLOperand(CI, coordIdx); |
3254 | 4.86k | DXASSERT_NOMSG(coordArg); |
3255 | 4.86k | DXASSERT(coordArg->getType()->getVectorNumElements() == coordDimensions, |
3256 | 4.86k | "otherwise, HL coordinate dimensions mismatch"); |
3257 | 4.86k | IRBuilder<> Builder(CI); |
3258 | 15.5k | for (unsigned i = 0; i < coordDimensions; i++10.6k ) |
3259 | 10.6k | coord[i] = Builder.CreateExtractElement(coordArg, i); |
3260 | 4.86k | Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext())); |
3261 | 13.6k | for (unsigned i = coordDimensions; i < kMaxCoordDimensions; i++8.80k ) |
3262 | 8.80k | coord[i] = undefF; |
3263 | 4.86k | } |
3264 | 4.39k | void TranslateOffset(CallInst *CI, unsigned offsetIdx) { |
3265 | 4.39k | IntegerType *i32Ty = Type::getInt32Ty(CI->getContext()); |
3266 | 4.39k | if (Value *offsetArg = ReadHLOperand(CI, offsetIdx)) { |
3267 | 706 | DXASSERT(offsetArg->getType()->getVectorNumElements() == offsetDimensions, |
3268 | 706 | "otherwise, HL coordinate dimensions mismatch"); |
3269 | 706 | IRBuilder<> Builder(CI); |
3270 | 2.07k | for (unsigned i = 0; i < offsetDimensions; i++1.36k ) |
3271 | 1.36k | offset[i] = Builder.CreateExtractElement(offsetArg, i); |
3272 | 3.68k | } else { |
3273 | | // Use zeros for offsets when not specified, not undef. |
3274 | 3.68k | Value *zero = ConstantInt::get(i32Ty, (uint64_t)0); |
3275 | 10.1k | for (unsigned i = 0; i < offsetDimensions; i++6.50k ) |
3276 | 6.50k | offset[i] = zero; |
3277 | 3.68k | } |
3278 | | // Use undef for components that should not be used for this resource dim. |
3279 | 4.39k | Value *undefI = UndefValue::get(i32Ty); |
3280 | 9.69k | for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++5.30k ) |
3281 | 5.30k | offset[i] = undefI; |
3282 | 4.39k | } |
3283 | 308 | void SetBias(CallInst *CI, unsigned biasIdx) { |
3284 | | // Clamp bias for immediate. |
3285 | 308 | bias = ReadHLOperand(CI, biasIdx); |
3286 | 308 | DXASSERT_NOMSG(bias); |
3287 | 308 | if (ConstantFP *FP = dyn_cast<ConstantFP>(bias)) { |
3288 | 224 | float v = FP->getValueAPF().convertToFloat(); |
3289 | 224 | if (v > DXIL::kMaxMipLodBias) |
3290 | 16 | bias = ConstantFP::get(FP->getType(), DXIL::kMaxMipLodBias); |
3291 | 224 | if (v < DXIL::kMinMipLodBias) |
3292 | 24 | bias = ConstantFP::get(FP->getType(), DXIL::kMinMipLodBias); |
3293 | 224 | } |
3294 | 308 | } |
3295 | 1.21k | void SetLOD(CallInst *CI, unsigned lodIdx) { |
3296 | 1.21k | lod = ReadHLOperand(CI, lodIdx); |
3297 | 1.21k | DXASSERT_NOMSG(lod); |
3298 | 1.21k | } |
3299 | 650 | void SetCompareValue(CallInst *CI, unsigned cmpIdx) { |
3300 | 650 | compareValue = ReadHLOperand(CI, cmpIdx); |
3301 | 650 | DXASSERT_NOMSG(compareValue); |
3302 | 650 | } |
3303 | 3.33k | void SetClamp(CallInst *CI, unsigned clampIdx) { |
3304 | 3.33k | if ((clamp = ReadHLOperand(CI, clampIdx))) { |
3305 | 516 | if (clamp->getType()->isVectorTy()) { |
3306 | 0 | IRBuilder<> Builder(CI); |
3307 | 0 | clamp = Builder.CreateExtractElement(clamp, (uint64_t)0); |
3308 | 0 | } |
3309 | 516 | } else |
3310 | 2.82k | clamp = UndefValue::get(Type::getFloatTy(CI->getContext())); |
3311 | 3.33k | } |
3312 | 4.39k | void SetStatus(CallInst *CI, unsigned statusIdx) { |
3313 | 4.39k | status = ReadHLOperand(CI, statusIdx); |
3314 | 4.39k | } |
3315 | 200 | void SetDDX(CallInst *CI, unsigned ddxIdx) { |
3316 | 200 | SetDDXY(CI, ddx, ReadHLOperand(CI, ddxIdx)); |
3317 | 200 | } |
3318 | 200 | void SetDDY(CallInst *CI, unsigned ddyIdx) { |
3319 | 200 | SetDDXY(CI, ddy, ReadHLOperand(CI, ddyIdx)); |
3320 | 200 | } |
3321 | 400 | void SetDDXY(CallInst *CI, MutableArrayRef<Value *> ddxy, Value *ddxyArg) { |
3322 | 400 | DXASSERT_NOMSG(ddxyArg); |
3323 | 400 | IRBuilder<> Builder(CI); |
3324 | 400 | unsigned ddxySize = ddxyArg->getType()->getVectorNumElements(); |
3325 | 1.32k | for (unsigned i = 0; i < ddxySize; i++928 ) |
3326 | 928 | ddxy[i] = Builder.CreateExtractElement(ddxyArg, i); |
3327 | 400 | Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext())); |
3328 | 672 | for (unsigned i = ddxySize; i < kMaxDDXYDimensions; i++272 ) |
3329 | 272 | ddxy[i] = undefF; |
3330 | 400 | } |
3331 | | }; |
3332 | | |
3333 | | SampleHelper::SampleHelper(CallInst *CI, OP::OpCode op, |
3334 | | HLObjectOperationLowerHelper *pObjHelper) |
3335 | 4.86k | : opcode(op) { |
3336 | | |
3337 | 4.86k | texHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
3338 | 4.86k | resourceKind = pObjHelper->GetRK(texHandle); |
3339 | 4.86k | if (resourceKind == DXIL::ResourceKind::Invalid) { |
3340 | 0 | opcode = DXIL::OpCode::NumOpCodes; |
3341 | 0 | return; |
3342 | 0 | } |
3343 | | |
3344 | 4.86k | coordDimensions = opcode == DXIL::OpCode::CalculateLOD |
3345 | 4.86k | ? DxilResource::GetNumDimensionsForCalcLOD(resourceKind)172 |
3346 | 4.86k | : DxilResource::GetNumCoords(resourceKind)4.69k ; |
3347 | 4.86k | offsetDimensions = DxilResource::GetNumOffsets(resourceKind); |
3348 | | |
3349 | 4.86k | const bool bFeedbackOp = hlsl::OP::IsDxilOpFeedback(op); |
3350 | 4.86k | sampledTexHandle = |
3351 | 4.86k | bFeedbackOp ? CI->getArgOperand( |
3352 | 300 | HLOperandIndex::kWriteSamplerFeedbackSampledArgIndex) |
3353 | 4.86k | : nullptr4.56k ; |
3354 | 4.86k | const unsigned kSamplerArgIndex = |
3355 | 4.86k | bFeedbackOp ? HLOperandIndex::kWriteSamplerFeedbackSamplerArgIndex300 |
3356 | 4.86k | : HLOperandIndex::kSampleSamplerArgIndex4.56k ; |
3357 | 4.86k | samplerHandle = CI->getArgOperand(kSamplerArgIndex); |
3358 | | |
3359 | 4.86k | const unsigned kCoordArgIdx = |
3360 | 4.86k | bFeedbackOp ? HLOperandIndex::kWriteSamplerFeedbackCoordArgIndex300 |
3361 | 4.86k | : HLOperandIndex::kSampleCoordArgIndex4.56k ; |
3362 | 4.86k | TranslateCoord(CI, kCoordArgIdx); |
3363 | | |
3364 | | // TextureCube does not support offsets, shifting each subsequent arg index |
3365 | | // down by 1 |
3366 | 4.86k | unsigned cube = (resourceKind == DXIL::ResourceKind::TextureCube || |
3367 | 4.86k | resourceKind == DXIL::ResourceKind::TextureCubeArray4.66k ) |
3368 | 4.86k | ? 1402 |
3369 | 4.86k | : 04.46k ; |
3370 | | |
3371 | 4.86k | switch (op) { |
3372 | 2.42k | case OP::OpCode::Sample: |
3373 | 2.42k | TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx66 |
3374 | 2.42k | : HLOperandIndex::kSampleOffsetArgIndex2.36k ); |
3375 | 2.42k | SetClamp(CI, HLOperandIndex::kSampleClampArgIndex - cube); |
3376 | 2.42k | SetStatus(CI, HLOperandIndex::kSampleStatusArgIndex - cube); |
3377 | 2.42k | break; |
3378 | 1.00k | case OP::OpCode::SampleLevel: |
3379 | 1.00k | SetLOD(CI, HLOperandIndex::kSampleLLevelArgIndex); |
3380 | 1.00k | TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx44 |
3381 | 1.00k | : HLOperandIndex::kSampleLOffsetArgIndex956 ); |
3382 | 1.00k | SetStatus(CI, HLOperandIndex::kSampleLStatusArgIndex - cube); |
3383 | 1.00k | break; |
3384 | 196 | case OP::OpCode::SampleBias: |
3385 | 196 | SetBias(CI, HLOperandIndex::kSampleBBiasArgIndex); |
3386 | 196 | TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx48 |
3387 | 196 | : HLOperandIndex::kSampleBOffsetArgIndex148 ); |
3388 | 196 | SetClamp(CI, HLOperandIndex::kSampleBClampArgIndex - cube); |
3389 | 196 | SetStatus(CI, HLOperandIndex::kSampleBStatusArgIndex - cube); |
3390 | 196 | break; |
3391 | 222 | case OP::OpCode::SampleCmp: |
3392 | 222 | SetCompareValue(CI, HLOperandIndex::kSampleCmpCmpValArgIndex); |
3393 | 222 | TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx66 |
3394 | 222 | : HLOperandIndex::kSampleCmpOffsetArgIndex156 ); |
3395 | 222 | SetClamp(CI, HLOperandIndex::kSampleCmpClampArgIndex - cube); |
3396 | 222 | SetStatus(CI, HLOperandIndex::kSampleCmpStatusArgIndex - cube); |
3397 | 222 | break; |
3398 | 48 | case OP::OpCode::SampleCmpBias: |
3399 | 48 | SetBias(CI, HLOperandIndex::kSampleCmpBBiasArgIndex); |
3400 | 48 | SetCompareValue(CI, HLOperandIndex::kSampleCmpBCmpValArgIndex); |
3401 | 48 | TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx8 |
3402 | 48 | : HLOperandIndex::kSampleCmpBOffsetArgIndex40 ); |
3403 | 48 | SetClamp(CI, HLOperandIndex::kSampleCmpBClampArgIndex - cube); |
3404 | 48 | SetStatus(CI, HLOperandIndex::kSampleCmpBStatusArgIndex - cube); |
3405 | 48 | break; |
3406 | 48 | case OP::OpCode::SampleCmpGrad: |
3407 | 48 | SetDDX(CI, HLOperandIndex::kSampleCmpGDDXArgIndex); |
3408 | 48 | SetDDY(CI, HLOperandIndex::kSampleCmpGDDYArgIndex); |
3409 | 48 | SetCompareValue(CI, HLOperandIndex::kSampleCmpGCmpValArgIndex); |
3410 | 48 | TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx16 |
3411 | 48 | : HLOperandIndex::kSampleCmpGOffsetArgIndex32 ); |
3412 | 48 | SetClamp(CI, HLOperandIndex::kSampleCmpGClampArgIndex - cube); |
3413 | 48 | SetStatus(CI, HLOperandIndex::kSampleCmpGStatusArgIndex - cube); |
3414 | 48 | break; |
3415 | 192 | case OP::OpCode::SampleCmpLevel: |
3416 | 192 | SetCompareValue(CI, HLOperandIndex::kSampleCmpCmpValArgIndex); |
3417 | 192 | TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx48 |
3418 | 192 | : HLOperandIndex::kSampleCmpLOffsetArgIndex144 ); |
3419 | 192 | SetLOD(CI, HLOperandIndex::kSampleCmpLLevelArgIndex); |
3420 | 192 | SetStatus(CI, HLOperandIndex::kSampleCmpStatusArgIndex - cube); |
3421 | 192 | break; |
3422 | 140 | case OP::OpCode::SampleCmpLevelZero: |
3423 | 140 | SetCompareValue(CI, HLOperandIndex::kSampleCmpLZCmpValArgIndex); |
3424 | 140 | TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx32 |
3425 | 140 | : HLOperandIndex::kSampleCmpLZOffsetArgIndex108 ); |
3426 | 140 | SetStatus(CI, HLOperandIndex::kSampleCmpLZStatusArgIndex - cube); |
3427 | 140 | break; |
3428 | 120 | case OP::OpCode::SampleGrad: |
3429 | 120 | SetDDX(CI, HLOperandIndex::kSampleGDDXArgIndex); |
3430 | 120 | SetDDY(CI, HLOperandIndex::kSampleGDDYArgIndex); |
3431 | 120 | TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx48 |
3432 | 120 | : HLOperandIndex::kSampleGOffsetArgIndex72 ); |
3433 | 120 | SetClamp(CI, HLOperandIndex::kSampleGClampArgIndex - cube); |
3434 | 120 | SetStatus(CI, HLOperandIndex::kSampleGStatusArgIndex - cube); |
3435 | 120 | break; |
3436 | 172 | case OP::OpCode::CalculateLOD: |
3437 | | // Only need coord for LOD calculation. |
3438 | 172 | break; |
3439 | 180 | case OP::OpCode::WriteSamplerFeedback: |
3440 | 180 | SetClamp(CI, HLOperandIndex::kWriteSamplerFeedback_ClampArgIndex); |
3441 | 180 | break; |
3442 | 64 | case OP::OpCode::WriteSamplerFeedbackBias: |
3443 | 64 | SetBias(CI, HLOperandIndex::kWriteSamplerFeedbackBias_BiasArgIndex); |
3444 | 64 | SetClamp(CI, HLOperandIndex::kWriteSamplerFeedbackBias_ClampArgIndex); |
3445 | 64 | break; |
3446 | 32 | case OP::OpCode::WriteSamplerFeedbackGrad: |
3447 | 32 | SetDDX(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_DdxArgIndex); |
3448 | 32 | SetDDY(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_DdyArgIndex); |
3449 | 32 | SetClamp(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_ClampArgIndex); |
3450 | 32 | break; |
3451 | 24 | case OP::OpCode::WriteSamplerFeedbackLevel: |
3452 | 24 | SetLOD(CI, HLOperandIndex::kWriteSamplerFeedbackLevel_LodArgIndex); |
3453 | 24 | break; |
3454 | 0 | default: |
3455 | 0 | DXASSERT(0, "invalid opcode for Sample"); |
3456 | 0 | break; |
3457 | 4.86k | } |
3458 | 4.86k | DXASSERT(maxHLOperandRead == CI->getNumArgOperands() - 1, |
3459 | 4.86k | "otherwise, unused HL arguments for Sample op"); |
3460 | 4.86k | } |
3461 | | |
3462 | | Value *TranslateCalculateLOD(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
3463 | | HLOperationLowerHelper &helper, |
3464 | | HLObjectOperationLowerHelper *pObjHelper, |
3465 | 172 | bool &Translated) { |
3466 | 172 | hlsl::OP *hlslOP = &helper.hlslOP; |
3467 | 172 | SampleHelper sampleHelper(CI, OP::OpCode::CalculateLOD, pObjHelper); |
3468 | 172 | if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) { |
3469 | 0 | Translated = false; |
3470 | 0 | return nullptr; |
3471 | 0 | } |
3472 | | |
3473 | 172 | bool bClamped = IOP == IntrinsicOp::MOP_CalculateLevelOfDetail; |
3474 | 172 | IRBuilder<> Builder(CI); |
3475 | 172 | Value *opArg = |
3476 | 172 | hlslOP->GetU32Const(static_cast<unsigned>(OP::OpCode::CalculateLOD)); |
3477 | 172 | Value *clamped = hlslOP->GetI1Const(bClamped); |
3478 | | |
3479 | 172 | Value *args[] = {opArg, |
3480 | 172 | sampleHelper.texHandle, |
3481 | 172 | sampleHelper.samplerHandle, |
3482 | 172 | sampleHelper.coord[0], |
3483 | 172 | sampleHelper.coord[1], |
3484 | 172 | sampleHelper.coord[2], |
3485 | 172 | clamped}; |
3486 | 172 | Function *dxilFunc = hlslOP->GetOpFunc(OP::OpCode::CalculateLOD, |
3487 | 172 | Type::getFloatTy(opArg->getContext())); |
3488 | 172 | Value *LOD = Builder.CreateCall(dxilFunc, args); |
3489 | 172 | return LOD; |
3490 | 172 | } |
3491 | | |
3492 | | Value *TranslateCheckAccess(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
3493 | | HLOperationLowerHelper &helper, |
3494 | | HLObjectOperationLowerHelper *pObjHelper, |
3495 | 456 | bool &Translated) { |
3496 | | // Translate CheckAccess into uint->bool, later optimization should remove it. |
3497 | | // Real checkaccess is generated in UpdateStatus. |
3498 | 456 | IRBuilder<> Builder(CI); |
3499 | 456 | Value *V = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
3500 | 456 | return Builder.CreateTrunc(V, helper.i1Ty); |
3501 | 456 | } |
3502 | | |
3503 | | void GenerateDxilSample(CallInst *CI, Function *F, ArrayRef<Value *> sampleArgs, |
3504 | 4.39k | Value *status, hlsl::OP *hlslOp) { |
3505 | 4.39k | IRBuilder<> Builder(CI); |
3506 | | |
3507 | 4.39k | CallInst *call = Builder.CreateCall(F, sampleArgs); |
3508 | | |
3509 | 4.39k | dxilutil::MigrateDebugValue(CI, call); |
3510 | | |
3511 | | // extract value part |
3512 | 4.39k | Value *retVal = ScalarizeResRet(CI->getType(), call, Builder); |
3513 | | |
3514 | | // Replace ret val. |
3515 | 4.39k | CI->replaceAllUsesWith(retVal); |
3516 | | |
3517 | | // get status |
3518 | 4.39k | if (status) { |
3519 | 352 | UpdateStatus(call, status, Builder, hlslOp); |
3520 | 352 | } |
3521 | 4.39k | } |
3522 | | |
3523 | | Value *TranslateSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
3524 | | HLOperationLowerHelper &helper, |
3525 | | HLObjectOperationLowerHelper *pObjHelper, |
3526 | 4.39k | bool &Translated) { |
3527 | 4.39k | hlsl::OP *hlslOP = &helper.hlslOP; |
3528 | 4.39k | SampleHelper sampleHelper(CI, opcode, pObjHelper); |
3529 | | |
3530 | 4.39k | if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) { |
3531 | 0 | Translated = false; |
3532 | 0 | return nullptr; |
3533 | 0 | } |
3534 | 4.39k | Type *Ty = CI->getType(); |
3535 | | |
3536 | 4.39k | Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType()); |
3537 | | |
3538 | 4.39k | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
3539 | | |
3540 | 4.39k | switch (opcode) { |
3541 | 2.42k | case OP::OpCode::Sample: { |
3542 | 2.42k | Value *sampleArgs[] = { |
3543 | 2.42k | opArg, sampleHelper.texHandle, sampleHelper.samplerHandle, |
3544 | | // Coord. |
3545 | 2.42k | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3546 | 2.42k | sampleHelper.coord[3], |
3547 | | // Offset. |
3548 | 2.42k | sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2], |
3549 | | // Clamp. |
3550 | 2.42k | sampleHelper.clamp}; |
3551 | 2.42k | GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP); |
3552 | 2.42k | } break; |
3553 | 1.00k | case OP::OpCode::SampleLevel: { |
3554 | 1.00k | Value *sampleArgs[] = { |
3555 | 1.00k | opArg, sampleHelper.texHandle, sampleHelper.samplerHandle, |
3556 | | // Coord. |
3557 | 1.00k | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3558 | 1.00k | sampleHelper.coord[3], |
3559 | | // Offset. |
3560 | 1.00k | sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2], |
3561 | | // LOD. |
3562 | 1.00k | sampleHelper.lod}; |
3563 | 1.00k | GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP); |
3564 | 1.00k | } break; |
3565 | 120 | case OP::OpCode::SampleGrad: { |
3566 | 120 | Value *sampleArgs[] = { |
3567 | 120 | opArg, sampleHelper.texHandle, sampleHelper.samplerHandle, |
3568 | | // Coord. |
3569 | 120 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3570 | 120 | sampleHelper.coord[3], |
3571 | | // Offset. |
3572 | 120 | sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2], |
3573 | | // Ddx. |
3574 | 120 | sampleHelper.ddx[0], sampleHelper.ddx[1], sampleHelper.ddx[2], |
3575 | | // Ddy. |
3576 | 120 | sampleHelper.ddy[0], sampleHelper.ddy[1], sampleHelper.ddy[2], |
3577 | | // Clamp. |
3578 | 120 | sampleHelper.clamp}; |
3579 | 120 | GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP); |
3580 | 120 | } break; |
3581 | 196 | case OP::OpCode::SampleBias: { |
3582 | 196 | Value *sampleArgs[] = { |
3583 | 196 | opArg, sampleHelper.texHandle, sampleHelper.samplerHandle, |
3584 | | // Coord. |
3585 | 196 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3586 | 196 | sampleHelper.coord[3], |
3587 | | // Offset. |
3588 | 196 | sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2], |
3589 | | // Bias. |
3590 | 196 | sampleHelper.bias, |
3591 | | // Clamp. |
3592 | 196 | sampleHelper.clamp}; |
3593 | 196 | GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP); |
3594 | 196 | } break; |
3595 | 48 | case OP::OpCode::SampleCmpBias: { |
3596 | 48 | Value *sampleArgs[] = { |
3597 | 48 | opArg, sampleHelper.texHandle, sampleHelper.samplerHandle, |
3598 | | // Coord. |
3599 | 48 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3600 | 48 | sampleHelper.coord[3], |
3601 | | // Offset. |
3602 | 48 | sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2], |
3603 | | // CmpVal. |
3604 | 48 | sampleHelper.compareValue, |
3605 | | // Bias. |
3606 | 48 | sampleHelper.bias, |
3607 | | // Clamp. |
3608 | 48 | sampleHelper.clamp}; |
3609 | 48 | GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP); |
3610 | 48 | } break; |
3611 | 48 | case OP::OpCode::SampleCmpGrad: { |
3612 | 48 | Value *sampleArgs[] = { |
3613 | 48 | opArg, sampleHelper.texHandle, sampleHelper.samplerHandle, |
3614 | | // Coord. |
3615 | 48 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3616 | 48 | sampleHelper.coord[3], |
3617 | | // Offset. |
3618 | 48 | sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2], |
3619 | | // CmpVal. |
3620 | 48 | sampleHelper.compareValue, |
3621 | | // Ddx. |
3622 | 48 | sampleHelper.ddx[0], sampleHelper.ddx[1], sampleHelper.ddx[2], |
3623 | | // Ddy. |
3624 | 48 | sampleHelper.ddy[0], sampleHelper.ddy[1], sampleHelper.ddy[2], |
3625 | | // Clamp. |
3626 | 48 | sampleHelper.clamp}; |
3627 | 48 | GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP); |
3628 | 48 | } break; |
3629 | 222 | case OP::OpCode::SampleCmp: { |
3630 | 222 | Value *sampleArgs[] = { |
3631 | 222 | opArg, sampleHelper.texHandle, sampleHelper.samplerHandle, |
3632 | | // Coord. |
3633 | 222 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3634 | 222 | sampleHelper.coord[3], |
3635 | | // Offset. |
3636 | 222 | sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2], |
3637 | | // CmpVal. |
3638 | 222 | sampleHelper.compareValue, |
3639 | | // Clamp. |
3640 | 222 | sampleHelper.clamp}; |
3641 | 222 | GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP); |
3642 | 222 | } break; |
3643 | 192 | case OP::OpCode::SampleCmpLevel: { |
3644 | 192 | Value *sampleArgs[] = { |
3645 | 192 | opArg, sampleHelper.texHandle, sampleHelper.samplerHandle, |
3646 | | // Coord. |
3647 | 192 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3648 | 192 | sampleHelper.coord[3], |
3649 | | // Offset. |
3650 | 192 | sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2], |
3651 | | // CmpVal. |
3652 | 192 | sampleHelper.compareValue, |
3653 | | // LOD. |
3654 | 192 | sampleHelper.lod}; |
3655 | 192 | GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP); |
3656 | 192 | } break; |
3657 | 140 | case OP::OpCode::SampleCmpLevelZero: |
3658 | 140 | default: { |
3659 | 140 | DXASSERT(opcode == OP::OpCode::SampleCmpLevelZero, "invalid sample opcode"); |
3660 | 140 | Value *sampleArgs[] = { |
3661 | 140 | opArg, sampleHelper.texHandle, sampleHelper.samplerHandle, |
3662 | | // Coord. |
3663 | 140 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3664 | 140 | sampleHelper.coord[3], |
3665 | | // Offset. |
3666 | 140 | sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2], |
3667 | | // CmpVal. |
3668 | 140 | sampleHelper.compareValue}; |
3669 | 140 | GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP); |
3670 | 140 | } break; |
3671 | 4.39k | } |
3672 | | // CI is replaced in GenerateDxilSample. |
3673 | 4.39k | return nullptr; |
3674 | 4.39k | } |
3675 | | |
3676 | | // Gather intrinsics. |
3677 | | struct GatherHelper { |
3678 | | enum class GatherChannel { |
3679 | | GatherAll, |
3680 | | GatherRed, |
3681 | | GatherGreen, |
3682 | | GatherBlue, |
3683 | | GatherAlpha, |
3684 | | }; |
3685 | | |
3686 | | GatherHelper(CallInst *CI, OP::OpCode op, |
3687 | | HLObjectOperationLowerHelper *pObjHelper, |
3688 | | GatherHelper::GatherChannel ch); |
3689 | | |
3690 | | OP::OpCode opcode; |
3691 | | Value *texHandle; |
3692 | | Value *samplerHandle; |
3693 | | static const unsigned kMaxCoordDimensions = 4; |
3694 | | Value *coord[kMaxCoordDimensions]; |
3695 | | unsigned channel; |
3696 | | Value *special; // For CompareValue, Bias, LOD. |
3697 | | // Optional. |
3698 | | static const unsigned kMaxOffsetDimensions = 2; |
3699 | | Value *offset[kMaxOffsetDimensions]; |
3700 | | // For the overload send different offset for each sample. |
3701 | | // Only save 3 sampleOffsets because use offset for normal overload as first |
3702 | | // sample offset. |
3703 | | static const unsigned kSampleOffsetDimensions = 3; |
3704 | | Value *sampleOffsets[kSampleOffsetDimensions][kMaxOffsetDimensions]; |
3705 | | Value *status; |
3706 | | |
3707 | | bool hasSampleOffsets; |
3708 | | |
3709 | | unsigned maxHLOperandRead = 0; |
3710 | 6.79k | Value *ReadHLOperand(CallInst *CI, unsigned opIdx) { |
3711 | 6.79k | if (CI->getNumArgOperands() > opIdx) { |
3712 | 4.58k | maxHLOperandRead = std::max(maxHLOperandRead, opIdx); |
3713 | 4.58k | return CI->getArgOperand(opIdx); |
3714 | 4.58k | } |
3715 | 2.20k | return nullptr; |
3716 | 6.79k | } |
3717 | | void TranslateCoord(CallInst *CI, unsigned coordIdx, |
3718 | 1.73k | unsigned coordDimensions) { |
3719 | 1.73k | Value *coordArg = ReadHLOperand(CI, coordIdx); |
3720 | 1.73k | DXASSERT_NOMSG(coordArg); |
3721 | 1.73k | DXASSERT(coordArg->getType()->getVectorNumElements() == coordDimensions, |
3722 | 1.73k | "otherwise, HL coordinate dimensions mismatch"); |
3723 | 1.73k | IRBuilder<> Builder(CI); |
3724 | 5.80k | for (unsigned i = 0; i < coordDimensions; i++4.06k ) |
3725 | 4.06k | coord[i] = Builder.CreateExtractElement(coordArg, i); |
3726 | 1.73k | Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext())); |
3727 | 4.62k | for (unsigned i = coordDimensions; i < kMaxCoordDimensions; i++2.88k ) |
3728 | 2.88k | coord[i] = undefF; |
3729 | 1.73k | } |
3730 | 1.73k | void SetStatus(CallInst *CI, unsigned statusIdx) { |
3731 | 1.73k | status = ReadHLOperand(CI, statusIdx); |
3732 | 1.73k | } |
3733 | | void TranslateOffset(CallInst *CI, unsigned offsetIdx, |
3734 | 1.73k | unsigned offsetDimensions) { |
3735 | 1.73k | IntegerType *i32Ty = Type::getInt32Ty(CI->getContext()); |
3736 | 1.73k | if (Value *offsetArg = ReadHLOperand(CI, offsetIdx)) { |
3737 | 804 | DXASSERT(offsetArg->getType()->getVectorNumElements() == offsetDimensions, |
3738 | 804 | "otherwise, HL coordinate dimensions mismatch"); |
3739 | 804 | IRBuilder<> Builder(CI); |
3740 | 2.41k | for (unsigned i = 0; i < offsetDimensions; i++1.60k ) |
3741 | 1.60k | offset[i] = Builder.CreateExtractElement(offsetArg, i); |
3742 | 934 | } else { |
3743 | | // Use zeros for offsets when not specified, not undef. |
3744 | 934 | Value *zero = ConstantInt::get(i32Ty, (uint64_t)0); |
3745 | 2.14k | for (unsigned i = 0; i < offsetDimensions; i++1.21k ) |
3746 | 1.21k | offset[i] = zero; |
3747 | 934 | } |
3748 | | // Use undef for components that should not be used for this resource dim. |
3749 | 1.73k | Value *undefI = UndefValue::get(i32Ty); |
3750 | 2.39k | for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++656 ) |
3751 | 656 | offset[i] = undefI; |
3752 | 1.73k | } |
3753 | | void TranslateSampleOffset(CallInst *CI, unsigned offsetIdx, |
3754 | 848 | unsigned offsetDimensions) { |
3755 | 848 | Value *undefI = UndefValue::get(Type::getInt32Ty(CI->getContext())); |
3756 | 848 | if (CI->getNumArgOperands() >= (offsetIdx + kSampleOffsetDimensions)) { |
3757 | 344 | hasSampleOffsets = true; |
3758 | 344 | IRBuilder<> Builder(CI); |
3759 | 1.37k | for (unsigned ch = 0; ch < kSampleOffsetDimensions; ch++1.03k ) { |
3760 | 1.03k | Value *offsetArg = ReadHLOperand(CI, offsetIdx + ch); |
3761 | 3.09k | for (unsigned i = 0; i < offsetDimensions; i++2.06k ) |
3762 | 2.06k | sampleOffsets[ch][i] = Builder.CreateExtractElement(offsetArg, i); |
3763 | 1.03k | for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++0 ) |
3764 | 0 | sampleOffsets[ch][i] = undefI; |
3765 | 1.03k | } |
3766 | 344 | } |
3767 | 848 | } |
3768 | | // Update the offset args for gather with sample offset at sampleIdx. |
3769 | | void UpdateOffsetInGatherArgs(MutableArrayRef<Value *> gatherArgs, |
3770 | 1.03k | unsigned sampleIdx) { |
3771 | 1.03k | unsigned offsetBase = DXIL::OperandIndex::kTextureGatherOffset0OpIdx; |
3772 | 3.09k | for (unsigned i = 0; i < kMaxOffsetDimensions; i++2.06k ) |
3773 | | // -1 because offset for sample 0 is in GatherHelper::offset. |
3774 | 2.06k | gatherArgs[offsetBase + i] = sampleOffsets[sampleIdx - 1][i]; |
3775 | 1.03k | } |
3776 | | }; |
3777 | | |
3778 | | GatherHelper::GatherHelper(CallInst *CI, OP::OpCode op, |
3779 | | HLObjectOperationLowerHelper *pObjHelper, |
3780 | | GatherHelper::GatherChannel ch) |
3781 | 1.73k | : opcode(op), special(nullptr), hasSampleOffsets(false) { |
3782 | | |
3783 | 1.73k | switch (ch) { |
3784 | 626 | case GatherChannel::GatherAll: |
3785 | 626 | channel = 0; |
3786 | 626 | break; |
3787 | 320 | case GatherChannel::GatherRed: |
3788 | 320 | channel = 0; |
3789 | 320 | break; |
3790 | 256 | case GatherChannel::GatherGreen: |
3791 | 256 | channel = 1; |
3792 | 256 | break; |
3793 | 272 | case GatherChannel::GatherBlue: |
3794 | 272 | channel = 2; |
3795 | 272 | break; |
3796 | 264 | case GatherChannel::GatherAlpha: |
3797 | 264 | channel = 3; |
3798 | 264 | break; |
3799 | 1.73k | } |
3800 | | |
3801 | 1.73k | IRBuilder<> Builder(CI); |
3802 | 1.73k | texHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
3803 | 1.73k | samplerHandle = CI->getArgOperand(HLOperandIndex::kSampleSamplerArgIndex); |
3804 | | |
3805 | 1.73k | DXIL::ResourceKind RK = pObjHelper->GetRK(texHandle); |
3806 | 1.73k | if (RK == DXIL::ResourceKind::Invalid) { |
3807 | 0 | opcode = DXIL::OpCode::NumOpCodes; |
3808 | 0 | return; |
3809 | 0 | } |
3810 | 1.73k | unsigned coordSize = DxilResource::GetNumCoords(RK); |
3811 | 1.73k | unsigned offsetSize = DxilResource::GetNumOffsets(RK); |
3812 | 1.73k | bool cube = RK == DXIL::ResourceKind::TextureCube || |
3813 | 1.73k | RK == DXIL::ResourceKind::TextureCubeArray1.57k ; |
3814 | | |
3815 | 1.73k | const unsigned kCoordArgIdx = HLOperandIndex::kSampleCoordArgIndex; |
3816 | 1.73k | TranslateCoord(CI, kCoordArgIdx, coordSize); |
3817 | | |
3818 | 1.73k | switch (op) { |
3819 | 1.04k | case OP::OpCode::TextureGather: { |
3820 | 1.04k | unsigned statusIdx; |
3821 | 1.04k | if (cube) { |
3822 | 168 | TranslateOffset(CI, HLOperandIndex::kInvalidIdx, offsetSize); |
3823 | 168 | statusIdx = HLOperandIndex::kGatherCubeStatusArgIndex; |
3824 | 880 | } else { |
3825 | 880 | TranslateOffset(CI, HLOperandIndex::kGatherOffsetArgIndex, offsetSize); |
3826 | | // Gather all don't have sample offset version overload. |
3827 | 880 | if (ch != GatherChannel::GatherAll) |
3828 | 536 | TranslateSampleOffset(CI, HLOperandIndex::kGatherSampleOffsetArgIndex, |
3829 | 536 | offsetSize); |
3830 | 880 | statusIdx = hasSampleOffsets |
3831 | 880 | ? HLOperandIndex::kGatherStatusWithSampleOffsetArgIndex216 |
3832 | 880 | : HLOperandIndex::kGatherStatusArgIndex664 ; |
3833 | 880 | } |
3834 | 1.04k | SetStatus(CI, statusIdx); |
3835 | 1.04k | } break; |
3836 | 546 | case OP::OpCode::TextureGatherCmp: { |
3837 | 546 | special = ReadHLOperand(CI, HLOperandIndex::kGatherCmpCmpValArgIndex); |
3838 | 546 | unsigned statusIdx; |
3839 | 546 | if (cube) { |
3840 | 160 | TranslateOffset(CI, HLOperandIndex::kInvalidIdx, offsetSize); |
3841 | 160 | statusIdx = HLOperandIndex::kGatherCmpCubeStatusArgIndex; |
3842 | 386 | } else { |
3843 | 386 | TranslateOffset(CI, HLOperandIndex::kGatherCmpOffsetArgIndex, offsetSize); |
3844 | | // Gather all don't have sample offset version overload. |
3845 | 386 | if (ch != GatherChannel::GatherAll) |
3846 | 312 | TranslateSampleOffset( |
3847 | 312 | CI, HLOperandIndex::kGatherCmpSampleOffsetArgIndex, offsetSize); |
3848 | 386 | statusIdx = hasSampleOffsets |
3849 | 386 | ? HLOperandIndex::kGatherCmpStatusWithSampleOffsetArgIndex128 |
3850 | 386 | : HLOperandIndex::kGatherCmpStatusArgIndex258 ; |
3851 | 386 | } |
3852 | 546 | SetStatus(CI, statusIdx); |
3853 | 546 | } break; |
3854 | 144 | case OP::OpCode::TextureGatherRaw: { |
3855 | 144 | unsigned statusIdx; |
3856 | 144 | TranslateOffset(CI, HLOperandIndex::kGatherOffsetArgIndex, offsetSize); |
3857 | | // Gather all don't have sample offset version overload. |
3858 | 144 | DXASSERT(ch == GatherChannel::GatherAll, |
3859 | 144 | "Raw gather must use all channels"); |
3860 | 144 | DXASSERT(!cube, "Raw gather can't be used with cube textures"); |
3861 | 144 | DXASSERT(!hasSampleOffsets, |
3862 | 144 | "Raw gather doesn't support individual offsets"); |
3863 | 144 | statusIdx = HLOperandIndex::kGatherStatusArgIndex; |
3864 | 144 | SetStatus(CI, statusIdx); |
3865 | 144 | } break; |
3866 | 0 | default: |
3867 | 0 | DXASSERT(0, "invalid opcode for Gather"); |
3868 | 0 | break; |
3869 | 1.73k | } |
3870 | 1.73k | DXASSERT(maxHLOperandRead == CI->getNumArgOperands() - 1, |
3871 | 1.73k | "otherwise, unused HL arguments for Sample op"); |
3872 | 1.73k | } |
3873 | | |
3874 | | void GenerateDxilGather(CallInst *CI, Function *F, |
3875 | | MutableArrayRef<Value *> gatherArgs, |
3876 | 1.73k | GatherHelper &helper, hlsl::OP *hlslOp) { |
3877 | 1.73k | IRBuilder<> Builder(CI); |
3878 | | |
3879 | 1.73k | CallInst *call = Builder.CreateCall(F, gatherArgs); |
3880 | | |
3881 | 1.73k | dxilutil::MigrateDebugValue(CI, call); |
3882 | | |
3883 | 1.73k | Value *retVal; |
3884 | 1.73k | if (!helper.hasSampleOffsets) { |
3885 | | // extract value part |
3886 | 1.39k | retVal = ScalarizeResRet(CI->getType(), call, Builder); |
3887 | 1.39k | } else { |
3888 | 344 | retVal = UndefValue::get(CI->getType()); |
3889 | 344 | Value *elt = Builder.CreateExtractValue(call, (uint64_t)0); |
3890 | 344 | retVal = Builder.CreateInsertElement(retVal, elt, (uint64_t)0); |
3891 | | |
3892 | 344 | helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 1); |
3893 | 344 | CallInst *callY = Builder.CreateCall(F, gatherArgs); |
3894 | 344 | elt = Builder.CreateExtractValue(callY, (uint64_t)1); |
3895 | 344 | retVal = Builder.CreateInsertElement(retVal, elt, 1); |
3896 | | |
3897 | 344 | helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 2); |
3898 | 344 | CallInst *callZ = Builder.CreateCall(F, gatherArgs); |
3899 | 344 | elt = Builder.CreateExtractValue(callZ, (uint64_t)2); |
3900 | 344 | retVal = Builder.CreateInsertElement(retVal, elt, 2); |
3901 | | |
3902 | 344 | helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 3); |
3903 | 344 | CallInst *callW = Builder.CreateCall(F, gatherArgs); |
3904 | 344 | elt = Builder.CreateExtractValue(callW, (uint64_t)3); |
3905 | 344 | retVal = Builder.CreateInsertElement(retVal, elt, 3); |
3906 | | |
3907 | | // TODO: UpdateStatus for each gather call. |
3908 | 344 | } |
3909 | | |
3910 | | // Replace ret val. |
3911 | 1.73k | CI->replaceAllUsesWith(retVal); |
3912 | | |
3913 | | // Get status |
3914 | 1.73k | if (helper.status) { |
3915 | 464 | UpdateStatus(call, helper.status, Builder, hlslOp); |
3916 | 464 | } |
3917 | 1.73k | } |
3918 | | |
3919 | | Value *TranslateGather(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
3920 | | HLOperationLowerHelper &helper, |
3921 | | HLObjectOperationLowerHelper *pObjHelper, |
3922 | 1.73k | bool &Translated) { |
3923 | 1.73k | hlsl::OP *hlslOP = &helper.hlslOP; |
3924 | 1.73k | GatherHelper::GatherChannel ch = GatherHelper::GatherChannel::GatherAll; |
3925 | 1.73k | switch (IOP) { |
3926 | 376 | case IntrinsicOp::MOP_Gather: |
3927 | 482 | case IntrinsicOp::MOP_GatherCmp: |
3928 | 626 | case IntrinsicOp::MOP_GatherRaw: |
3929 | 626 | ch = GatherHelper::GatherChannel::GatherAll; |
3930 | 626 | break; |
3931 | 192 | case IntrinsicOp::MOP_GatherRed: |
3932 | 320 | case IntrinsicOp::MOP_GatherCmpRed: |
3933 | 320 | ch = GatherHelper::GatherChannel::GatherRed; |
3934 | 320 | break; |
3935 | 152 | case IntrinsicOp::MOP_GatherGreen: |
3936 | 256 | case IntrinsicOp::MOP_GatherCmpGreen: |
3937 | 256 | ch = GatherHelper::GatherChannel::GatherGreen; |
3938 | 256 | break; |
3939 | 168 | case IntrinsicOp::MOP_GatherBlue: |
3940 | 272 | case IntrinsicOp::MOP_GatherCmpBlue: |
3941 | 272 | ch = GatherHelper::GatherChannel::GatherBlue; |
3942 | 272 | break; |
3943 | 160 | case IntrinsicOp::MOP_GatherAlpha: |
3944 | 264 | case IntrinsicOp::MOP_GatherCmpAlpha: |
3945 | 264 | ch = GatherHelper::GatherChannel::GatherAlpha; |
3946 | 264 | break; |
3947 | 0 | default: |
3948 | 0 | DXASSERT(0, "invalid gather intrinsic"); |
3949 | 0 | break; |
3950 | 1.73k | } |
3951 | | |
3952 | 1.73k | GatherHelper gatherHelper(CI, opcode, pObjHelper, ch); |
3953 | | |
3954 | 1.73k | if (gatherHelper.opcode == DXIL::OpCode::NumOpCodes) { |
3955 | 0 | Translated = false; |
3956 | 0 | return nullptr; |
3957 | 0 | } |
3958 | 1.73k | Type *Ty = CI->getType(); |
3959 | | |
3960 | 1.73k | Function *F = hlslOP->GetOpFunc(gatherHelper.opcode, Ty->getScalarType()); |
3961 | | |
3962 | 1.73k | Constant *opArg = hlslOP->GetU32Const((unsigned)gatherHelper.opcode); |
3963 | 1.73k | Value *channelArg = hlslOP->GetU32Const(gatherHelper.channel); |
3964 | | |
3965 | 1.73k | switch (opcode) { |
3966 | 1.04k | case OP::OpCode::TextureGather: { |
3967 | 1.04k | Value *gatherArgs[] = {opArg, gatherHelper.texHandle, |
3968 | 1.04k | gatherHelper.samplerHandle, |
3969 | | // Coord. |
3970 | 1.04k | gatherHelper.coord[0], gatherHelper.coord[1], |
3971 | 1.04k | gatherHelper.coord[2], gatherHelper.coord[3], |
3972 | | // Offset. |
3973 | 1.04k | gatherHelper.offset[0], gatherHelper.offset[1], |
3974 | | // Channel. |
3975 | 1.04k | channelArg}; |
3976 | 1.04k | GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP); |
3977 | 1.04k | } break; |
3978 | 546 | case OP::OpCode::TextureGatherCmp: { |
3979 | 546 | Value *gatherArgs[] = {opArg, gatherHelper.texHandle, |
3980 | 546 | gatherHelper.samplerHandle, |
3981 | | // Coord. |
3982 | 546 | gatherHelper.coord[0], gatherHelper.coord[1], |
3983 | 546 | gatherHelper.coord[2], gatherHelper.coord[3], |
3984 | | // Offset. |
3985 | 546 | gatherHelper.offset[0], gatherHelper.offset[1], |
3986 | | // Channel. |
3987 | 546 | channelArg, |
3988 | | // CmpVal. |
3989 | 546 | gatherHelper.special}; |
3990 | 546 | GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP); |
3991 | 546 | } break; |
3992 | 144 | case OP::OpCode::TextureGatherRaw: { |
3993 | 144 | Value *gatherArgs[] = {opArg, gatherHelper.texHandle, |
3994 | 144 | gatherHelper.samplerHandle, |
3995 | | // Coord. |
3996 | 144 | gatherHelper.coord[0], gatherHelper.coord[1], |
3997 | 144 | gatherHelper.coord[2], gatherHelper.coord[3], |
3998 | | // Offset. |
3999 | 144 | gatherHelper.offset[0], gatherHelper.offset[1]}; |
4000 | 144 | GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP); |
4001 | 144 | break; |
4002 | 0 | } |
4003 | 0 | default: |
4004 | 0 | DXASSERT(0, "invalid opcode for Gather"); |
4005 | 0 | break; |
4006 | 1.73k | } |
4007 | | // CI is replaced in GenerateDxilGather. |
4008 | 1.73k | return nullptr; |
4009 | 1.73k | } |
4010 | | |
// Lowers an HL sampler-feedback call to the DXIL op function for `opcode`.
// Operands are taken from a SampleHelper built over CI. If that helper reports
// OpCode::NumOpCodes, Translated is cleared and nullptr is returned.
// Returns the created call; an unknown opcode asserts and returns nullptr.
// IOP is not consulted in this function.
4011 | | static Value * |
4012 | | TranslateWriteSamplerFeedback(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
4013 | | HLOperationLowerHelper &helper, |
4014 | | HLObjectOperationLowerHelper *pObjHelper, |
4015 | 300 | bool &Translated) { |
4016 | 300 | hlsl::OP *hlslOP = &helper.hlslOP; |
4017 | 300 | SampleHelper sampleHelper(CI, opcode, pObjHelper); |
4018 | | |
4019 | 300 | if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) { |
4020 | 0 | Translated = false; |
4021 | 0 | return nullptr; |
4022 | 0 | } |
4023 | 300 | Type *Ty = CI->getType(); |
4024 | | |
4025 | 300 | Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType()); |
4026 | | |
4027 | 300 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
4028 | | |
4029 | 300 | IRBuilder<> Builder(CI); |
4030 | | |
// Each case differs only in the operands that follow the four coordinates.
4031 | 300 | switch (opcode) { |
4032 | 180 | case OP::OpCode::WriteSamplerFeedback: { |
4033 | 180 | Value *samplerFeedbackArgs[] = { |
4034 | 180 | opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle, |
4035 | 180 | sampleHelper.samplerHandle, |
4036 | | // Coord. |
4037 | 180 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
4038 | 180 | sampleHelper.coord[3], |
4039 | | // Clamp. |
4040 | 180 | sampleHelper.clamp}; |
4041 | 180 | return Builder.CreateCall(F, samplerFeedbackArgs); |
4042 | 0 | } break; |
4043 | 64 | case OP::OpCode::WriteSamplerFeedbackBias: { |
4044 | 64 | Value *samplerFeedbackArgs[] = { |
4045 | 64 | opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle, |
4046 | 64 | sampleHelper.samplerHandle, |
4047 | | // Coord. |
4048 | 64 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
4049 | 64 | sampleHelper.coord[3], |
4050 | | // Bias. |
4051 | 64 | sampleHelper.bias, |
4052 | | // Clamp. |
4053 | 64 | sampleHelper.clamp}; |
4054 | 64 | return Builder.CreateCall(F, samplerFeedbackArgs); |
4055 | 0 | } break; |
4056 | 32 | case OP::OpCode::WriteSamplerFeedbackGrad: { |
4057 | 32 | Value *samplerFeedbackArgs[] = { |
4058 | 32 | opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle, |
4059 | 32 | sampleHelper.samplerHandle, |
4060 | | // Coord. |
4061 | 32 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
4062 | 32 | sampleHelper.coord[3], |
4063 | | // Ddx. |
4064 | 32 | sampleHelper.ddx[0], sampleHelper.ddx[1], sampleHelper.ddx[2], |
4065 | | // Ddy. |
4066 | 32 | sampleHelper.ddy[0], sampleHelper.ddy[1], sampleHelper.ddy[2], |
4067 | | // Clamp. |
4068 | 32 | sampleHelper.clamp}; |
4069 | 32 | return Builder.CreateCall(F, samplerFeedbackArgs); |
4070 | 0 | } break; |
4071 | 24 | case OP::OpCode::WriteSamplerFeedbackLevel: { |
4072 | 24 | Value *samplerFeedbackArgs[] = { |
4073 | 24 | opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle, |
4074 | 24 | sampleHelper.samplerHandle, |
4075 | | // Coord. |
4076 | 24 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
4077 | 24 | sampleHelper.coord[3], |
4078 | | // LOD. |
4079 | 24 | sampleHelper.lod}; |
4080 | 24 | return Builder.CreateCall(F, samplerFeedbackArgs); |
4081 | 0 | } break; |
4082 | 0 | default: |
4083 | 0 | DXASSERT(false, "otherwise, unknown SamplerFeedback Op"); |
4084 | 0 | break; |
4085 | 300 | } |
4086 | 0 | return nullptr; |
4087 | 300 | } |
4088 | | |
4089 | | // Load/Store intrinsics. |
// Maps a resource kind to its DXIL load opcode: raw and structured buffers use
// RawBufferLoad, typed buffers use BufferLoad, and any other valid kind uses
// TextureLoad. Kind::Invalid asserts and then returns TextureLoad.
4090 | 21.8k | OP::OpCode LoadOpFromResKind(DxilResource::Kind RK) { |
4091 | 21.8k | switch (RK) { |
4092 | 3.46k | case DxilResource::Kind::RawBuffer: |
4093 | 14.1k | case DxilResource::Kind::StructuredBuffer: |
4094 | 14.1k | return OP::OpCode::RawBufferLoad; |
4095 | 2.56k | case DxilResource::Kind::TypedBuffer: |
4096 | 2.56k | return OP::OpCode::BufferLoad; |
4097 | 0 | case DxilResource::Kind::Invalid: |
4098 | 0 | DXASSERT(0, "invalid resource kind"); |
4099 | 0 | break; |
4100 | 5.05k | default: |
4101 | 5.05k | return OP::OpCode::TextureLoad; |
4102 | 21.8k | } |
4103 | 0 | return OP::OpCode::TextureLoad; |
4104 | 21.8k | } |
4105 | | |
4106 | | struct ResLoadHelper { |
4107 | | // Primary constructor uses the CI load intrinsic call |
4108 | | // to get the retval and various location indicators. |
4109 | | ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC, |
4110 | | Value *h, IntrinsicOp IOP, LoadInst *TyBufSubLoad = nullptr); |
4111 | | // Alternative constructor explicitly sets the index. |
4112 | | // Used for some subscript operators that feed the generic HL call inst |
4113 | | // into a load op and by the matrixload call instruction. |
// Inst becomes retVal; intrinsicOpCode is set to IntrinsicOp::Num_Intrinsics.
4114 | | ResLoadHelper(Instruction *Inst, DxilResource::Kind RK, Value *h, Value *idx, |
4115 | | Value *Offset, Value *status = nullptr, Value *mip = nullptr) |
4116 | 10.1k | : intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(Inst), |
4117 | 10.1k | addr(idx), offset(Offset), status(status), mipLevel(mip) { |
4118 | 10.1k | opcode = LoadOpFromResKind(RK); |
4119 | 10.1k | Type *Ty = Inst->getType(); |
// Raw buffer loads of vectors with more than one element are switched to
// RawBufferVectorLoad when the shader model reports IsSM69Plus().
4120 | 10.1k | if (opcode == OP::OpCode::RawBufferLoad && Ty->isVectorTy()10.0k && |
4121 | 10.1k | Ty->getVectorNumElements() > 15.98k && |
4122 | 10.1k | Inst->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()4.27k ) |
4123 | 1.40k | opcode = OP::OpCode::RawBufferVectorLoad; |
4124 | 10.1k | } |
4125 | | OP::OpCode opcode; |
4126 | | IntrinsicOp intrinsicOpCode; |
// NOTE(review): dxilMajor and dxilMinor are not assigned by either constructor
// of this struct.
4127 | | unsigned dxilMajor; |
4128 | | unsigned dxilMinor; |
4129 | | Value *handle; |
4130 | | Value *retVal; |
4131 | | Value *addr; |
4132 | | Value *offset; |
4133 | | Value *status; |
4134 | | Value *mipLevel; |
4135 | | }; |
4136 | | |
4137 | | // Uses CI arguments to determine the index, offset, and mipLevel also depending |
4138 | | // on the RK/RC resource kind and class, which determine the opcode. |
4139 | | // Handle and IOP are set explicitly. |
4140 | | // For typed buffer loads, the call instruction feeds into a load |
4141 | | // represented by TyBufSubLoad which determines the instruction to replace. |
4142 | | // Otherwise, CI is replaced. |
// Note: mipLevel is only assigned on the TextureLoad path below.
4143 | | ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK, |
4144 | | DxilResourceBase::Class RC, Value *hdl, |
4145 | | IntrinsicOp IOP, LoadInst *TyBufSubLoad) |
4146 | 11.6k | : intrinsicOpCode(IOP), handle(hdl), offset(nullptr), status(nullptr) { |
4147 | 11.6k | opcode = LoadOpFromResKind(RK); |
4148 | 11.6k | bool bForSubscript = false; |
4149 | 11.6k | if (TyBufSubLoad) { |
4150 | 2.81k | bForSubscript = true; |
4151 | 2.81k | retVal = TyBufSubLoad; |
4152 | 2.81k | } else |
4153 | 8.85k | retVal = CI; |
4154 | 11.6k | const unsigned kAddrIdx = HLOperandIndex::kBufLoadAddrOpIdx; |
4155 | 11.6k | addr = CI->getArgOperand(kAddrIdx); |
4156 | 11.6k | unsigned argc = CI->getNumArgOperands(); |
4157 | 11.6k | Type *i32Ty = Type::getInt32Ty(CI->getContext()); |
4158 | 11.6k | unsigned StatusIdx = HLOperandIndex::kBufLoadStatusOpIdx; |
4159 | 11.6k | unsigned OffsetIdx = HLOperandIndex::kInvalidIdx; |
4160 | | |
4161 | 11.6k | if (opcode == OP::OpCode::TextureLoad) { |
4162 | 4.91k | bool IsMS = (RK == DxilResource::Kind::Texture2DMS || |
4163 | 4.91k | RK == DxilResource::Kind::Texture2DMSArray4.66k ); |
4164 | | // Set mip and status index. |
4165 | 4.91k | offset = UndefValue::get(i32Ty); |
4166 | 4.91k | if (IsMS) { |
4167 | | // Retrieve appropriate MS parameters. |
4168 | 408 | StatusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx; |
4169 | | // MS textures keep the sample param (mipLevel) regardless of writability. |
4170 | 408 | if (bForSubscript) |
4171 | 50 | mipLevel = ConstantInt::get(i32Ty, 0); |
4172 | 358 | else |
4173 | 358 | mipLevel = |
4174 | 358 | CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx); |
4175 | 4.51k | } else if (RC == DxilResourceBase::Class::UAV) { |
4176 | | // DXIL requires that non-MS UAV accesses set miplevel to undef. |
4177 | 2.07k | mipLevel = UndefValue::get(i32Ty); |
4178 | 2.07k | StatusIdx = HLOperandIndex::kRWTexLoadStatusOpIdx; |
4179 | 2.43k | } else { |
4180 | | // Non-MS SRV case. |
4181 | 2.43k | StatusIdx = HLOperandIndex::kTexLoadStatusOpIdx; |
4182 | 2.43k | if (bForSubscript) |
4183 | | // Having no miplevel param, single subscripted SRVs default to 0. |
4184 | 1.40k | mipLevel = ConstantInt::get(i32Ty, 0); |
4185 | 1.03k | else |
4186 | | // Mip is stored at the last channel of the coordinate vector. |
4187 | 1.03k | mipLevel = IRBuilder<>(CI).CreateExtractElement( |
4188 | 1.03k | addr, DxilResource::GetNumCoords(RK)); |
4189 | 2.43k | } |
// Only SRV texture loads get a valid OffsetIdx; otherwise it stays kInvalidIdx.
4190 | 4.91k | if (RC == DxilResourceBase::Class::SRV) |
4191 | 2.73k | OffsetIdx = IsMS ? HLOperandIndex::kTex2DMSLoadOffsetOpIdx304 |
4192 | 2.73k | : HLOperandIndex::kTexLoadOffsetOpIdx2.43k ; |
4193 | 6.75k | } else if (opcode == OP::OpCode::RawBufferLoad) { |
4194 | | // If native vectors are available and this load had a vector |
4195 | | // with more than one element, convert the RawBufferLoad to the |
4196 | | // native vector variant RawBufferVectorLoad. |
4197 | 4.19k | Type *Ty = CI->getType(); |
4198 | 4.19k | if (Ty->isVectorTy() && Ty->getVectorNumElements() > 12.20k && |
4199 | 4.19k | CI->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()2.07k ) |
4200 | 1.00k | opcode = OP::OpCode::RawBufferVectorLoad; |
4201 | 4.19k | } |
4202 | | |
4203 | | // Set offset. |
4204 | 11.6k | if (DXIL::IsStructuredBuffer(RK)) |
4205 | | // Structured buffers receive no exterior offset in this constructor, |
4206 | | // but may need to increment it later. |
4207 | 1.29k | offset = ConstantInt::get(i32Ty, 0U); |
4208 | 10.3k | else if (argc > OffsetIdx) |
4209 | | // Textures may set the offset from an explicit argument. |
4210 | 102 | offset = CI->getArgOperand(OffsetIdx); |
4211 | 10.2k | else |
4212 | | // All other cases use undef. |
4213 | 10.2k | offset = UndefValue::get(i32Ty); |
4214 | | |
4215 | | // Retrieve status value if provided. |
4216 | 11.6k | if (argc > StatusIdx) |
4217 | 1.12k | status = CI->getArgOperand(StatusIdx); |
4218 | 11.6k | } |
4219 | | |
4220 | | void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status, |
4221 | | hlsl::OP *OP, HLResource::Kind RK, |
4222 | | const DataLayout &DL); |
4223 | | |
// Returns an i8 constant holding the component mask that covers the first
// NumComponents channels (X, XY, XYZ or all four). Zero components yields an
// empty mask; more than four asserts and also yields an empty mask.
// Ty is not consulted.
// NOTE(review): the assert text mentions 64-bit types, but the default case is
// reached for any NumComponents above 4.
4224 | | static Constant *GetRawBufferMaskForETy(Type *Ty, unsigned NumComponents, |
4225 | 11.6k | hlsl::OP *OP) { |
4226 | 11.6k | unsigned mask = 0; |
4227 | | |
4228 | 11.6k | switch (NumComponents) { |
4229 | 0 | case 0: |
4230 | 0 | break; |
4231 | 7.03k | case 1: |
4232 | 7.03k | mask = DXIL::kCompMask_X; |
4233 | 7.03k | break; |
4234 | 1.14k | case 2: |
4235 | 1.14k | mask = DXIL::kCompMask_X | DXIL::kCompMask_Y; |
4236 | 1.14k | break; |
4237 | 606 | case 3: |
4238 | 606 | mask = DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z; |
4239 | 606 | break; |
4240 | 2.89k | case 4: |
4241 | 2.89k | mask = DXIL::kCompMask_All; |
4242 | 2.89k | break; |
4243 | 0 | default: |
4244 | 0 | DXASSERT(false, "Cannot load more than 2 components for 64bit types."); |
4245 | 11.6k | } |
4246 | 11.6k | return OP->GetI8Const(mask); |
4247 | 11.6k | } |
4248 | | |
4249 | | Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset, |
4250 | | Value *status, Type *EltTy, |
4251 | | MutableArrayRef<Value *> resultElts, hlsl::OP *OP, |
4252 | | IRBuilder<> &Builder, unsigned NumComponents, |
4253 | | Constant *alignment); |
4254 | | |
4255 | | // Sets up arguments for buffer load call. |
// helper supplies opcode, handle, addr, offset and mipLevel. LdSize caps the
// alignment operand, which otherwise is 4 for raw buffers and 8 for the rest.
// For RawBufferLoad the mask slot is left null for the caller to fill in.
4256 | | static SmallVector<Value *, 10> GetBufLoadArgs(ResLoadHelper helper, |
4257 | | HLResource::Kind RK, |
4258 | | IRBuilder<> Builder, |
4259 | 18.1k | unsigned LdSize) { |
4260 | 18.1k | OP::OpCode opcode = helper.opcode; |
4261 | 18.1k | llvm::Constant *opArg = Builder.getInt32((uint32_t)opcode); |
4262 | | |
4263 | 18.1k | unsigned alignment = RK == DxilResource::Kind::RawBuffer ? 4U3.18k : 8U14.9k ; |
4264 | 18.1k | alignment = std::min(alignment, LdSize); |
4265 | 18.1k | Constant *alignmentVal = Builder.getInt32(alignment); |
4266 | | |
4267 | | // Assemble args specific to the type raw/struct/typed: |
4268 | | // - Typed needs to handle the possibility of vector coords |
4269 | | // - Raws need to calculate alignment and mask values. |
4270 | 18.1k | SmallVector<Value *, 10> Args; |
4271 | 18.1k | Args.emplace_back(opArg); // opcode @0. |
4272 | 18.1k | Args.emplace_back(helper.handle); // Resource handle @1 |
4273 | | |
4274 | | // Set offsets appropriate for the load operation. |
4275 | 18.1k | bool isVectorAddr = helper.addr->getType()->isVectorTy(); |
4276 | 18.1k | if (opcode == OP::OpCode::TextureLoad) { |
4277 | 3.73k | llvm::Value *undefI = llvm::UndefValue::get(Builder.getInt32Ty()); |
4278 | | |
4279 | | // Set mip level or sample for MS textures @2. |
4280 | 3.73k | Args.emplace_back(helper.mipLevel); |
4281 | | // Set texture coords according to resource kind @3-5 |
4282 | | // Coords unused by the resource kind are undefs. |
4283 | 3.73k | unsigned coordSize = DxilResource::GetNumCoords(RK); |
4284 | 14.9k | for (unsigned i = 0; i < 3; i++11.1k ) |
4285 | 11.1k | if (i < coordSize) |
4286 | 7.33k | Args.emplace_back(isVectorAddr |
4287 | 7.33k | ? Builder.CreateExtractElement(helper.addr, i)6.91k |
4288 | 7.33k | : helper.addr420 ); |
4289 | 3.86k | else |
4290 | 3.86k | Args.emplace_back(undefI); |
4291 | | |
4292 | | // Set texture offsets according to resource kind @6-8 |
4293 | | // Offsets unused by the resource kind, or absent/undef, are undefs. |
4294 | 3.73k | unsigned offsetSize = DxilResource::GetNumOffsets(RK); |
4295 | 3.73k | if (!helper.offset || isa<llvm::UndefValue>(helper.offset)) |
4296 | 3.63k | offsetSize = 0; |
4297 | 14.9k | for (unsigned i = 0; i < 3; i++11.1k ) |
4298 | 11.1k | if (i < offsetSize) |
4299 | 204 | Args.emplace_back(Builder.CreateExtractElement(helper.offset, i)); |
4300 | 10.9k | else |
4301 | 10.9k | Args.emplace_back(undefI); |
4302 | 14.3k | } else { |
4303 | | // If not TextureLoad, it could be a typed or raw buffer load. |
4304 | | // They have mostly similar arguments. |
4305 | 14.3k | DXASSERT(opcode == OP::OpCode::RawBufferLoad || |
4306 | 14.3k | opcode == OP::OpCode::RawBufferVectorLoad || |
4307 | 14.3k | opcode == OP::OpCode::BufferLoad, |
4308 | 14.3k | "Wrong opcode in get load args"); |
// Index @2 (first element of a vector address) and offset @3.
4309 | 14.3k | Args.emplace_back( |
4310 | 14.3k | isVectorAddr ? Builder.CreateExtractElement(helper.addr, (uint64_t)0)232 |
4311 | 14.3k | : helper.addr14.1k ); |
4312 | 14.3k | Args.emplace_back(helper.offset); |
4313 | 14.3k | if (opcode == OP::OpCode::RawBufferLoad) { |
4314 | | // Unlike typed buffer load, raw buffer load has mask and alignment. |
4315 | 10.7k | Args.emplace_back(nullptr); // Mask will be added later @4. |
4316 | 10.7k | Args.emplace_back(alignmentVal); // alignment @5. |
4317 | 10.7k | } else if (3.65k opcode == OP::OpCode::RawBufferVectorLoad3.65k ) { |
4318 | | // RawBufferVectorLoad takes just alignment, no mask. |
4319 | 2.41k | Args.emplace_back(alignmentVal); // alignment @4 |
4320 | 2.41k | } |
4321 | 14.3k | } |
4322 | 18.1k | return Args; |
4323 | 18.1k | } |
4324 | | |
// True when EltTy is not i1 and the data layout allocates more bits for it
// than its primitive size in bits.
4325 | 20.4k | static bool isMinPrecisionType(Type *EltTy, const DataLayout &DL) { |
4326 | 20.4k | return !EltTy->isIntegerTy(1) && |
4327 | 20.4k | DL.getTypeAllocSizeInBits(EltTy) > EltTy->getPrimitiveSizeInBits()20.1k ; |
4328 | 20.4k | } |
4329 | | |
// Returns Ty unchanged unless its scalar type is min precision (see
// isMinPrecisionType). Otherwise returns the same scalar/vector shape with
// float elements for floating-point types and i32 elements for all others.
4330 | | static Type *widenMinPrecisionType(Type *Ty, LLVMContext &Ctx, |
4331 | 20.4k | const DataLayout &DL) { |
4332 | 20.4k | Type *EltTy = Ty->getScalarType(); |
4333 | 20.4k | if (!isMinPrecisionType(EltTy, DL)) |
4334 | 20.2k | return Ty; |
4335 | 166 | Type *WideTy = EltTy->isFloatingPointTy() ? Type::getFloatTy(Ctx)48 |
4336 | 166 | : Type::getInt32Ty(Ctx)118 ; |
4337 | 166 | if (Ty->isVectorTy()) |
4338 | 158 | return VectorType::get(WideTy, Ty->getVectorNumElements()); |
4339 | 8 | return WideTy; |
4340 | 166 | } |
4341 | | |
4342 | | // Emits as many calls as needed to load the full vector |
4343 | | // Performs any needed extractions and conversions of the results. |
// On return, every use of helper.retVal has been redirected to the assembled
// value, helper.retVal holds that value, and the first load call is returned.
4344 | | Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, |
4345 | | IRBuilder<> &Builder, hlsl::OP *OP, |
4346 | 18.1k | const DataLayout &DL) { |
4347 | 18.1k | OP::OpCode opcode = helper.opcode; |
4348 | 18.1k | Type *Ty = helper.retVal->getType(); |
4349 | | |
4350 | 18.1k | unsigned NumComponents = 1; |
4351 | 18.1k | if (Ty->isVectorTy()) |
4352 | 11.7k | NumComponents = Ty->getVectorNumElements(); |
4353 | | |
4354 | 18.1k | const bool isTyped = DXIL::IsTyped(RK); |
4355 | 18.1k | Type *OrigEltTy = Ty->getScalarType(); |
4356 | 18.1k | Type *WidenedTy = widenMinPrecisionType(Ty, Builder.getContext(), DL); |
4357 | 18.1k | Type *EltTy = WidenedTy->getScalarType(); |
4358 | 18.1k | const bool isMinPrec = (WidenedTy != Ty); |
4359 | 18.1k | const bool is64 = (EltTy->isIntegerTy(64) || EltTy->isDoubleTy()16.9k ); |
4360 | 18.1k | const bool isBool = EltTy->isIntegerTy(1); |
4361 | | // Bools, and 64-bit values on typed buffers, load as i32 and are reconverted after load. |
4362 | 18.1k | if (isBool || (17.7k is6417.7k && isTyped2.58k )) |
4363 | 480 | EltTy = Builder.getInt32Ty(); |
4364 | | |
4365 | | // Calculate load size with the scalar memory element type. |
4366 | 18.1k | unsigned LdSize = DL.getTypeAllocSize(EltTy); |
4367 | | |
4368 | | // Adjust number of components as needed. |
4369 | 18.1k | if (is64 && isTyped2.58k ) { |
4370 | | // 64-bit types are stored as int32 pairs in typed buffers. |
4371 | 146 | DXASSERT(NumComponents <= 2, "Typed buffers only allow 4 dwords."); |
4372 | 146 | NumComponents *= 2; |
4373 | 17.9k | } else if (opcode == OP::OpCode::RawBufferVectorLoad) { |
4374 | | // Native vector loads only have a single vector element in ResRet. |
4375 | 2.41k | EltTy = VectorType::get(EltTy, NumComponents); |
4376 | 2.41k | NumComponents = 1; |
4377 | 2.41k | } |
4378 | | |
4379 | 18.1k | SmallVector<Value *, 10> Args = GetBufLoadArgs(helper, RK, Builder, LdSize); |
4380 | | |
4381 | | // Keep track of the first load for debug info migration. |
4382 | 18.1k | Value *FirstLd = nullptr; |
4383 | | |
// OffsetIdx names the argument that is advanced between chunked raw loads.
4384 | 18.1k | unsigned OffsetIdx = 0; |
4385 | 18.1k | if (RK == DxilResource::Kind::RawBuffer) |
4386 | | // Raw buffers can't use offset param. Add to coord index. |
4387 | 3.18k | OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx; |
4388 | 14.9k | else if (RK == DxilResource::Kind::StructuredBuffer) |
4389 | 9.96k | OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; |
4390 | | |
4391 | | // Create call(s) to function object and collect results in Elts. |
4392 | | // Typed buffer loads are limited to one load of up to 4 32-bit values. |
4393 | | // Raw buffer loads might need multiple loads in chunks of 4. |
4394 | 18.1k | SmallVector<Value *, 4> Elts(NumComponents); |
4395 | 37.1k | for (unsigned i = 0; i < NumComponents;) { |
4396 | | // Load 4 elements or however many less than 4 are left to load. |
4397 | 19.0k | unsigned chunkSize = std::min(NumComponents - i, 4U); |
4398 | | |
4399 | | // Assign mask for raw buffer loads. |
4400 | 19.0k | if (opcode == OP::OpCode::RawBufferLoad) { |
4401 | 11.6k | Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] = |
4402 | 11.6k | GetRawBufferMaskForETy(EltTy, chunkSize, OP); |
4403 | | // If we've loaded a chunk already, update offset to next chunk. |
4404 | 11.6k | if (FirstLd != nullptr) |
4405 | 916 | Args[OffsetIdx] = |
4406 | 916 | Builder.CreateAdd(Args[OffsetIdx], OP->GetU32Const(4 * LdSize)); |
4407 | 11.6k | } |
4408 | | |
4409 | 19.0k | Function *F = OP->GetOpFunc(opcode, EltTy); |
4410 | 19.0k | Value *Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(opcode)); |
4411 | 19.0k | unsigned StatusIndex; |
4412 | | |
4413 | | // Extract elements from returned ResRet. |
4414 | | // Native vector loads just have one vector element in the ResRet. |
4415 | | // Others have up to four scalars that need to be individually extracted. |
4416 | 19.0k | if (opcode == OP::OpCode::RawBufferVectorLoad) { |
4417 | 2.41k | Elts[i++] = Builder.CreateExtractValue(Ld, 0); |
4418 | 2.41k | StatusIndex = DXIL::kVecResRetStatusIndex; |
4419 | 16.6k | } else { |
4420 | 53.5k | for (unsigned j = 0; j < chunkSize; j++, i++36.9k ) |
4421 | 36.9k | Elts[i] = Builder.CreateExtractValue(Ld, j); |
4422 | 16.6k | StatusIndex = DXIL::kResRetStatusIndex; |
4423 | 16.6k | } |
4424 | | |
4425 | | // Update status. |
4426 | 19.0k | UpdateStatus(Ld, helper.status, Builder, OP, StatusIndex); |
4427 | | |
4428 | 19.0k | if (!FirstLd) |
4429 | 18.1k | FirstLd = Ld; |
4430 | 19.0k | } |
4431 | 18.1k | DXASSERT(FirstLd, "No loads created by TranslateBufLoad"); |
4432 | | |
4433 | | // Convert loaded 32-bit integers to intended 64-bit type representation. |
4434 | 18.1k | if (isTyped) { |
4435 | 4.97k | Type *RegEltTy = Ty->getScalarType(); |
4436 | 4.97k | if (RegEltTy->isDoubleTy()) { |
4437 | 68 | Function *makeDouble = OP->GetOpFunc(DXIL::OpCode::MakeDouble, RegEltTy); |
4438 | 68 | Value *makeDoubleOpArg = |
4439 | 68 | Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble); |
4440 | 68 | NumComponents /= 2; // Convert back to number of doubles. |
4441 | 160 | for (unsigned i = 0; i < NumComponents; i++92 ) { |
4442 | 92 | Value *lo = Elts[2 * i]; |
4443 | 92 | Value *hi = Elts[2 * i + 1]; |
4444 | 92 | Elts[i] = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi}); |
4445 | 92 | } |
4446 | 68 | EltTy = RegEltTy; |
4447 | 4.90k | } else if (RegEltTy->isIntegerTy(64)) { |
4448 | 78 | NumComponents /= 2; // Convert back to number of int64s. |
// Rebuild each i64 as zext(lo) | (zext(hi) << 32).
4449 | 192 | for (unsigned i = 0; i < NumComponents; i++114 ) { |
4450 | 114 | Value *lo = Elts[2 * i]; |
4451 | 114 | Value *hi = Elts[2 * i + 1]; |
4452 | 114 | lo = Builder.CreateZExt(lo, RegEltTy); |
4453 | 114 | hi = Builder.CreateZExt(hi, RegEltTy); |
4454 | 114 | hi = Builder.CreateShl(hi, 32); |
4455 | 114 | Elts[i] = Builder.CreateOr(lo, hi); |
4456 | 114 | } |
4457 | 78 | EltTy = RegEltTy; |
4458 | 78 | } |
4459 | 4.97k | } |
4460 | | |
4461 | | // Package elements into a vector as needed. |
4462 | 18.1k | Value *retValNew = nullptr; |
4463 | | // Scalar or native vector loads need not construct vectors from elements. |
4464 | 18.1k | if (!Ty->isVectorTy() || opcode == OP::OpCode::RawBufferVectorLoad11.7k ) { |
4465 | 8.81k | retValNew = Elts[0]; |
4466 | 9.30k | } else { |
4467 | 9.30k | retValNew = UndefValue::get(VectorType::get(EltTy, NumComponents)); |
4468 | 39.6k | for (unsigned i = 0; i < NumComponents; i++30.3k ) |
4469 | 30.3k | retValNew = Builder.CreateInsertElement(retValNew, Elts[i], i); |
4470 | 9.30k | } |
4471 | | |
4472 | | // Convert loaded int32 bool results to i1 register representation. |
4473 | 18.1k | if (isBool) |
4474 | 334 | retValNew = Builder.CreateICmpNE( |
4475 | 334 | retValNew, Constant::getNullValue(retValNew->getType())); |
4476 | | |
4477 | | // DXIL loads min precision as 32-bit; narrow back to original IR type. |
4478 | 18.1k | if (isMinPrec) { |
4479 | 142 | if (OrigEltTy->isIntegerTy()) |
4480 | 102 | retValNew = Builder.CreateTrunc(retValNew, Ty); |
4481 | 40 | else |
4482 | 40 | retValNew = Builder.CreateFPTrunc(retValNew, Ty); |
4483 | 142 | } |
4484 | | |
4485 | 18.1k | helper.retVal->replaceAllUsesWith(retValNew); |
4486 | 18.1k | helper.retVal = retValNew; |
4487 | | |
4488 | 18.1k | return FirstLd; |
4489 | 18.1k | } |
4490 | | |
// Lowers an HL resource load call. Pointer-typed results are handed to
// TranslateStructBufSubscript; all other results go through TranslateBufLoad,
// after which debug values are migrated from CI to the new load.
// Always returns nullptr. opcode and Translated are not consulted here.
4491 | | Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
4492 | | HLOperationLowerHelper &helper, |
4493 | | HLObjectOperationLowerHelper *pObjHelper, |
4494 | 6.21k | bool &Translated) { |
4495 | 6.21k | hlsl::OP *hlslOP = &helper.hlslOP; |
4496 | 6.21k | DataLayout &DL = helper.dataLayout; |
4497 | 6.21k | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
4498 | | |
4499 | 6.21k | IRBuilder<> Builder(CI); |
4500 | | |
4501 | 6.21k | DXIL::ResourceClass RC = pObjHelper->GetRC(handle); |
4502 | 6.21k | DXIL::ResourceKind RK = pObjHelper->GetRK(handle); |
4503 | | |
4504 | 6.21k | ResLoadHelper ldHelper(CI, RK, RC, handle, IOP); |
4505 | 6.21k | Type *Ty = CI->getType(); |
4506 | 6.21k | Value *Ld = nullptr; |
4507 | 6.21k | if (Ty->isPointerTy()) { |
4508 | 1.05k | DXASSERT(!DxilResource::IsAnyTexture(RK), |
4509 | 1.05k | "Textures should not be treated as structured buffers."); |
4510 | 1.05k | TranslateStructBufSubscript(cast<CallInst>(ldHelper.retVal), handle, |
4511 | 1.05k | ldHelper.status, hlslOP, RK, DL); |
4512 | 5.16k | } else { |
4513 | 5.16k | Ld = TranslateBufLoad(ldHelper, RK, Builder, hlslOP, DL); |
4514 | 5.16k | dxilutil::MigrateDebugValue(CI, Ld); |
4515 | 5.16k | } |
4516 | | // CI is replaced by above translation calls. |
4517 | 6.21k | return nullptr; |
4518 | 6.21k | } |
4519 | | |
4520 | | // Split { v0, v1 } to { v0.lo, v0.hi, v1.lo, v1.hi } |
// Reads `size` entries from vals and writes 2 * size entries to vals32.
// Doubles are split with the SplitDouble op; any other element type is split
// with a trunc (low half) and an lshr by 32 followed by a trunc (high half).
// Undef inputs produce undef i32 halves without emitting any instructions.
4521 | | void Split64bitValForStore(Type *EltTy, ArrayRef<Value *> vals, unsigned size, |
4522 | | MutableArrayRef<Value *> vals32, hlsl::OP *hlslOP, |
4523 | 218 | IRBuilder<> &Builder) { |
4524 | 218 | Type *i32Ty = Builder.getInt32Ty(); |
4525 | 218 | Type *doubleTy = Builder.getDoubleTy(); |
4526 | 218 | Value *undefI32 = UndefValue::get(i32Ty); |
4527 | | |
4528 | 218 | if (EltTy == doubleTy) { |
4529 | 40 | Function *dToU = hlslOP->GetOpFunc(DXIL::OpCode::SplitDouble, doubleTy); |
4530 | 40 | Value *dToUOpArg = Builder.getInt32((unsigned)DXIL::OpCode::SplitDouble); |
4531 | 92 | for (unsigned i = 0; i < size; i++52 ) { |
4532 | 52 | if (isa<UndefValue>(vals[i])) { |
4533 | 0 | vals32[2 * i] = undefI32; |
4534 | 0 | vals32[2 * i + 1] = undefI32; |
4535 | 52 | } else { |
4536 | 52 | Value *retVal = Builder.CreateCall(dToU, {dToUOpArg, vals[i]}); |
4537 | 52 | Value *lo = Builder.CreateExtractValue(retVal, 0); |
4538 | 52 | Value *hi = Builder.CreateExtractValue(retVal, 1); |
4539 | 52 | vals32[2 * i] = lo; |
4540 | 52 | vals32[2 * i + 1] = hi; |
4541 | 52 | } |
4542 | 52 | } |
4543 | 178 | } else { |
4544 | 372 | for (unsigned i = 0; i < size; i++194 ) { |
4545 | 194 | if (isa<UndefValue>(vals[i])) { |
4546 | 0 | vals32[2 * i] = undefI32; |
4547 | 0 | vals32[2 * i + 1] = undefI32; |
4548 | 194 | } else { |
4549 | 194 | Value *lo = Builder.CreateTrunc(vals[i], i32Ty); |
4550 | 194 | Value *hi = Builder.CreateLShr(vals[i], 32); |
4551 | 194 | hi = Builder.CreateTrunc(hi, i32Ty); |
4552 | 194 | vals32[2 * i] = lo; |
4553 | 194 | vals32[2 * i + 1] = hi; |
4554 | 194 | } |
4555 | 194 | } |
4556 | 178 | } |
4557 | 218 | } |
4558 | | |
4559 | | void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, |
4560 | | Value *Idx, Value *offset, IRBuilder<> &Builder, |
4561 | 16.7k | hlsl::OP *OP, Value *sampIdx = nullptr) { |
4562 | 16.7k | Type *Ty = val->getType(); |
4563 | 16.7k | OP::OpCode opcode = OP::OpCode::NumOpCodes; |
4564 | 16.7k | bool IsTyped = true; |
4565 | 16.7k | switch (RK) { |
4566 | 3.06k | case DxilResource::Kind::RawBuffer: |
4567 | 13.4k | case DxilResource::Kind::StructuredBuffer: |
4568 | 13.4k | IsTyped = false; |
4569 | 13.4k | opcode = OP::OpCode::RawBufferStore; |
4570 | | // Where shader model and type allows, use vector store intrinsic. |
4571 | 13.4k | if (OP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus() && |
4572 | 13.4k | Ty->isVectorTy()4.90k && Ty->getVectorNumElements() > 13.75k ) |
4573 | 2.33k | opcode = OP::OpCode::RawBufferVectorStore; |
4574 | 13.4k | break; |
4575 | 1.23k | case DxilResource::Kind::TypedBuffer: |
4576 | 1.23k | opcode = OP::OpCode::BufferStore; |
4577 | 1.23k | break; |
4578 | 0 | case DxilResource::Kind::Invalid: |
4579 | 0 | DXASSERT(0, "invalid resource kind"); |
4580 | 0 | break; |
4581 | 48 | case DxilResource::Kind::Texture2DMS: |
4582 | 80 | case DxilResource::Kind::Texture2DMSArray: |
4583 | 80 | opcode = OP::OpCode::TextureStoreSample; |
4584 | 80 | break; |
4585 | 2.01k | default: |
4586 | 2.01k | opcode = OP::OpCode::TextureStore; |
4587 | 2.01k | break; |
4588 | 16.7k | } |
4589 | | |
4590 | 16.7k | Type *i32Ty = Builder.getInt32Ty(); |
4591 | 16.7k | Type *i64Ty = Builder.getInt64Ty(); |
4592 | 16.7k | Type *doubleTy = Builder.getDoubleTy(); |
4593 | 16.7k | Type *EltTy = Ty->getScalarType(); |
4594 | 16.7k | if (EltTy->isIntegerTy(1)) { |
4595 | | // Since we're going to memory, convert bools to their memory |
4596 | | // representation. |
4597 | 350 | EltTy = i32Ty; |
4598 | 350 | if (Ty->isVectorTy()) |
4599 | 322 | Ty = VectorType::get(EltTy, Ty->getVectorNumElements()); |
4600 | 28 | else |
4601 | 28 | Ty = EltTy; |
4602 | 350 | val = Builder.CreateZExt(val, Ty); |
4603 | 350 | } |
4604 | | |
4605 | | // Min precision alloc size is 32-bit; widen to match store intrinsic. |
4606 | | // Scalar RawBufferStore widening is handled by TranslateMinPrecisionRawBuffer |
4607 | | // in DxilGenerationPass, which has signedness info from struct annotations. |
4608 | 16.7k | if (opcode == OP::OpCode::RawBufferVectorStore) { |
4609 | 2.33k | const DataLayout &DL = |
4610 | 2.33k | OP->GetModule()->GetHLModule().GetModule()->getDataLayout(); |
4611 | 2.33k | Type *WideTy = widenMinPrecisionType(Ty, Builder.getContext(), DL); |
4612 | 2.33k | if (WideTy != Ty) { |
4613 | 24 | if (EltTy->isFloatingPointTy()) |
4614 | 8 | val = Builder.CreateFPExt(val, WideTy); |
4615 | 16 | else |
4616 | | // TODO(#8314): Signedness info is lost by this point; SExt is wrong |
4617 | | // for min16uint. Front-end should widen during Clang CodeGen instead. |
4618 | 16 | val = Builder.CreateSExt(val, WideTy); |
4619 | 24 | EltTy = WideTy->getScalarType(); |
4620 | 24 | Ty = WideTy; |
4621 | 24 | } |
4622 | 2.33k | } |
4623 | | |
4624 | | // If RawBuffer store of 64-bit value, don't set alignment to 8, |
4625 | | // since buffer alignment isn't known to be anything over 4. |
4626 | 16.7k | unsigned alignValue = OP->GetAllocSizeForType(EltTy); |
4627 | 16.7k | if (RK == HLResource::Kind::RawBuffer && alignValue > 43.06k ) |
4628 | 232 | alignValue = 4; |
4629 | 16.7k | Constant *Alignment = OP->GetI32Const(alignValue); |
4630 | 16.7k | bool is64 = EltTy == i64Ty || EltTy == doubleTy15.8k ; |
4631 | 16.7k | if (is64 && IsTyped1.82k ) { |
4632 | 218 | EltTy = i32Ty; |
4633 | 218 | } |
4634 | | |
4635 | 16.7k | llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode); |
4636 | | |
4637 | 16.7k | llvm::Value *undefI = |
4638 | 16.7k | llvm::UndefValue::get(llvm::Type::getInt32Ty(Ty->getContext())); |
4639 | | |
4640 | 16.7k | llvm::Value *undefVal = llvm::UndefValue::get(Ty->getScalarType()); |
4641 | | |
4642 | 16.7k | SmallVector<Value *, 13> storeArgs; |
4643 | 16.7k | storeArgs.emplace_back(opArg); // opcode |
4644 | 16.7k | storeArgs.emplace_back(handle); // resource handle |
4645 | | |
4646 | 16.7k | unsigned OffsetIdx = 0; |
4647 | 16.7k | if (opcode == OP::OpCode::RawBufferStore || |
4648 | 16.7k | opcode == OP::OpCode::RawBufferVectorStore5.66k || |
4649 | 16.7k | opcode == OP::OpCode::BufferStore3.33k ) { |
4650 | | // Append Coord0 (Index) value. |
4651 | 14.7k | if (Idx->getType()->isVectorTy()) { |
4652 | 0 | Value *ScalarIdx = Builder.CreateExtractElement(Idx, (uint64_t)0); |
4653 | 0 | storeArgs.emplace_back(ScalarIdx); // Coord0 (Index). |
4654 | 14.7k | } else { |
4655 | 14.7k | storeArgs.emplace_back(Idx); // Coord0 (Index). |
4656 | 14.7k | } |
4657 | | |
4658 | | // Store OffsetIdx representing the argument that may need to be incremented |
4659 | | // later to load additional chunks of data. |
4660 | | // Only structured buffers can use the offset parameter. |
4661 | | // Others must increment the index. |
4662 | 14.7k | if (RK == DxilResource::Kind::StructuredBuffer) |
4663 | 10.4k | OffsetIdx = storeArgs.size(); |
4664 | 4.30k | else |
4665 | 4.30k | OffsetIdx = storeArgs.size() - 1; |
4666 | | |
4667 | | // Coord1 (Offset). |
4668 | 14.7k | storeArgs.emplace_back(offset); |
4669 | 14.7k | } else { |
4670 | | // texture store |
4671 | 2.09k | unsigned coordSize = DxilResource::GetNumCoords(RK); |
4672 | | |
4673 | | // Set x first. |
4674 | 2.09k | if (Idx->getType()->isVectorTy()) |
4675 | 1.62k | storeArgs.emplace_back(Builder.CreateExtractElement(Idx, (uint64_t)0)); |
4676 | 466 | else |
4677 | 466 | storeArgs.emplace_back(Idx); |
4678 | | |
4679 | 6.27k | for (unsigned i = 1; i < 3; i++4.18k ) { |
4680 | 4.18k | if (i < coordSize) |
4681 | 1.73k | storeArgs.emplace_back(Builder.CreateExtractElement(Idx, i)); |
4682 | 2.44k | else |
4683 | 2.44k | storeArgs.emplace_back(undefI); |
4684 | 4.18k | } |
4685 | | // TODO: support mip for texture ST |
4686 | 2.09k | } |
4687 | | |
4688 | | // RawBufferVectorStore only takes a single value and alignment arguments. |
4689 | 16.7k | if (opcode == DXIL::OpCode::RawBufferVectorStore) { |
4690 | 2.33k | storeArgs.emplace_back(val); |
4691 | 2.33k | storeArgs.emplace_back(Alignment); |
4692 | 2.33k | Function *F = OP->GetOpFunc(DXIL::OpCode::RawBufferVectorStore, Ty); |
4693 | 2.33k | Builder.CreateCall(F, storeArgs); |
4694 | 2.33k | return; |
4695 | 2.33k | } |
4696 | 14.4k | Function *F = OP->GetOpFunc(opcode, EltTy); |
4697 | | |
4698 | 14.4k | constexpr unsigned MaxStoreElemCount = 4; |
4699 | 14.4k | const unsigned CompCount = Ty->isVectorTy() ? Ty->getVectorNumElements()8.20k : 16.25k ; |
4700 | 14.4k | const unsigned StoreInstCount = |
4701 | 14.4k | (CompCount / MaxStoreElemCount) + (CompCount % MaxStoreElemCount != 0); |
4702 | 14.4k | SmallVector<decltype(storeArgs), 4> storeArgsList; |
4703 | | |
4704 | | // Max number of element to store should be 16 (for a 4x4 matrix) |
4705 | 14.4k | DXASSERT_NOMSG(StoreInstCount >= 1 && StoreInstCount <= 4); |
4706 | | |
4707 | | // If number of elements to store exceeds the maximum number of elements |
4708 | | // that can be stored in a single store call, make sure to generate enough |
4709 | | // store calls to store all elements |
4710 | 29.3k | for (unsigned j = 0; j < StoreInstCount; j++14.8k ) { |
4711 | 14.8k | decltype(storeArgs) newStoreArgs; |
4712 | 14.8k | for (Value *storeArg : storeArgs) |
4713 | 61.6k | newStoreArgs.emplace_back(storeArg); |
4714 | 14.8k | storeArgsList.emplace_back(newStoreArgs); |
4715 | 14.8k | } |
4716 | | |
4717 | 29.3k | for (unsigned j = 0; j < storeArgsList.size(); j++14.8k ) { |
4718 | | // For second and subsequent store calls, increment the resource-appropriate |
4719 | | // index or offset parameter. |
4720 | 14.8k | if (j > 0) { |
4721 | 436 | unsigned EltSize = OP->GetAllocSizeForType(EltTy); |
4722 | 436 | unsigned NewCoord = EltSize * MaxStoreElemCount * j; |
4723 | 436 | Value *NewCoordVal = ConstantInt::get(Builder.getInt32Ty(), NewCoord); |
4724 | 436 | NewCoordVal = Builder.CreateAdd(storeArgsList[0][OffsetIdx], NewCoordVal); |
4725 | 436 | storeArgsList[j][OffsetIdx] = NewCoordVal; |
4726 | 436 | } |
4727 | | |
4728 | | // Set value parameters. |
4729 | 14.8k | uint8_t mask = 0; |
4730 | 14.8k | if (Ty->isVectorTy()) { |
4731 | 8.64k | unsigned vecSize = |
4732 | 8.64k | std::min((j + 1) * MaxStoreElemCount, Ty->getVectorNumElements()) - |
4733 | 8.64k | (j * MaxStoreElemCount); |
4734 | 8.64k | Value *emptyVal = undefVal; |
4735 | 8.64k | if (IsTyped) { |
4736 | 1.73k | mask = DXIL::kCompMask_All; |
4737 | 1.73k | emptyVal = Builder.CreateExtractElement(val, (uint64_t)0); |
4738 | 1.73k | } |
4739 | | |
4740 | 43.2k | for (unsigned i = 0; i < MaxStoreElemCount; i++34.5k ) { |
4741 | 34.5k | if (i < vecSize) { |
4742 | 23.7k | storeArgsList[j].emplace_back( |
4743 | 23.7k | Builder.CreateExtractElement(val, (j * MaxStoreElemCount) + i)); |
4744 | 23.7k | mask |= (1 << i); |
4745 | 23.7k | } else { |
4746 | 10.8k | storeArgsList[j].emplace_back(emptyVal); |
4747 | 10.8k | } |
4748 | 34.5k | } |
4749 | | |
4750 | 8.64k | } else { |
4751 | 6.25k | if (IsTyped) { |
4752 | 1.59k | mask = DXIL::kCompMask_All; |
4753 | 1.59k | storeArgsList[j].emplace_back(val); |
4754 | 1.59k | storeArgsList[j].emplace_back(val); |
4755 | 1.59k | storeArgsList[j].emplace_back(val); |
4756 | 1.59k | storeArgsList[j].emplace_back(val); |
4757 | 4.65k | } else { |
4758 | 4.65k | storeArgsList[j].emplace_back(val); |
4759 | 4.65k | storeArgsList[j].emplace_back(undefVal); |
4760 | 4.65k | storeArgsList[j].emplace_back(undefVal); |
4761 | 4.65k | storeArgsList[j].emplace_back(undefVal); |
4762 | 4.65k | mask = DXIL::kCompMask_X; |
4763 | 4.65k | } |
4764 | 6.25k | } |
4765 | | |
4766 | 14.8k | if (is64 && IsTyped1.49k ) { |
4767 | 218 | unsigned size = 1; |
4768 | 218 | if (Ty->isVectorTy()) { |
4769 | 36 | size = |
4770 | 36 | std::min((j + 1) * MaxStoreElemCount, Ty->getVectorNumElements()) - |
4771 | 36 | (j * MaxStoreElemCount); |
4772 | 36 | } |
4773 | 218 | DXASSERT(size <= 2, "raw/typed buffer only allow 4 dwords"); |
4774 | 218 | unsigned val0OpIdx = opcode == DXIL::OpCode::TextureStore || |
4775 | 218 | opcode == DXIL::OpCode::TextureStoreSample114 |
4776 | 218 | ? DXIL::OperandIndex::kTextureStoreVal0OpIdx112 |
4777 | 218 | : DXIL::OperandIndex::kBufferStoreVal0OpIdx106 ; |
4778 | 218 | Value *V0 = storeArgsList[j][val0OpIdx]; |
4779 | 218 | Value *V1 = storeArgsList[j][val0OpIdx + 1]; |
4780 | | |
4781 | 218 | Value *vals32[4]; |
4782 | 218 | EltTy = Ty->getScalarType(); |
4783 | 218 | Split64bitValForStore(EltTy, {V0, V1}, size, vals32, OP, Builder); |
4784 | | // Fill the uninit vals. |
4785 | 218 | if (size == 1) { |
4786 | 190 | vals32[2] = vals32[0]; |
4787 | 190 | vals32[3] = vals32[1]; |
4788 | 190 | } |
4789 | | // Change valOp to 32 version. |
4790 | 1.09k | for (unsigned i = 0; i < 4; i++872 ) { |
4791 | 872 | storeArgsList[j][val0OpIdx + i] = vals32[i]; |
4792 | 872 | } |
4793 | | // change mask for double |
4794 | 218 | if (opcode == DXIL::OpCode::RawBufferStore) { |
4795 | 0 | mask = size == 1 ? DXIL::kCompMask_X | DXIL::kCompMask_Y |
4796 | 0 | : DXIL::kCompMask_All; |
4797 | 0 | } |
4798 | 218 | } |
4799 | | |
4800 | 14.8k | storeArgsList[j].emplace_back(OP->GetU8Const(mask)); // mask |
4801 | 14.8k | if (opcode == DXIL::OpCode::RawBufferStore) |
4802 | 11.5k | storeArgsList[j].emplace_back(Alignment); // alignment only for raw buffer |
4803 | 3.33k | else if (opcode == DXIL::OpCode::TextureStoreSample) { |
4804 | 80 | storeArgsList[j].emplace_back( |
4805 | 80 | sampIdx ? sampIdx40 |
4806 | 80 | : Builder.getInt32(0)40 ); // sample idx only for MS textures |
4807 | 80 | } |
4808 | 14.8k | Builder.CreateCall(F, storeArgsList[j]); |
4809 | 14.8k | } |
4810 | 14.4k | } |
4811 | | |
4812 | | Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
4813 | | HLOperationLowerHelper &helper, |
4814 | | HLObjectOperationLowerHelper *pObjHelper, |
4815 | 3.06k | bool &Translated) { |
4816 | 3.06k | hlsl::OP *hlslOP = &helper.hlslOP; |
4817 | 3.06k | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
4818 | | |
4819 | 3.06k | IRBuilder<> Builder(CI); |
4820 | 3.06k | DXIL::ResourceKind RK = pObjHelper->GetRK(handle); |
4821 | | |
4822 | 3.06k | Value *val = CI->getArgOperand(HLOperandIndex::kStoreValOpIdx); |
4823 | 3.06k | Value *offset = CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx); |
4824 | 3.06k | Value *UndefI = UndefValue::get(Builder.getInt32Ty()); |
4825 | 3.06k | TranslateStore(RK, handle, val, offset, UndefI, Builder, hlslOP); |
4826 | | |
4827 | 3.06k | return nullptr; |
4828 | 3.06k | } |
4829 | | } // namespace |
4830 | | |
4831 | | // Atomic intrinsics. |
4832 | | namespace { |
4833 | | // Atomic intrinsics. |
4834 | | struct AtomicHelper { |
4835 | | AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Type *opType = nullptr); |
4836 | | AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx, |
4837 | | Value *baseOffset, Type *opType = nullptr); |
4838 | | OP::OpCode opcode; |
4839 | | Value *handle; |
4840 | | Value *addr; |
4841 | | Value *offset; // Offset for structrued buffer. |
4842 | | Value *value; |
4843 | | Value *originalValue; |
4844 | | Value *compareValue; |
4845 | | Type *operationType; |
4846 | | }; |
4847 | | |
4848 | | // For MOP version of Interlocked*. |
4849 | | AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Type *opType) |
4850 | 2.48k | : opcode(op), handle(h), offset(nullptr), originalValue(nullptr), |
4851 | 2.48k | operationType(opType) { |
4852 | 2.48k | addr = CI->getArgOperand(HLOperandIndex::kObjectInterlockedDestOpIndex); |
4853 | 2.48k | if (op == OP::OpCode::AtomicCompareExchange) { |
4854 | 962 | compareValue = CI->getArgOperand( |
4855 | 962 | HLOperandIndex::kObjectInterlockedCmpCompareValueOpIndex); |
4856 | 962 | value = |
4857 | 962 | CI->getArgOperand(HLOperandIndex::kObjectInterlockedCmpValueOpIndex); |
4858 | 962 | if (CI->getNumArgOperands() == |
4859 | 962 | (HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex + 1)) |
4860 | 526 | originalValue = CI->getArgOperand( |
4861 | 526 | HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex); |
4862 | 1.52k | } else { |
4863 | 1.52k | value = CI->getArgOperand(HLOperandIndex::kObjectInterlockedValueOpIndex); |
4864 | 1.52k | if (CI->getNumArgOperands() == |
4865 | 1.52k | (HLOperandIndex::kObjectInterlockedOriginalValueOpIndex + 1)) |
4866 | 1.34k | originalValue = CI->getArgOperand( |
4867 | 1.34k | HLOperandIndex::kObjectInterlockedOriginalValueOpIndex); |
4868 | 1.52k | } |
4869 | 2.48k | if (nullptr == operationType) |
4870 | 2.32k | operationType = value->getType(); |
4871 | 2.48k | } |
4872 | | // For IOP version of Interlocked*. |
4873 | | AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx, |
4874 | | Value *baseOffset, Type *opType) |
4875 | 4.18k | : opcode(op), handle(h), addr(bufIdx), offset(baseOffset), |
4876 | 4.18k | originalValue(nullptr), operationType(opType) { |
4877 | 4.18k | if (op == OP::OpCode::AtomicCompareExchange) { |
4878 | 1.42k | compareValue = |
4879 | 1.42k | CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex); |
4880 | 1.42k | value = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex); |
4881 | 1.42k | if (CI->getNumArgOperands() == |
4882 | 1.42k | (HLOperandIndex::kInterlockedCmpOriginalValueOpIndex + 1)) |
4883 | 692 | originalValue = CI->getArgOperand( |
4884 | 692 | HLOperandIndex::kInterlockedCmpOriginalValueOpIndex); |
4885 | 2.75k | } else { |
4886 | 2.75k | value = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex); |
4887 | 2.75k | if (CI->getNumArgOperands() == |
4888 | 2.75k | (HLOperandIndex::kInterlockedOriginalValueOpIndex + 1)) |
4889 | 720 | originalValue = |
4890 | 720 | CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex); |
4891 | 2.75k | } |
4892 | 4.18k | if (nullptr == operationType) |
4893 | 4.10k | operationType = value->getType(); |
4894 | 4.18k | } |
4895 | | |
4896 | | void TranslateAtomicBinaryOperation(AtomicHelper &helper, |
4897 | | DXIL::AtomicBinOpCode atomicOp, |
4898 | 4.28k | IRBuilder<> &Builder, hlsl::OP *hlslOP) { |
4899 | 4.28k | Value *handle = helper.handle; |
4900 | 4.28k | Value *addr = helper.addr; |
4901 | 4.28k | Value *val = helper.value; |
4902 | 4.28k | Type *Ty = helper.operationType; |
4903 | 4.28k | Type *valTy = val->getType(); |
4904 | | |
4905 | 4.28k | Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext())); |
4906 | | |
4907 | 4.28k | Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType()); |
4908 | 4.28k | Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode)); |
4909 | 4.28k | Value *atomicOpArg = hlslOP->GetU32Const(static_cast<unsigned>(atomicOp)); |
4910 | | |
4911 | 4.28k | if (Ty != valTy) |
4912 | 72 | val = Builder.CreateBitCast(val, Ty); |
4913 | | |
4914 | 4.28k | Value *args[] = {opArg, handle, atomicOpArg, |
4915 | 4.28k | undefI, undefI, undefI, // coordinates |
4916 | 4.28k | val}; |
4917 | | |
4918 | | // Setup coordinates. |
4919 | 4.28k | if (addr->getType()->isVectorTy()) { |
4920 | 250 | unsigned vectorNumElements = addr->getType()->getVectorNumElements(); |
4921 | 250 | DXASSERT(vectorNumElements <= 3, "up to 3 elements for atomic binary op"); |
4922 | 250 | assert(vectorNumElements <= 3); |
4923 | 846 | for (unsigned i = 0; i < vectorNumElements; i++596 ) { |
4924 | 596 | Value *Elt = Builder.CreateExtractElement(addr, i); |
4925 | 596 | args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx + i] = Elt; |
4926 | 596 | } |
4927 | 250 | } else |
4928 | 4.03k | args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx] = addr; |
4929 | | |
4930 | | // Set offset for structured buffer. |
4931 | 4.28k | if (helper.offset) |
4932 | 1.00k | args[DXIL::OperandIndex::kAtomicBinOpCoord1OpIdx] = helper.offset; |
4933 | | |
4934 | 4.28k | Value *origVal = |
4935 | 4.28k | Builder.CreateCall(dxilAtomic, args, hlslOP->GetAtomicOpName(atomicOp)); |
4936 | 4.28k | if (helper.originalValue) { |
4937 | 2.06k | if (Ty != valTy) |
4938 | 72 | origVal = Builder.CreateBitCast(origVal, valTy); |
4939 | 2.06k | Builder.CreateStore(origVal, helper.originalValue); |
4940 | 2.06k | } |
4941 | 4.28k | } |
4942 | | |
4943 | | Value *TranslateMopAtomicBinaryOperation( |
4944 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
4945 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
4946 | 1.52k | bool &Translated) { |
4947 | 1.52k | hlsl::OP *hlslOP = &helper.hlslOP; |
4948 | | |
4949 | 1.52k | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
4950 | 1.52k | IRBuilder<> Builder(CI); |
4951 | | |
4952 | 1.52k | switch (IOP) { |
4953 | 244 | case IntrinsicOp::MOP_InterlockedAdd: |
4954 | 316 | case IntrinsicOp::MOP_InterlockedAdd64: { |
4955 | 316 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle); |
4956 | 316 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Add, Builder, |
4957 | 316 | hlslOP); |
4958 | 316 | } break; |
4959 | 72 | case IntrinsicOp::MOP_InterlockedAnd: |
4960 | 144 | case IntrinsicOp::MOP_InterlockedAnd64: { |
4961 | 144 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle); |
4962 | 144 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::And, Builder, |
4963 | 144 | hlslOP); |
4964 | 144 | } break; |
4965 | 216 | case IntrinsicOp::MOP_InterlockedExchange: |
4966 | 424 | case IntrinsicOp::MOP_InterlockedExchange64: { |
4967 | 424 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle); |
4968 | 424 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange, |
4969 | 424 | Builder, hlslOP); |
4970 | 424 | } break; |
4971 | 48 | case IntrinsicOp::MOP_InterlockedExchangeFloat: { |
4972 | 48 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle, |
4973 | 48 | Type::getInt32Ty(CI->getContext())); |
4974 | 48 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange, |
4975 | 48 | Builder, hlslOP); |
4976 | 48 | } break; |
4977 | 58 | case IntrinsicOp::MOP_InterlockedMax: |
4978 | 118 | case IntrinsicOp::MOP_InterlockedMax64: { |
4979 | 118 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle); |
4980 | 118 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMax, Builder, |
4981 | 118 | hlslOP); |
4982 | 118 | } break; |
4983 | 58 | case IntrinsicOp::MOP_InterlockedMin: |
4984 | 118 | case IntrinsicOp::MOP_InterlockedMin64: { |
4985 | 118 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle); |
4986 | 118 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMin, Builder, |
4987 | 118 | hlslOP); |
4988 | 118 | } break; |
4989 | 34 | case IntrinsicOp::MOP_InterlockedUMax: { |
4990 | 34 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle); |
4991 | 34 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMax, Builder, |
4992 | 34 | hlslOP); |
4993 | 34 | } break; |
4994 | 34 | case IntrinsicOp::MOP_InterlockedUMin: { |
4995 | 34 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle); |
4996 | 34 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMin, Builder, |
4997 | 34 | hlslOP); |
4998 | 34 | } break; |
4999 | 72 | case IntrinsicOp::MOP_InterlockedOr: |
5000 | 144 | case IntrinsicOp::MOP_InterlockedOr64: { |
5001 | 144 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle); |
5002 | 144 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Or, Builder, |
5003 | 144 | hlslOP); |
5004 | 144 | } break; |
5005 | 72 | case IntrinsicOp::MOP_InterlockedXor: |
5006 | 144 | case IntrinsicOp::MOP_InterlockedXor64: |
5007 | 144 | default: { |
5008 | 144 | DXASSERT(IOP == IntrinsicOp::MOP_InterlockedXor || |
5009 | 144 | IOP == IntrinsicOp::MOP_InterlockedXor64, |
5010 | 144 | "invalid MOP atomic intrinsic"); |
5011 | 144 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle); |
5012 | 144 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Xor, Builder, |
5013 | 144 | hlslOP); |
5014 | 144 | } break; |
5015 | 1.52k | } |
5016 | | |
5017 | 1.52k | return nullptr; |
5018 | 1.52k | } |
5019 | | void TranslateAtomicCmpXChg(AtomicHelper &helper, IRBuilder<> &Builder, |
5020 | 2.38k | hlsl::OP *hlslOP) { |
5021 | 2.38k | Value *handle = helper.handle; |
5022 | 2.38k | Value *addr = helper.addr; |
5023 | 2.38k | Value *val = helper.value; |
5024 | 2.38k | Value *cmpVal = helper.compareValue; |
5025 | | |
5026 | 2.38k | Type *Ty = helper.operationType; |
5027 | 2.38k | Type *valTy = val->getType(); |
5028 | | |
5029 | 2.38k | Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext())); |
5030 | | |
5031 | 2.38k | Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType()); |
5032 | 2.38k | Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode)); |
5033 | | |
5034 | 2.38k | if (Ty != valTy) { |
5035 | 168 | val = Builder.CreateBitCast(val, Ty); |
5036 | 168 | if (cmpVal) |
5037 | 168 | cmpVal = Builder.CreateBitCast(cmpVal, Ty); |
5038 | 168 | } |
5039 | | |
5040 | 2.38k | Value *args[] = {opArg, handle, undefI, undefI, undefI, // coordinates |
5041 | 2.38k | cmpVal, val}; |
5042 | | |
5043 | | // Setup coordinates. |
5044 | 2.38k | if (addr->getType()->isVectorTy()) { |
5045 | 60 | unsigned vectorNumElements = addr->getType()->getVectorNumElements(); |
5046 | 60 | DXASSERT(vectorNumElements <= 3, "up to 3 elements in atomic op"); |
5047 | 60 | assert(vectorNumElements <= 3); |
5048 | 196 | for (unsigned i = 0; i < vectorNumElements; i++136 ) { |
5049 | 136 | Value *Elt = Builder.CreateExtractElement(addr, i); |
5050 | 136 | args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx + i] = Elt; |
5051 | 136 | } |
5052 | 60 | } else |
5053 | 2.32k | args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx] = addr; |
5054 | | |
5055 | | // Set offset for structured buffer. |
5056 | 2.38k | if (helper.offset) |
5057 | 536 | args[DXIL::OperandIndex::kAtomicCmpExchangeCoord1OpIdx] = helper.offset; |
5058 | | |
5059 | 2.38k | Value *origVal = Builder.CreateCall(dxilAtomic, args); |
5060 | 2.38k | if (helper.originalValue) { |
5061 | 1.21k | if (Ty != valTy) |
5062 | 84 | origVal = Builder.CreateBitCast(origVal, valTy); |
5063 | 1.21k | Builder.CreateStore(origVal, helper.originalValue); |
5064 | 1.21k | } |
5065 | 2.38k | } |
5066 | | |
5067 | | Value *TranslateMopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP, |
5068 | | OP::OpCode opcode, |
5069 | | HLOperationLowerHelper &helper, |
5070 | | HLObjectOperationLowerHelper *pObjHelper, |
5071 | 962 | bool &Translated) { |
5072 | 962 | hlsl::OP *hlslOP = &helper.hlslOP; |
5073 | | |
5074 | 962 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
5075 | 962 | IRBuilder<> Builder(CI); |
5076 | 962 | Type *opType = nullptr; |
5077 | 962 | if (IOP == IntrinsicOp::MOP_InterlockedCompareStoreFloatBitwise || |
5078 | 962 | IOP == IntrinsicOp::MOP_InterlockedCompareExchangeFloatBitwise906 ) |
5079 | 112 | opType = Type::getInt32Ty(CI->getContext()); |
5080 | 962 | AtomicHelper atomicHelper(CI, OP::OpCode::AtomicCompareExchange, handle, |
5081 | 962 | opType); |
5082 | 962 | TranslateAtomicCmpXChg(atomicHelper, Builder, hlslOP); |
5083 | 962 | return nullptr; |
5084 | 962 | } |
5085 | | |
5086 | | void TranslateSharedMemOrNodeAtomicBinOp(CallInst *CI, IntrinsicOp IOP, |
5087 | 1.49k | Value *addr) { |
5088 | 1.49k | AtomicRMWInst::BinOp Op; |
5089 | 1.49k | IRBuilder<> Builder(CI); |
5090 | 1.49k | Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex); |
5091 | 1.49k | PointerType *ptrType = dyn_cast<PointerType>( |
5092 | 1.49k | CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex)->getType()); |
5093 | 1.49k | bool needCast = ptrType && ptrType->getElementType()->isFloatTy(); |
5094 | 1.49k | switch (IOP) { |
5095 | 376 | case IntrinsicOp::IOP_InterlockedAdd: |
5096 | 376 | Op = AtomicRMWInst::BinOp::Add; |
5097 | 376 | break; |
5098 | 104 | case IntrinsicOp::IOP_InterlockedAnd: |
5099 | 104 | Op = AtomicRMWInst::BinOp::And; |
5100 | 104 | break; |
5101 | 472 | case IntrinsicOp::IOP_InterlockedExchange: |
5102 | 472 | if (needCast) { |
5103 | 48 | val = Builder.CreateBitCast(val, Type::getInt32Ty(CI->getContext())); |
5104 | 48 | addr = Builder.CreateBitCast( |
5105 | 48 | addr, Type::getInt32PtrTy(CI->getContext(), |
5106 | 48 | addr->getType()->getPointerAddressSpace())); |
5107 | 48 | } |
5108 | 472 | Op = AtomicRMWInst::BinOp::Xchg; |
5109 | 472 | break; |
5110 | 68 | case IntrinsicOp::IOP_InterlockedMax: |
5111 | 68 | Op = AtomicRMWInst::BinOp::Max; |
5112 | 68 | break; |
5113 | 84 | case IntrinsicOp::IOP_InterlockedUMax: |
5114 | 84 | Op = AtomicRMWInst::BinOp::UMax; |
5115 | 84 | break; |
5116 | 60 | case IntrinsicOp::IOP_InterlockedMin: |
5117 | 60 | Op = AtomicRMWInst::BinOp::Min; |
5118 | 60 | break; |
5119 | 68 | case IntrinsicOp::IOP_InterlockedUMin: |
5120 | 68 | Op = AtomicRMWInst::BinOp::UMin; |
5121 | 68 | break; |
5122 | 156 | case IntrinsicOp::IOP_InterlockedOr: |
5123 | 156 | Op = AtomicRMWInst::BinOp::Or; |
5124 | 156 | break; |
5125 | 104 | case IntrinsicOp::IOP_InterlockedXor: |
5126 | 104 | default: |
5127 | 104 | DXASSERT(IOP == IntrinsicOp::IOP_InterlockedXor, "Invalid Intrinsic"); |
5128 | 104 | Op = AtomicRMWInst::BinOp::Xor; |
5129 | 104 | break; |
5130 | 1.49k | } |
5131 | | |
5132 | 1.49k | Value *Result = Builder.CreateAtomicRMW( |
5133 | 1.49k | Op, addr, val, AtomicOrdering::SequentiallyConsistent); |
5134 | 1.49k | if (CI->getNumArgOperands() > |
5135 | 1.49k | HLOperandIndex::kInterlockedOriginalValueOpIndex) { |
5136 | 574 | if (needCast) |
5137 | 48 | Result = |
5138 | 48 | Builder.CreateBitCast(Result, Type::getFloatTy(CI->getContext())); |
5139 | 574 | Builder.CreateStore( |
5140 | 574 | Result, |
5141 | 574 | CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex)); |
5142 | 574 | } |
5143 | 1.49k | } |
5144 | | |
5145 | 3.65k | static Value *SkipAddrSpaceCast(Value *Ptr) { |
5146 | 3.65k | if (AddrSpaceCastInst *CastInst = dyn_cast<AddrSpaceCastInst>(Ptr)) |
5147 | 2.25k | return CastInst->getOperand(0); |
5148 | 1.40k | if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Ptr)) { |
5149 | 400 | if (ConstExpr->getOpcode() == Instruction::AddrSpaceCast) { |
5150 | 400 | return ConstExpr->getOperand(0); |
5151 | 400 | } |
5152 | 400 | } |
5153 | 1.00k | return Ptr; |
5154 | 1.40k | } |
5155 | | |
5156 | | Value * |
5157 | | TranslateNodeIncrementOutputCount(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
5158 | | HLOperationLowerHelper &helper, |
5159 | | HLObjectOperationLowerHelper *pObjHelper, |
5160 | 84 | bool isPerThread, bool &Translated) { |
5161 | | |
5162 | 84 | hlsl::OP *OP = &helper.hlslOP; |
5163 | 84 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
5164 | 84 | Value *count = |
5165 | 84 | CI->getArgOperand(HLOperandIndex::kIncrementOutputCountCountIdx); |
5166 | 84 | Function *dxilFunc = OP->GetOpFunc(op, CI->getType()); |
5167 | 84 | Value *opArg = OP->GetU32Const((unsigned)op); |
5168 | 84 | Value *perThread = OP->GetI1Const(isPerThread); |
5169 | | |
5170 | 84 | Value *args[] = {opArg, handle, count, perThread}; |
5171 | | |
5172 | 84 | IRBuilder<> Builder(CI); |
5173 | 84 | Builder.CreateCall(dxilFunc, args); |
5174 | 84 | return nullptr; |
5175 | 84 | } |
5176 | | |
5177 | | /* |
5178 | | HLSL: |
5179 | | void EmptyNodeOutput::GroupIncrementOutputCount(uint count) |
5180 | | DXIL: |
5181 | | void @dx.op.groupIncrementOutputCount(i32 %Opcode, %dx.types.NodeHandle |
5182 | | %NodeOutput, i32 count) |
5183 | | */ |
5184 | | Value *TranslateNodeGroupIncrementOutputCount( |
5185 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
5186 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
5187 | 76 | bool &Translated) { |
5188 | 76 | return TranslateNodeIncrementOutputCount(CI, IOP, op, helper, pObjHelper, |
5189 | 76 | /*isPerThread*/ false, Translated); |
5190 | 76 | } |
5191 | | |
5192 | | /* |
5193 | | HLSL: |
5194 | | void EmptyNodeOutput::ThreadIncrementOutputCount(uint count) |
5195 | | DXIL: |
5196 | | void @dx.op.threadIncrementOutputCount(i32 %Opcode, %dx.types.NodeHandle |
5197 | | %NodeOutput, i32 count) |
5198 | | */ |
5199 | | Value *TranslateNodeThreadIncrementOutputCount( |
5200 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
5201 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
5202 | 8 | bool &Translated) { |
5203 | 8 | return TranslateNodeIncrementOutputCount(CI, IOP, op, helper, pObjHelper, |
5204 | 8 | /*isPerThread*/ true, Translated); |
5205 | 8 | } |
5206 | | |
5207 | | // For known non-groupshared, verify that the destination param is valid |
5208 | | void ValidateAtomicDestination(CallInst *CI, |
5209 | 1.00k | HLObjectOperationLowerHelper *pObjHelper) { |
5210 | 1.00k | Value *dest = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex); |
5211 | | // If we encounter a gep, we may provide a more specific error message |
5212 | 1.00k | bool hasGep = isa<GetElementPtrInst>(dest); |
5213 | | |
5214 | | // Confirm that dest is a properly-used UAV |
5215 | | |
5216 | | // Drill through subscripts and geps, anything else indicates a misuse |
5217 | 2.23k | while (true) { |
5218 | 2.23k | if (GetElementPtrInst *gep = dyn_cast<GetElementPtrInst>(dest)) { |
5219 | 284 | dest = gep->getPointerOperand(); |
5220 | 284 | continue; |
5221 | 284 | } |
5222 | 1.95k | if (CallInst *handle = dyn_cast<CallInst>(dest)) { |
5223 | 1.86k | hlsl::HLOpcodeGroup group = |
5224 | 1.86k | hlsl::GetHLOpcodeGroup(handle->getCalledFunction()); |
5225 | 1.86k | if (group != HLOpcodeGroup::HLSubscript) |
5226 | 914 | break; |
5227 | 946 | dest = handle->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx); |
5228 | 946 | continue; |
5229 | 1.86k | } |
5230 | 90 | break; |
5231 | 1.95k | } |
5232 | | |
5233 | 1.00k | if (pObjHelper->GetRC(dest) == DXIL::ResourceClass::UAV) { |
5234 | 914 | DXIL::ResourceKind RK = pObjHelper->GetRK(dest); |
5235 | 914 | if (DXIL::IsStructuredBuffer(RK)) |
5236 | 404 | return; // no errors |
5237 | 510 | if (DXIL::IsTyped(RK)) { |
5238 | 510 | if (hasGep) |
5239 | 16 | dxilutil::EmitErrorOnInstruction( |
5240 | 16 | CI, "Typed resources used in atomic operations must have a scalar " |
5241 | 16 | "element type."); |
5242 | 510 | return; // error emitted or else no errors |
5243 | 510 | } |
5244 | 510 | } |
5245 | | |
5246 | 90 | dxilutil::EmitErrorOnInstruction( |
5247 | 90 | CI, "Atomic operation targets must be groupshared, Node Record or UAV."); |
5248 | 90 | } |
5249 | | |
5250 | | Value *TranslateIopAtomicBinaryOperation( |
5251 | | CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, |
5252 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
5253 | 2.42k | bool &Translated) { |
5254 | 2.42k | Value *addr = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex); |
5255 | 2.42k | addr = SkipAddrSpaceCast(addr); |
5256 | | |
5257 | 2.42k | unsigned addressSpace = addr->getType()->getPointerAddressSpace(); |
5258 | 2.42k | if (addressSpace == DXIL::kTGSMAddrSpace || |
5259 | 2.42k | addressSpace == DXIL::kNodeRecordAddrSpace974 ) |
5260 | 1.49k | TranslateSharedMemOrNodeAtomicBinOp(CI, IOP, addr); |
5261 | 928 | else { |
5262 | | // If not groupshared or node record, we either have an error case or will |
5263 | | // translate the atomic op in the process of translating users of the |
5264 | | // subscript operator Mark not translated and validate dest param |
5265 | 928 | Translated = false; |
5266 | 928 | ValidateAtomicDestination(CI, pObjHelper); |
5267 | 928 | } |
5268 | | |
5269 | 2.42k | return nullptr; |
5270 | 2.42k | } |
5271 | | |
5272 | 1.16k | void TranslateSharedMemOrNodeAtomicCmpXChg(CallInst *CI, Value *addr) { |
5273 | 1.16k | Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex); |
5274 | 1.16k | Value *cmpVal = |
5275 | 1.16k | CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex); |
5276 | 1.16k | IRBuilder<> Builder(CI); |
5277 | | |
5278 | 1.16k | PointerType *ptrType = dyn_cast<PointerType>( |
5279 | 1.16k | CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex)->getType()); |
5280 | 1.16k | bool needCast = false; |
5281 | 1.16k | if (ptrType && ptrType->getElementType()->isFloatTy()) { |
5282 | 166 | needCast = true; |
5283 | 166 | val = Builder.CreateBitCast(val, Type::getInt32Ty(CI->getContext())); |
5284 | 166 | cmpVal = Builder.CreateBitCast(cmpVal, Type::getInt32Ty(CI->getContext())); |
5285 | 166 | unsigned addrSpace = cast<PointerType>(addr->getType())->getAddressSpace(); |
5286 | 166 | addr = Builder.CreateBitCast( |
5287 | 166 | addr, Type::getInt32PtrTy(CI->getContext(), addrSpace)); |
5288 | 166 | } |
5289 | | |
5290 | 1.16k | Value *Result = Builder.CreateAtomicCmpXchg( |
5291 | 1.16k | addr, cmpVal, val, AtomicOrdering::SequentiallyConsistent, |
5292 | 1.16k | AtomicOrdering::SequentiallyConsistent); |
5293 | | |
5294 | 1.16k | if (CI->getNumArgOperands() > |
5295 | 1.16k | HLOperandIndex::kInterlockedCmpOriginalValueOpIndex) { |
5296 | 538 | Value *originVal = Builder.CreateExtractValue(Result, 0); |
5297 | 538 | if (needCast) |
5298 | 56 | originVal = |
5299 | 56 | Builder.CreateBitCast(originVal, Type::getFloatTy(CI->getContext())); |
5300 | 538 | Builder.CreateStore( |
5301 | 538 | originVal, |
5302 | 538 | CI->getArgOperand(HLOperandIndex::kInterlockedCmpOriginalValueOpIndex)); |
5303 | 538 | } |
5304 | 1.16k | } |
5305 | | |
5306 | | Value *TranslateIopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP, |
5307 | | DXIL::OpCode opcode, |
5308 | | HLOperationLowerHelper &helper, |
5309 | | HLObjectOperationLowerHelper *pObjHelper, |
5310 | 1.23k | bool &Translated) { |
5311 | 1.23k | Value *addr = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex); |
5312 | 1.23k | addr = SkipAddrSpaceCast(addr); |
5313 | | |
5314 | 1.23k | unsigned addressSpace = addr->getType()->getPointerAddressSpace(); |
5315 | 1.23k | if (addressSpace == DXIL::kTGSMAddrSpace || |
5316 | 1.23k | addressSpace == DXIL::kNodeRecordAddrSpace176 ) |
5317 | 1.16k | TranslateSharedMemOrNodeAtomicCmpXChg(CI, addr); |
5318 | 76 | else { |
5319 | | // If not groupshared, we either have an error case or will translate |
5320 | | // the atomic op in the process of translating users of the subscript |
5321 | | // operator Mark not translated and validate dest param |
5322 | 76 | Translated = false; |
5323 | 76 | ValidateAtomicDestination(CI, pObjHelper); |
5324 | 76 | } |
5325 | | |
5326 | 1.23k | return nullptr; |
5327 | 1.23k | } |
5328 | | } // namespace |
5329 | | |
5330 | | // Process Tess Factor. |
5331 | | namespace { |
5332 | | |
5333 | | // Clamp to [0.0f..1.0f], NaN->0.0f. |
5334 | | Value *CleanupTessFactorScale(Value *input, hlsl::OP *hlslOP, |
5335 | 288 | IRBuilder<> &Builder) { |
5336 | 288 | float fMin = 0; |
5337 | 288 | float fMax = 1; |
5338 | 288 | Type *f32Ty = input->getType()->getScalarType(); |
5339 | 288 | Value *minFactor = ConstantFP::get(f32Ty, fMin); |
5340 | 288 | Value *maxFactor = ConstantFP::get(f32Ty, fMax); |
5341 | 288 | Type *Ty = input->getType(); |
5342 | 288 | if (Ty->isVectorTy()) |
5343 | 288 | minFactor = SplatToVector(minFactor, input->getType(), Builder); |
5344 | 288 | Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, |
5345 | 288 | hlslOP, Builder); |
5346 | 288 | if (Ty->isVectorTy()) |
5347 | 288 | maxFactor = SplatToVector(maxFactor, input->getType(), Builder); |
5348 | 288 | return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP, |
5349 | 288 | Builder); |
5350 | 288 | } |
5351 | | |
5352 | | // Clamp to [1.0f..Inf], NaN->1.0f. |
5353 | 288 | Value *CleanupTessFactor(Value *input, hlsl::OP *hlslOP, IRBuilder<> &Builder) { |
5354 | 288 | float fMin = 1.0; |
5355 | 288 | Type *f32Ty = input->getType()->getScalarType(); |
5356 | 288 | Value *minFactor = ConstantFP::get(f32Ty, fMin); |
5357 | 288 | minFactor = SplatToVector(minFactor, input->getType(), Builder); |
5358 | 288 | return TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, |
5359 | 288 | hlslOP, Builder); |
5360 | 288 | } |
5361 | | |
5362 | | // Do partitioning-specific clamping. |
5363 | | Value *ClampTessFactor(Value *input, |
5364 | | DXIL::TessellatorPartitioning partitionMode, |
5365 | 680 | hlsl::OP *hlslOP, IRBuilder<> &Builder) { |
5366 | 680 | const unsigned kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR = 64; |
5367 | 680 | const unsigned kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR = 63; |
5368 | | |
5369 | 680 | const unsigned kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR = 2; |
5370 | 680 | const unsigned kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR = 1; |
5371 | | |
5372 | 680 | const unsigned kTESSELLATOR_MAX_TESSELLATION_FACTOR = 64; |
5373 | | |
5374 | 680 | float fMin; |
5375 | 680 | float fMax; |
5376 | 680 | switch (partitionMode) { |
5377 | 152 | case DXIL::TessellatorPartitioning::Integer: |
5378 | 152 | fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR; |
5379 | 152 | fMax = kTESSELLATOR_MAX_TESSELLATION_FACTOR; |
5380 | 152 | break; |
5381 | 152 | case DXIL::TessellatorPartitioning::Pow2: |
5382 | 152 | fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR; |
5383 | 152 | fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR; |
5384 | 152 | break; |
5385 | 224 | case DXIL::TessellatorPartitioning::FractionalOdd: |
5386 | 224 | fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR; |
5387 | 224 | fMax = kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR; |
5388 | 224 | break; |
5389 | 152 | case DXIL::TessellatorPartitioning::FractionalEven: |
5390 | 152 | default: |
5391 | 152 | DXASSERT(partitionMode == DXIL::TessellatorPartitioning::FractionalEven, |
5392 | 152 | "invalid partition mode"); |
5393 | 152 | fMin = kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR; |
5394 | 152 | fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR; |
5395 | 152 | break; |
5396 | 680 | } |
5397 | 680 | Type *f32Ty = input->getType()->getScalarType(); |
5398 | 680 | Value *minFactor = ConstantFP::get(f32Ty, fMin); |
5399 | 680 | Value *maxFactor = ConstantFP::get(f32Ty, fMax); |
5400 | 680 | Type *Ty = input->getType(); |
5401 | 680 | if (Ty->isVectorTy()) |
5402 | 632 | minFactor = SplatToVector(minFactor, input->getType(), Builder); |
5403 | 680 | Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, |
5404 | 680 | hlslOP, Builder); |
5405 | 680 | if (Ty->isVectorTy()) |
5406 | 632 | maxFactor = SplatToVector(maxFactor, input->getType(), Builder); |
5407 | 680 | return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP, |
5408 | 680 | Builder); |
5409 | 680 | } |
5410 | | |
5411 | | // round up for integer/pow2 partitioning |
5412 | | // note that this code assumes the inputs should be in the range [1, inf), |
5413 | | // which should be enforced by the clamp above. |
5414 | | Value *RoundUpTessFactor(Value *input, |
5415 | | DXIL::TessellatorPartitioning partitionMode, |
5416 | 704 | hlsl::OP *hlslOP, IRBuilder<> &Builder) { |
5417 | 704 | switch (partitionMode) { |
5418 | 152 | case DXIL::TessellatorPartitioning::Integer: |
5419 | 152 | return TrivialDxilUnaryOperation(DXIL::OpCode::Round_pi, input, hlslOP, |
5420 | 152 | Builder); |
5421 | 152 | case DXIL::TessellatorPartitioning::Pow2: { |
5422 | 152 | const unsigned kExponentMask = 0x7f800000; |
5423 | 152 | const unsigned kExponentLSB = 0x00800000; |
5424 | 152 | const unsigned kMantissaMask = 0x007fffff; |
5425 | 152 | Type *Ty = input->getType(); |
5426 | | // (val = (asuint(val) & mantissamask) ? |
5427 | | // (asuint(val) & exponentmask) + exponentbump : |
5428 | | // asuint(val) & exponentmask; |
5429 | 152 | Type *uintTy = Type::getInt32Ty(Ty->getContext()); |
5430 | 152 | if (Ty->isVectorTy()) |
5431 | 152 | uintTy = VectorType::get(uintTy, Ty->getVectorNumElements()); |
5432 | 152 | Value *uintVal = |
5433 | 152 | Builder.CreateCast(Instruction::CastOps::FPToUI, input, uintTy); |
5434 | | |
5435 | 152 | Value *mantMask = ConstantInt::get(uintTy->getScalarType(), kMantissaMask); |
5436 | 152 | mantMask = SplatToVector(mantMask, uintTy, Builder); |
5437 | 152 | Value *manVal = Builder.CreateAnd(uintVal, mantMask); |
5438 | | |
5439 | 152 | Value *expMask = ConstantInt::get(uintTy->getScalarType(), kExponentMask); |
5440 | 152 | expMask = SplatToVector(expMask, uintTy, Builder); |
5441 | 152 | Value *expVal = Builder.CreateAnd(uintVal, expMask); |
5442 | | |
5443 | 152 | Value *expLSB = ConstantInt::get(uintTy->getScalarType(), kExponentLSB); |
5444 | 152 | expLSB = SplatToVector(expLSB, uintTy, Builder); |
5445 | 152 | Value *newExpVal = Builder.CreateAdd(expVal, expLSB); |
5446 | | |
5447 | 152 | Value *manValNotZero = |
5448 | 152 | Builder.CreateICmpEQ(manVal, ConstantAggregateZero::get(uintTy)); |
5449 | 152 | Value *factors = Builder.CreateSelect(manValNotZero, newExpVal, expVal); |
5450 | 152 | return Builder.CreateUIToFP(factors, Ty); |
5451 | 0 | } break; |
5452 | 152 | case DXIL::TessellatorPartitioning::FractionalEven: |
5453 | 400 | case DXIL::TessellatorPartitioning::FractionalOdd: |
5454 | 400 | return input; |
5455 | 0 | default: |
5456 | 0 | DXASSERT(0, "invalid partition mode"); |
5457 | 0 | return nullptr; |
5458 | 704 | } |
5459 | 704 | } |
5460 | | |
5461 | | Value *TranslateProcessIsolineTessFactors( |
5462 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
5463 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
5464 | 32 | bool &Translated) { |
5465 | 32 | hlsl::OP *hlslOP = &helper.hlslOP; |
5466 | | // Get partition mode |
5467 | 32 | DXASSERT_NOMSG(helper.functionProps); |
5468 | 32 | DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull, |
5469 | 32 | "must be hull shader"); |
5470 | 32 | DXIL::TessellatorPartitioning partition = |
5471 | 32 | helper.functionProps->ShaderProps.HS.partition; |
5472 | | |
5473 | 32 | IRBuilder<> Builder(CI); |
5474 | | |
5475 | 32 | Value *rawDetailFactor = |
5476 | 32 | CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDetailFactor); |
5477 | 32 | rawDetailFactor = Builder.CreateExtractElement(rawDetailFactor, (uint64_t)0); |
5478 | | |
5479 | 32 | Value *rawDensityFactor = |
5480 | 32 | CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDensityFactor); |
5481 | 32 | rawDensityFactor = |
5482 | 32 | Builder.CreateExtractElement(rawDensityFactor, (uint64_t)0); |
5483 | | |
5484 | 32 | Value *init = UndefValue::get(VectorType::get(helper.f32Ty, 2)); |
5485 | 32 | init = Builder.CreateInsertElement(init, rawDetailFactor, (uint64_t)0); |
5486 | 32 | init = Builder.CreateInsertElement(init, rawDetailFactor, (uint64_t)1); |
5487 | | |
5488 | 32 | Value *clamped = ClampTessFactor(init, partition, hlslOP, Builder); |
5489 | 32 | Value *rounded = RoundUpTessFactor(clamped, partition, hlslOP, Builder); |
5490 | | |
5491 | 32 | Value *roundedDetailFactor = |
5492 | 32 | CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDetailFactor); |
5493 | 32 | Value *temp = UndefValue::get(VectorType::get(helper.f32Ty, 1)); |
5494 | 32 | Value *roundedX = Builder.CreateExtractElement(rounded, (uint64_t)0); |
5495 | 32 | temp = Builder.CreateInsertElement(temp, roundedX, (uint64_t)0); |
5496 | 32 | Builder.CreateStore(temp, roundedDetailFactor); |
5497 | | |
5498 | 32 | Value *roundedDensityFactor = |
5499 | 32 | CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDensityFactor); |
5500 | 32 | Value *roundedY = Builder.CreateExtractElement(rounded, 1); |
5501 | 32 | temp = Builder.CreateInsertElement(temp, roundedY, (uint64_t)0); |
5502 | 32 | Builder.CreateStore(temp, roundedDensityFactor); |
5503 | 32 | return nullptr; |
5504 | 32 | } |
5505 | | |
5506 | | // 3 inputs, 1 result |
5507 | | Value *ApplyTriTessFactorOp(Value *input, DXIL::OpCode opcode, hlsl::OP *hlslOP, |
5508 | 120 | IRBuilder<> &Builder) { |
5509 | 120 | Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0); |
5510 | 120 | Value *input1 = Builder.CreateExtractElement(input, 1); |
5511 | 120 | Value *input2 = Builder.CreateExtractElement(input, 2); |
5512 | | |
5513 | 120 | if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin80 ) { |
5514 | 72 | Value *temp = |
5515 | 72 | TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder); |
5516 | 72 | Value *combined = |
5517 | 72 | TrivialDxilBinaryOperation(opcode, temp, input2, hlslOP, Builder); |
5518 | 72 | return combined; |
5519 | 72 | } |
5520 | | |
5521 | | // Avg. |
5522 | 48 | Value *temp = Builder.CreateFAdd(input0, input1); |
5523 | 48 | Value *combined = Builder.CreateFAdd(temp, input2); |
5524 | 48 | Value *rcp = ConstantFP::get(input0->getType(), 1.0 / 3.0); |
5525 | 48 | combined = Builder.CreateFMul(combined, rcp); |
5526 | 48 | return combined; |
5527 | 120 | } |
5528 | | |
5529 | | // 4 inputs, 1 result |
5530 | | Value *ApplyQuadTessFactorOp(Value *input, DXIL::OpCode opcode, |
5531 | 120 | hlsl::OP *hlslOP, IRBuilder<> &Builder) { |
5532 | 120 | Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0); |
5533 | 120 | Value *input1 = Builder.CreateExtractElement(input, 1); |
5534 | 120 | Value *input2 = Builder.CreateExtractElement(input, 2); |
5535 | 120 | Value *input3 = Builder.CreateExtractElement(input, 3); |
5536 | | |
5537 | 120 | if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin80 ) { |
5538 | 72 | Value *temp0 = |
5539 | 72 | TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder); |
5540 | 72 | Value *temp1 = |
5541 | 72 | TrivialDxilBinaryOperation(opcode, input2, input3, hlslOP, Builder); |
5542 | 72 | Value *combined = |
5543 | 72 | TrivialDxilBinaryOperation(opcode, temp0, temp1, hlslOP, Builder); |
5544 | 72 | return combined; |
5545 | 72 | } |
5546 | | |
5547 | | // Avg. |
5548 | 48 | Value *temp0 = Builder.CreateFAdd(input0, input1); |
5549 | 48 | Value *temp1 = Builder.CreateFAdd(input2, input3); |
5550 | 48 | Value *combined = Builder.CreateFAdd(temp0, temp1); |
5551 | 48 | Value *rcp = ConstantFP::get(input0->getType(), 0.25); |
5552 | 48 | combined = Builder.CreateFMul(combined, rcp); |
5553 | 48 | return combined; |
5554 | 120 | } |
5555 | | |
5556 | | // 4 inputs, 2 result |
5557 | | Value *Apply2DQuadTessFactorOp(Value *input, DXIL::OpCode opcode, |
5558 | 120 | hlsl::OP *hlslOP, IRBuilder<> &Builder) { |
5559 | 120 | Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0); |
5560 | 120 | Value *input1 = Builder.CreateExtractElement(input, 1); |
5561 | 120 | Value *input2 = Builder.CreateExtractElement(input, 2); |
5562 | 120 | Value *input3 = Builder.CreateExtractElement(input, 3); |
5563 | | |
5564 | 120 | if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin80 ) { |
5565 | 72 | Value *temp0 = |
5566 | 72 | TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder); |
5567 | 72 | Value *temp1 = |
5568 | 72 | TrivialDxilBinaryOperation(opcode, input2, input3, hlslOP, Builder); |
5569 | 72 | Value *combined = UndefValue::get(VectorType::get(input0->getType(), 2)); |
5570 | 72 | combined = Builder.CreateInsertElement(combined, temp0, (uint64_t)0); |
5571 | 72 | combined = Builder.CreateInsertElement(combined, temp1, 1); |
5572 | 72 | return combined; |
5573 | 72 | } |
5574 | | |
5575 | | // Avg. |
5576 | 48 | Value *temp0 = Builder.CreateFAdd(input0, input1); |
5577 | 48 | Value *temp1 = Builder.CreateFAdd(input2, input3); |
5578 | 48 | Value *combined = UndefValue::get(VectorType::get(input0->getType(), 2)); |
5579 | 48 | combined = Builder.CreateInsertElement(combined, temp0, (uint64_t)0); |
5580 | 48 | combined = Builder.CreateInsertElement(combined, temp1, 1); |
5581 | 48 | Constant *rcp = ConstantFP::get(input0->getType(), 0.5); |
5582 | 48 | rcp = ConstantVector::getSplat(2, rcp); |
5583 | 48 | combined = Builder.CreateFMul(combined, rcp); |
5584 | 48 | return combined; |
5585 | 120 | } |
5586 | | |
5587 | | Value *ResolveSmallValue(Value **pClampedResult, Value *rounded, |
5588 | | Value *averageUnscaled, float cutoffVal, |
5589 | | DXIL::TessellatorPartitioning partitionMode, |
5590 | 72 | hlsl::OP *hlslOP, IRBuilder<> &Builder) { |
5591 | 72 | Value *clampedResult = *pClampedResult; |
5592 | 72 | Value *clampedVal = clampedResult; |
5593 | 72 | Value *roundedVal = rounded; |
5594 | | // Do partitioning-specific clamping. |
5595 | 72 | Value *clampedAvg = |
5596 | 72 | ClampTessFactor(averageUnscaled, partitionMode, hlslOP, Builder); |
5597 | 72 | Constant *cutoffVals = |
5598 | 72 | ConstantFP::get(Type::getFloatTy(rounded->getContext()), cutoffVal); |
5599 | 72 | if (clampedAvg->getType()->isVectorTy()) |
5600 | 24 | cutoffVals = ConstantVector::getSplat( |
5601 | 24 | clampedAvg->getType()->getVectorNumElements(), cutoffVals); |
5602 | | // Limit the value. |
5603 | 72 | clampedAvg = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, clampedAvg, |
5604 | 72 | cutoffVals, hlslOP, Builder); |
5605 | | // Round up for integer/pow2 partitioning. |
5606 | 72 | Value *roundedAvg = |
5607 | 72 | RoundUpTessFactor(clampedAvg, partitionMode, hlslOP, Builder); |
5608 | | |
5609 | 72 | if (rounded->getType() != cutoffVals->getType()) |
5610 | 48 | cutoffVals = ConstantVector::getSplat( |
5611 | 48 | rounded->getType()->getVectorNumElements(), cutoffVals); |
5612 | | // If the scaled value is less than three, then take the unscaled average. |
5613 | 72 | Value *lt = Builder.CreateFCmpOLT(rounded, cutoffVals); |
5614 | 72 | if (clampedAvg->getType() != clampedVal->getType()) |
5615 | 48 | clampedAvg = SplatToVector(clampedAvg, clampedVal->getType(), Builder); |
5616 | 72 | *pClampedResult = Builder.CreateSelect(lt, clampedAvg, clampedVal); |
5617 | | |
5618 | 72 | if (roundedAvg->getType() != roundedVal->getType()) |
5619 | 48 | roundedAvg = SplatToVector(roundedAvg, roundedVal->getType(), Builder); |
5620 | 72 | Value *result = Builder.CreateSelect(lt, roundedAvg, roundedVal); |
5621 | 72 | return result; |
5622 | 72 | } |
5623 | | |
5624 | | void ResolveQuadAxes(Value **pFinalResult, Value **pClampedResult, |
5625 | | float cutoffVal, |
5626 | | DXIL::TessellatorPartitioning partitionMode, |
5627 | 24 | hlsl::OP *hlslOP, IRBuilder<> &Builder) { |
5628 | 24 | Value *finalResult = *pFinalResult; |
5629 | 24 | Value *clampedResult = *pClampedResult; |
5630 | | |
5631 | 24 | Value *clampR = clampedResult; |
5632 | 24 | Value *finalR = finalResult; |
5633 | 24 | Type *f32Ty = Type::getFloatTy(finalR->getContext()); |
5634 | 24 | Constant *cutoffVals = ConstantFP::get(f32Ty, cutoffVal); |
5635 | | |
5636 | 24 | Value *minValsX = cutoffVals; |
5637 | 24 | Value *minValsY = |
5638 | 24 | RoundUpTessFactor(cutoffVals, partitionMode, hlslOP, Builder); |
5639 | | |
5640 | 24 | Value *clampRX = Builder.CreateExtractElement(clampR, (uint64_t)0); |
5641 | 24 | Value *clampRY = Builder.CreateExtractElement(clampR, 1); |
5642 | 24 | Value *maxValsX = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, clampRX, |
5643 | 24 | clampRY, hlslOP, Builder); |
5644 | | |
5645 | 24 | Value *finalRX = Builder.CreateExtractElement(finalR, (uint64_t)0); |
5646 | 24 | Value *finalRY = Builder.CreateExtractElement(finalR, 1); |
5647 | 24 | Value *maxValsY = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, finalRX, |
5648 | 24 | finalRY, hlslOP, Builder); |
5649 | | |
5650 | | // Don't go over our threshold ("final" one is rounded). |
5651 | 24 | Value *optionX = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsX, |
5652 | 24 | minValsX, hlslOP, Builder); |
5653 | 24 | Value *optionY = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsY, |
5654 | 24 | minValsY, hlslOP, Builder); |
5655 | | |
5656 | 24 | Value *clampL = SplatToVector(optionX, clampR->getType(), Builder); |
5657 | 24 | Value *finalL = SplatToVector(optionY, finalR->getType(), Builder); |
5658 | | |
5659 | 24 | cutoffVals = ConstantVector::getSplat(2, cutoffVals); |
5660 | 24 | Value *lt = Builder.CreateFCmpOLT(clampedResult, cutoffVals); |
5661 | 24 | *pClampedResult = Builder.CreateSelect(lt, clampL, clampR); |
5662 | 24 | *pFinalResult = Builder.CreateSelect(lt, finalL, finalR); |
5663 | 24 | } |
5664 | | |
5665 | | Value *TranslateProcessTessFactors(CallInst *CI, IntrinsicOp IOP, |
5666 | | OP::OpCode opcode, |
5667 | | HLOperationLowerHelper &helper, |
5668 | | HLObjectOperationLowerHelper *pObjHelper, |
5669 | 288 | bool &Translated) { |
5670 | 288 | hlsl::OP *hlslOP = &helper.hlslOP; |
5671 | | // Get partition mode |
5672 | 288 | DXASSERT_NOMSG(helper.functionProps); |
5673 | 288 | DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull, |
5674 | 288 | "must be hull shader"); |
5675 | 288 | DXIL::TessellatorPartitioning partition = |
5676 | 288 | helper.functionProps->ShaderProps.HS.partition; |
5677 | | |
5678 | 288 | IRBuilder<> Builder(CI); |
5679 | | |
5680 | 288 | DXIL::OpCode tessFactorOp = DXIL::OpCode::NumOpCodes; |
5681 | 288 | switch (IOP) { |
5682 | 32 | case IntrinsicOp::IOP_Process2DQuadTessFactorsMax: |
5683 | 64 | case IntrinsicOp::IOP_ProcessQuadTessFactorsMax: |
5684 | 96 | case IntrinsicOp::IOP_ProcessTriTessFactorsMax: |
5685 | 96 | tessFactorOp = DXIL::OpCode::FMax; |
5686 | 96 | break; |
5687 | 32 | case IntrinsicOp::IOP_Process2DQuadTessFactorsMin: |
5688 | 64 | case IntrinsicOp::IOP_ProcessQuadTessFactorsMin: |
5689 | 96 | case IntrinsicOp::IOP_ProcessTriTessFactorsMin: |
5690 | 96 | tessFactorOp = DXIL::OpCode::FMin; |
5691 | 96 | break; |
5692 | 96 | default: |
5693 | | // Default is Avg. |
5694 | 96 | break; |
5695 | 288 | } |
5696 | | |
5697 | 288 | Value *rawEdgeFactor = |
5698 | 288 | CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawEdgeFactor); |
5699 | | |
5700 | 288 | Value *insideScale = |
5701 | 288 | CI->getArgOperand(HLOperandIndex::kProcessTessFactorInsideScale); |
5702 | | // Clamp to [0.0f..1.0f], NaN->0.0f. |
5703 | 288 | Value *scales = CleanupTessFactorScale(insideScale, hlslOP, Builder); |
5704 | | // Do partitioning-specific clamping. |
5705 | 288 | Value *clamped = ClampTessFactor(rawEdgeFactor, partition, hlslOP, Builder); |
5706 | | // Round up for integer/pow2 partitioning. |
5707 | 288 | Value *rounded = RoundUpTessFactor(clamped, partition, hlslOP, Builder); |
5708 | | // Store the output. |
5709 | 288 | Value *roundedEdgeFactor = |
5710 | 288 | CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedEdgeFactor); |
5711 | 288 | Builder.CreateStore(rounded, roundedEdgeFactor); |
5712 | | |
5713 | | // Clamp to [1.0f..Inf], NaN->1.0f. |
5714 | 288 | bool isQuad = false; |
5715 | 288 | Value *clean = CleanupTessFactor(rawEdgeFactor, hlslOP, Builder); |
5716 | 288 | Value *factors = nullptr; |
5717 | 288 | switch (IOP) { |
5718 | 32 | case IntrinsicOp::IOP_Process2DQuadTessFactorsAvg: |
5719 | 64 | case IntrinsicOp::IOP_Process2DQuadTessFactorsMax: |
5720 | 96 | case IntrinsicOp::IOP_Process2DQuadTessFactorsMin: |
5721 | 96 | factors = Apply2DQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder); |
5722 | 96 | break; |
5723 | 32 | case IntrinsicOp::IOP_ProcessQuadTessFactorsAvg: |
5724 | 64 | case IntrinsicOp::IOP_ProcessQuadTessFactorsMax: |
5725 | 96 | case IntrinsicOp::IOP_ProcessQuadTessFactorsMin: |
5726 | 96 | factors = ApplyQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder); |
5727 | 96 | isQuad = true; |
5728 | 96 | break; |
5729 | 32 | case IntrinsicOp::IOP_ProcessTriTessFactorsAvg: |
5730 | 64 | case IntrinsicOp::IOP_ProcessTriTessFactorsMax: |
5731 | 96 | case IntrinsicOp::IOP_ProcessTriTessFactorsMin: |
5732 | 96 | factors = ApplyTriTessFactorOp(clean, tessFactorOp, hlslOP, Builder); |
5733 | 96 | break; |
5734 | 0 | default: |
5735 | 0 | DXASSERT(0, "invalid opcode for ProcessTessFactor"); |
5736 | 0 | break; |
5737 | 288 | } |
5738 | | |
5739 | 288 | Value *scaledI = nullptr; |
5740 | 288 | if (scales->getType() == factors->getType()) |
5741 | 96 | scaledI = Builder.CreateFMul(factors, scales); |
5742 | 192 | else { |
5743 | 192 | Value *vecFactors = SplatToVector(factors, scales->getType(), Builder); |
5744 | 192 | scaledI = Builder.CreateFMul(vecFactors, scales); |
5745 | 192 | } |
5746 | | |
5747 | | // Do partitioning-specific clamping. |
5748 | 288 | Value *clampedI = ClampTessFactor(scaledI, partition, hlslOP, Builder); |
5749 | | |
5750 | | // Round up for integer/pow2 partitioning. |
5751 | 288 | Value *roundedI = RoundUpTessFactor(clampedI, partition, hlslOP, Builder); |
5752 | | |
5753 | 288 | Value *finalI = roundedI; |
5754 | | |
5755 | 288 | if (partition == DXIL::TessellatorPartitioning::FractionalOdd) { |
5756 | | // If not max, set to AVG. |
5757 | 72 | if (tessFactorOp != DXIL::OpCode::FMax) |
5758 | 48 | tessFactorOp = DXIL::OpCode::NumOpCodes; |
5759 | | |
5760 | 72 | bool b2D = false; |
5761 | 72 | Value *avgFactorsI = nullptr; |
5762 | 72 | switch (IOP) { |
5763 | 8 | case IntrinsicOp::IOP_Process2DQuadTessFactorsAvg: |
5764 | 16 | case IntrinsicOp::IOP_Process2DQuadTessFactorsMax: |
5765 | 24 | case IntrinsicOp::IOP_Process2DQuadTessFactorsMin: |
5766 | 24 | avgFactorsI = |
5767 | 24 | Apply2DQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder); |
5768 | 24 | b2D = true; |
5769 | 24 | break; |
5770 | 8 | case IntrinsicOp::IOP_ProcessQuadTessFactorsAvg: |
5771 | 16 | case IntrinsicOp::IOP_ProcessQuadTessFactorsMax: |
5772 | 24 | case IntrinsicOp::IOP_ProcessQuadTessFactorsMin: |
5773 | 24 | avgFactorsI = ApplyQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder); |
5774 | 24 | break; |
5775 | 8 | case IntrinsicOp::IOP_ProcessTriTessFactorsAvg: |
5776 | 16 | case IntrinsicOp::IOP_ProcessTriTessFactorsMax: |
5777 | 24 | case IntrinsicOp::IOP_ProcessTriTessFactorsMin: |
5778 | 24 | avgFactorsI = ApplyTriTessFactorOp(clean, tessFactorOp, hlslOP, Builder); |
5779 | 24 | break; |
5780 | 0 | default: |
5781 | 0 | DXASSERT(0, "invalid opcode for ProcessTessFactor"); |
5782 | 0 | break; |
5783 | 72 | } |
5784 | | |
5785 | 72 | finalI = ResolveSmallValue(/*inout*/ &clampedI, roundedI, avgFactorsI, |
5786 | 72 | /*cufoff*/ 3.0, partition, hlslOP, Builder); |
5787 | | |
5788 | 72 | if (b2D) |
5789 | 24 | ResolveQuadAxes(/*inout*/ &finalI, /*inout*/ &clampedI, /*cutoff*/ 3.0, |
5790 | 24 | partition, hlslOP, Builder); |
5791 | 72 | } |
5792 | | |
5793 | 288 | Value *unroundedInsideFactor = CI->getArgOperand( |
5794 | 288 | HLOperandIndex::kProcessTessFactorUnRoundedInsideFactor); |
5795 | 288 | Type *outFactorTy = unroundedInsideFactor->getType()->getPointerElementType(); |
5796 | 288 | if (outFactorTy != clampedI->getType()) { |
5797 | 96 | DXASSERT(isQuad, "quad only write one channel of out factor"); |
5798 | 96 | (void)isQuad; |
5799 | 96 | clampedI = Builder.CreateExtractElement(clampedI, (uint64_t)0); |
5800 | | // Splat clampedI to float2. |
5801 | 96 | clampedI = SplatToVector(clampedI, outFactorTy, Builder); |
5802 | 96 | } |
5803 | 288 | Builder.CreateStore(clampedI, unroundedInsideFactor); |
5804 | | |
5805 | 288 | Value *roundedInsideFactor = |
5806 | 288 | CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedInsideFactor); |
5807 | 288 | if (outFactorTy != finalI->getType()) { |
5808 | 96 | DXASSERT(isQuad, "quad only write one channel of out factor"); |
5809 | 96 | finalI = Builder.CreateExtractElement(finalI, (uint64_t)0); |
5810 | | // Splat finalI to float2. |
5811 | 96 | finalI = SplatToVector(finalI, outFactorTy, Builder); |
5812 | 96 | } |
5813 | 288 | Builder.CreateStore(finalI, roundedInsideFactor); |
5814 | 288 | return nullptr; |
5815 | 288 | } |
5816 | | |
5817 | | } // namespace |
5818 | | |
5819 | | // Ray Tracing. |
5820 | | namespace { |
5821 | | Value *TranslateReportIntersection(CallInst *CI, IntrinsicOp IOP, |
5822 | | OP::OpCode opcode, |
5823 | | HLOperationLowerHelper &helper, |
5824 | | HLObjectOperationLowerHelper *pObjHelper, |
5825 | 142 | bool &Translated) { |
5826 | 142 | hlsl::OP *hlslOP = &helper.hlslOP; |
5827 | 142 | Value *THit = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); |
5828 | 142 | Value *HitKind = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); |
5829 | 142 | Value *Attr = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); |
5830 | 142 | Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode)); |
5831 | | |
5832 | 142 | Type *Ty = Attr->getType(); |
5833 | 142 | Function *F = hlslOP->GetOpFunc(opcode, Ty); |
5834 | | |
5835 | 142 | IRBuilder<> Builder(CI); |
5836 | 142 | return Builder.CreateCall(F, {opArg, THit, HitKind, Attr}); |
5837 | 142 | } |
5838 | | |
5839 | | Value *TranslateCallShader(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
5840 | | HLOperationLowerHelper &helper, |
5841 | | HLObjectOperationLowerHelper *pObjHelper, |
5842 | 126 | bool &Translated) { |
5843 | 126 | hlsl::OP *hlslOP = &helper.hlslOP; |
5844 | 126 | Value *ShaderIndex = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
5845 | 126 | Value *Parameter = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
5846 | 126 | Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode)); |
5847 | | |
5848 | 126 | Type *Ty = Parameter->getType(); |
5849 | 126 | Function *F = hlslOP->GetOpFunc(opcode, Ty); |
5850 | | |
5851 | 126 | IRBuilder<> Builder(CI); |
5852 | 126 | return Builder.CreateCall(F, {opArg, ShaderIndex, Parameter}); |
5853 | 126 | } |
5854 | | |
5855 | | static void TransferRayDescArgs(Value **Args, hlsl::OP *OP, |
5856 | | IRBuilder<> &Builder, CallInst *CI, |
5857 | 766 | unsigned &Index, unsigned &HLIndex) { |
5858 | | // Extract elements from flattened ray desc arguments in HL op. |
5859 | | // float3 Origin; |
5860 | 766 | Value *origin = CI->getArgOperand(HLIndex++); |
5861 | 766 | Args[Index++] = Builder.CreateExtractElement(origin, (uint64_t)0); |
5862 | 766 | Args[Index++] = Builder.CreateExtractElement(origin, 1); |
5863 | 766 | Args[Index++] = Builder.CreateExtractElement(origin, 2); |
5864 | | // float TMin; |
5865 | 766 | Args[Index++] = CI->getArgOperand(HLIndex++); |
5866 | | // float3 Direction; |
5867 | 766 | Value *direction = CI->getArgOperand(HLIndex++); |
5868 | 766 | Args[Index++] = Builder.CreateExtractElement(direction, (uint64_t)0); |
5869 | 766 | Args[Index++] = Builder.CreateExtractElement(direction, 1); |
5870 | 766 | Args[Index++] = Builder.CreateExtractElement(direction, 2); |
5871 | | // float TMax; |
5872 | 766 | Args[Index++] = CI->getArgOperand(HLIndex++); |
5873 | 766 | } |
5874 | | |
5875 | | Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, |
5876 | | HLOperationLowerHelper &Helper, |
5877 | | HLObjectOperationLowerHelper *pObjHelper, |
5878 | 548 | bool &Translated) { |
5879 | 548 | hlsl::OP *OP = &Helper.hlslOP; |
5880 | | |
5881 | 548 | Value *Args[DXIL::OperandIndex::kTraceRayNumOp]; |
5882 | 548 | Args[0] = OP->GetU32Const(static_cast<unsigned>(OpCode)); |
5883 | 548 | unsigned Index = 1, HLIndex = 1; |
5884 | 3.83k | while (HLIndex < HLOperandIndex::kTraceRayRayDescOpIdx) |
5885 | 3.28k | Args[Index++] = CI->getArgOperand(HLIndex++); |
5886 | | |
5887 | 548 | IRBuilder<> Builder(CI); |
5888 | 548 | TransferRayDescArgs(Args, OP, Builder, CI, Index, HLIndex); |
5889 | 548 | DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands() - 1); |
5890 | 548 | DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayPayloadOpIdx); |
5891 | | |
5892 | 548 | Value *Payload = CI->getArgOperand(HLIndex++); |
5893 | 548 | Args[Index++] = Payload; |
5894 | | |
5895 | 548 | DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands()); |
5896 | 548 | DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayNumOp); |
5897 | | |
5898 | 548 | Type *Ty = Payload->getType(); |
5899 | 548 | Function *F = OP->GetOpFunc(OpCode, Ty); |
5900 | | |
5901 | 548 | return Builder.CreateCall(F, Args); |
5902 | 548 | } |
5903 | | |
5904 | | // RayQuery methods |
5905 | | |
5906 | | Value *TranslateAllocateRayQuery(CallInst *CI, IntrinsicOp IOP, |
5907 | | OP::OpCode opcode, |
5908 | | HLOperationLowerHelper &helper, |
5909 | | HLObjectOperationLowerHelper *pObjHelper, |
5910 | 158 | bool &Translated) { |
5911 | 158 | hlsl::OP *hlslOP = &helper.hlslOP; |
5912 | | // upgrade to allocateRayQuery2 if there is a non-zero 2nd template arg |
5913 | 158 | DXASSERT(CI->getNumArgOperands() == 3, |
5914 | 158 | "hlopcode for allocaterayquery always expects 3 arguments"); |
5915 | | |
5916 | 158 | llvm::Value *Arg = |
5917 | 158 | CI->getArgOperand(HLOperandIndex::kAllocateRayQueryRayQueryFlagsIdx); |
5918 | 158 | llvm::ConstantInt *ConstVal = llvm::dyn_cast<llvm::ConstantInt>(Arg); |
5919 | 158 | DXASSERT(ConstVal, |
5920 | 158 | "2nd argument to allocaterayquery must always be a constant value"); |
5921 | 158 | if (ConstVal->getValue().getZExtValue() != 0) { |
5922 | 6 | Value *refArgs[3] = { |
5923 | 6 | nullptr, CI->getOperand(HLOperandIndex::kAllocateRayQueryRayFlagsIdx), |
5924 | 6 | CI->getOperand(HLOperandIndex::kAllocateRayQueryRayQueryFlagsIdx)}; |
5925 | 6 | opcode = OP::OpCode::AllocateRayQuery2; |
5926 | 6 | return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP); |
5927 | 6 | } |
5928 | 152 | Value *refArgs[2] = { |
5929 | 152 | nullptr, CI->getOperand(HLOperandIndex::kAllocateRayQueryRayFlagsIdx)}; |
5930 | 152 | return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP); |
5931 | 158 | } |
5932 | | |
5933 | | Value *TranslateTraceRayInline(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
5934 | | HLOperationLowerHelper &helper, |
5935 | | HLObjectOperationLowerHelper *pObjHelper, |
5936 | 184 | bool &Translated) { |
5937 | 184 | hlsl::OP *hlslOP = &helper.hlslOP; |
5938 | 184 | Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode)); |
5939 | | |
5940 | 184 | Value *Args[DXIL::OperandIndex::kTraceRayInlineNumOp]; |
5941 | 184 | Args[0] = opArg; |
5942 | 184 | unsigned Index = 1, HLIndex = 1; |
5943 | 920 | while (HLIndex < HLOperandIndex::kTraceRayInlineRayDescOpIdx) |
5944 | 736 | Args[Index++] = CI->getArgOperand(HLIndex++); |
5945 | | |
5946 | 184 | IRBuilder<> Builder(CI); |
5947 | 184 | DXASSERT_NOMSG(HLIndex == HLOperandIndex::kTraceRayInlineRayDescOpIdx); |
5948 | 184 | DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayInlineRayDescOpIdx); |
5949 | 184 | TransferRayDescArgs(Args, hlslOP, Builder, CI, Index, HLIndex); |
5950 | 184 | DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands()); |
5951 | 184 | DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayInlineNumOp); |
5952 | | |
5953 | 184 | Function *F = hlslOP->GetOpFunc(opcode, Builder.getVoidTy()); |
5954 | | |
5955 | 184 | return Builder.CreateCall(F, Args); |
5956 | 184 | } |
5957 | | |
5958 | | Value *TranslateCommitProceduralPrimitiveHit( |
5959 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
5960 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
5961 | 8 | bool &Translated) { |
5962 | 8 | hlsl::OP *hlslOP = &helper.hlslOP; |
5963 | 8 | Value *THit = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
5964 | 8 | Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode)); |
5965 | 8 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
5966 | | |
5967 | 8 | Value *Args[] = {opArg, handle, THit}; |
5968 | | |
5969 | 8 | IRBuilder<> Builder(CI); |
5970 | 8 | Function *F = hlslOP->GetOpFunc(opcode, Builder.getVoidTy()); |
5971 | | |
5972 | 8 | return Builder.CreateCall(F, Args); |
5973 | 8 | } |
5974 | | |
5975 | | Value *TranslateGenericRayQueryMethod(CallInst *CI, IntrinsicOp IOP, |
5976 | | OP::OpCode opcode, |
5977 | | HLOperationLowerHelper &helper, |
5978 | | HLObjectOperationLowerHelper *pObjHelper, |
5979 | 296 | bool &Translated) { |
5980 | 296 | hlsl::OP *hlslOP = &helper.hlslOP; |
5981 | | |
5982 | 296 | Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode)); |
5983 | 296 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
5984 | | |
5985 | 296 | IRBuilder<> Builder(CI); |
5986 | 296 | Function *F = hlslOP->GetOpFunc(opcode, CI->getType()); |
5987 | | |
5988 | 296 | return Builder.CreateCall(F, {opArg, handle}); |
5989 | 296 | } |
5990 | | |
5991 | | Value *TranslateRayQueryMatrix3x4Operation( |
5992 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
5993 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
5994 | 32 | bool &Translated) { |
5995 | 32 | hlsl::OP *hlslOP = &helper.hlslOP; |
5996 | 32 | VectorType *Ty = cast<VectorType>(CI->getType()); |
5997 | 32 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
5998 | 32 | uint32_t rVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2}; |
5999 | 32 | Constant *rows = ConstantDataVector::get(CI->getContext(), rVals); |
6000 | 32 | uint8_t cVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}; |
6001 | 32 | Constant *cols = ConstantDataVector::get(CI->getContext(), cVals); |
6002 | 32 | Value *retVal = TrivialDxilOperation(opcode, {nullptr, handle, rows, cols}, |
6003 | 32 | Ty, CI, hlslOP); |
6004 | 32 | return retVal; |
6005 | 32 | } |
6006 | | |
6007 | | Value *TranslateRayQueryTransposedMatrix3x4Operation( |
6008 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6009 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
6010 | 32 | bool &Translated) { |
6011 | 32 | hlsl::OP *hlslOP = &helper.hlslOP; |
6012 | 32 | VectorType *Ty = cast<VectorType>(CI->getType()); |
6013 | 32 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
6014 | 32 | uint32_t rVals[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2}; |
6015 | 32 | Constant *rows = ConstantDataVector::get(CI->getContext(), rVals); |
6016 | 32 | uint8_t cVals[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; |
6017 | 32 | Constant *cols = ConstantDataVector::get(CI->getContext(), cVals); |
6018 | 32 | Value *retVal = TrivialDxilOperation(opcode, {nullptr, handle, rows, cols}, |
6019 | 32 | Ty, CI, hlslOP); |
6020 | 32 | return retVal; |
6021 | 32 | } |
6022 | | |
6023 | | Value *TranslateRayQueryFloat2Getter(CallInst *CI, IntrinsicOp IOP, |
6024 | | OP::OpCode opcode, |
6025 | | HLOperationLowerHelper &helper, |
6026 | | HLObjectOperationLowerHelper *pObjHelper, |
6027 | 24 | bool &Translated) { |
6028 | 24 | hlsl::OP *hlslOP = &helper.hlslOP; |
6029 | 24 | VectorType *Ty = cast<VectorType>(CI->getType()); |
6030 | 24 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
6031 | 24 | uint8_t elementVals[] = {0, 1}; |
6032 | 24 | Constant *element = ConstantDataVector::get(CI->getContext(), elementVals); |
6033 | 24 | Value *retVal = |
6034 | 24 | TrivialDxilOperation(opcode, {nullptr, handle, element}, Ty, CI, hlslOP); |
6035 | 24 | return retVal; |
6036 | 24 | } |
6037 | | |
6038 | | Value *TranslateRayQueryFloat3Getter(CallInst *CI, IntrinsicOp IOP, |
6039 | | OP::OpCode opcode, |
6040 | | HLOperationLowerHelper &helper, |
6041 | | HLObjectOperationLowerHelper *pObjHelper, |
6042 | 48 | bool &Translated) { |
6043 | 48 | hlsl::OP *hlslOP = &helper.hlslOP; |
6044 | 48 | VectorType *Ty = cast<VectorType>(CI->getType()); |
6045 | 48 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
6046 | 48 | uint8_t elementVals[] = {0, 1, 2}; |
6047 | 48 | Constant *element = ConstantDataVector::get(CI->getContext(), elementVals); |
6048 | 48 | Value *retVal = |
6049 | 48 | TrivialDxilOperation(opcode, {nullptr, handle, element}, Ty, CI, hlslOP); |
6050 | 48 | return retVal; |
6051 | 48 | } |
6052 | | |
6053 | | Value *TranslateNoArgVectorOperation(CallInst *CI, IntrinsicOp IOP, |
6054 | | OP::OpCode opcode, |
6055 | | HLOperationLowerHelper &helper, |
6056 | | HLObjectOperationLowerHelper *pObjHelper, |
6057 | 450 | bool &Translated) { |
6058 | 450 | hlsl::OP *hlslOP = &helper.hlslOP; |
6059 | 450 | VectorType *Ty = cast<VectorType>(CI->getType()); |
6060 | 450 | uint8_t vals[] = {0, 1, 2, 3}; |
6061 | 450 | Constant *src = ConstantDataVector::get(CI->getContext(), vals); |
6062 | 450 | Value *retVal = TrivialDxilOperation(opcode, {nullptr, src}, Ty, CI, hlslOP); |
6063 | 450 | return retVal; |
6064 | 450 | } |
6065 | | |
6066 | | static Value *ConstructBuiltInTrianglePositionsFromFloat9( |
6067 | 22 | Value *float9Vec, StructType *hlslStructTy, IRBuilder<> &Builder) { |
6068 | 22 | Type *f32Ty = Type::getFloatTy(Builder.getContext()); |
6069 | 22 | Type *float3Ty = VectorType::get(f32Ty, 3); |
6070 | 22 | Value *result = UndefValue::get(hlslStructTy); |
6071 | | |
6072 | | // Build p0, p1, p2 from vector elements 0-2, 3-5, 6-8 |
6073 | 88 | for (unsigned field = 0; field < 3; field++66 ) { |
6074 | 66 | Value *float3 = UndefValue::get(float3Ty); |
6075 | 264 | for (unsigned i = 0; i < 3; i++198 ) { |
6076 | 198 | Value *elem = Builder.CreateExtractElement(float9Vec, field * 3 + i); |
6077 | 198 | float3 = Builder.CreateInsertElement(float3, elem, i); |
6078 | 198 | } |
6079 | 66 | result = Builder.CreateInsertValue(result, float3, field); |
6080 | 66 | } |
6081 | | |
6082 | 22 | return result; |
6083 | 22 | } |
6084 | | |
6085 | | Value *TranslateTriangleObjectPositions( |
6086 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6087 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
6088 | 10 | bool &Translated) { |
6089 | 10 | hlsl::OP *hlslOP = &helper.hlslOP; |
6090 | 10 | IRBuilder<> Builder(CI); |
6091 | | |
6092 | 10 | Value *outputPtr = CI->getArgOperand(HLOperandIndex::kIOP_SRetOpIdx); |
6093 | 10 | StructType *hlslStructTy = |
6094 | 10 | cast<StructType>(outputPtr->getType()->getPointerElementType()); |
6095 | | |
6096 | 10 | Type *f32Ty = Type::getFloatTy(CI->getContext()); |
6097 | 10 | Function *dxilFunc = hlslOP->GetOpFunc(opcode, f32Ty); |
6098 | 10 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
6099 | | |
6100 | 10 | Value *dxilCall = Builder.CreateCall(dxilFunc, {opArg}); |
6101 | | |
6102 | 10 | Value *structValue = ConstructBuiltInTrianglePositionsFromFloat9( |
6103 | 10 | dxilCall, hlslStructTy, Builder); |
6104 | 10 | Builder.CreateStore(structValue, outputPtr); |
6105 | | |
6106 | 10 | return nullptr; |
6107 | 10 | } |
6108 | | |
6109 | | Value *TranslateRayQueryTriangleObjectPositions( |
6110 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6111 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
6112 | 8 | bool &Translated) { |
6113 | 8 | hlsl::OP *hlslOP = &helper.hlslOP; |
6114 | | |
6115 | 8 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
6116 | 8 | StructType *hlslStructTy = |
6117 | 8 | cast<StructType>(CI->getType()->getPointerElementType()); |
6118 | | |
6119 | 8 | Function *F = CI->getParent()->getParent(); |
6120 | 8 | IRBuilder<> AllocaBuilder(&F->getEntryBlock(), F->getEntryBlock().begin()); |
6121 | 8 | AllocaInst *resultAlloca = AllocaBuilder.CreateAlloca(hlslStructTy); |
6122 | | |
6123 | 8 | IRBuilder<> Builder(CI); |
6124 | | |
6125 | 8 | Type *f32Ty = Type::getFloatTy(CI->getContext()); |
6126 | 8 | Function *dxilFunc = hlslOP->GetOpFunc(opcode, f32Ty); |
6127 | 8 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
6128 | | |
6129 | 8 | Value *dxilCall = Builder.CreateCall(dxilFunc, {opArg, handle}); |
6130 | | |
6131 | 8 | Value *structValue = ConstructBuiltInTrianglePositionsFromFloat9( |
6132 | 8 | dxilCall, hlslStructTy, Builder); |
6133 | 8 | Builder.CreateStore(structValue, resultAlloca); |
6134 | | |
6135 | 8 | return resultAlloca; |
6136 | 8 | } |
6137 | | |
6138 | | Value *TranslateHitObjectTriangleObjectPositions( |
6139 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6140 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
6141 | 4 | bool &Translated) { |
6142 | 4 | hlsl::OP *hlslOP = &helper.hlslOP; |
6143 | | |
6144 | 4 | Value *hitObjectPtr = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
6145 | 4 | StructType *hlslStructTy = |
6146 | 4 | cast<StructType>(CI->getType()->getPointerElementType()); |
6147 | | |
6148 | 4 | Function *F = CI->getParent()->getParent(); |
6149 | 4 | IRBuilder<> AllocaBuilder(&F->getEntryBlock(), F->getEntryBlock().begin()); |
6150 | 4 | AllocaInst *resultAlloca = AllocaBuilder.CreateAlloca(hlslStructTy); |
6151 | | |
6152 | 4 | IRBuilder<> Builder(CI); |
6153 | 4 | Value *hitObject = Builder.CreateLoad(hitObjectPtr); |
6154 | | |
6155 | 4 | Type *f32Ty = Type::getFloatTy(CI->getContext()); |
6156 | 4 | Function *dxilFunc = hlslOP->GetOpFunc(opcode, f32Ty); |
6157 | 4 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
6158 | | |
6159 | 4 | Value *dxilCall = Builder.CreateCall(dxilFunc, {opArg, hitObject}); |
6160 | | |
6161 | 4 | Value *structValue = ConstructBuiltInTrianglePositionsFromFloat9( |
6162 | 4 | dxilCall, hlslStructTy, Builder); |
6163 | 4 | Builder.CreateStore(structValue, resultAlloca); |
6164 | | |
6165 | 4 | return resultAlloca; |
6166 | 4 | } |
6167 | | |
6168 | | template <typename ColElemTy> |
6169 | | static void GetMatrixIndices(Constant *&Rows, Constant *&Cols, bool Is3x4, |
6170 | 72 | LLVMContext &Ctx) { |
6171 | 72 | if (Is3x4) { |
6172 | 48 | uint32_t RVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2}; |
6173 | 48 | Rows = ConstantDataVector::get(Ctx, RVals); |
6174 | 48 | ColElemTy CVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}; |
6175 | 48 | Cols = ConstantDataVector::get(Ctx, CVals); |
6176 | 48 | return; |
6177 | 48 | } |
6178 | 24 | uint32_t RVals[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2}; |
6179 | 24 | Rows = ConstantDataVector::get(Ctx, RVals); |
6180 | 24 | ColElemTy CVals[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; |
6181 | 24 | Cols = ConstantDataVector::get(Ctx, CVals); |
6182 | 24 | } HLOperationLower.cpp:void (anonymous namespace)::GetMatrixIndices<unsigned char>(llvm::Constant*&, llvm::Constant*&, bool, llvm::LLVMContext&) Line | Count | Source | 6170 | 56 | LLVMContext &Ctx) { | 6171 | 56 | if (Is3x4) { | 6172 | 40 | uint32_t RVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2}; | 6173 | 40 | Rows = ConstantDataVector::get(Ctx, RVals); | 6174 | 40 | ColElemTy CVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}; | 6175 | 40 | Cols = ConstantDataVector::get(Ctx, CVals); | 6176 | 40 | return; | 6177 | 40 | } | 6178 | 16 | uint32_t RVals[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2}; | 6179 | 16 | Rows = ConstantDataVector::get(Ctx, RVals); | 6180 | 16 | ColElemTy CVals[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; | 6181 | 16 | Cols = ConstantDataVector::get(Ctx, CVals); | 6182 | 16 | } |
HLOperationLower.cpp:void (anonymous namespace)::GetMatrixIndices<unsigned int>(llvm::Constant*&, llvm::Constant*&, bool, llvm::LLVMContext&) Line | Count | Source | 6170 | 16 | LLVMContext &Ctx) { | 6171 | 16 | if (Is3x4) { | 6172 | 8 | uint32_t RVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2}; | 6173 | 8 | Rows = ConstantDataVector::get(Ctx, RVals); | 6174 | 8 | ColElemTy CVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}; | 6175 | 8 | Cols = ConstantDataVector::get(Ctx, CVals); | 6176 | 8 | return; | 6177 | 8 | } | 6178 | 8 | uint32_t RVals[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2}; | 6179 | 8 | Rows = ConstantDataVector::get(Ctx, RVals); | 6180 | 8 | ColElemTy CVals[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; | 6181 | 8 | Cols = ConstantDataVector::get(Ctx, CVals); | 6182 | 8 | } |
|
6183 | | |
6184 | | Value *TranslateNoArgMatrix3x4Operation( |
6185 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6186 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
6187 | 40 | bool &Translated) { |
6188 | 40 | hlsl::OP *hlslOP = &helper.hlslOP; |
6189 | 40 | VectorType *Ty = cast<VectorType>(CI->getType()); |
6190 | 40 | Constant *Rows, *Cols; |
6191 | 40 | GetMatrixIndices<uint8_t>(Rows, Cols, true, CI->getContext()); |
6192 | 40 | return TrivialDxilOperation(opcode, {nullptr, Rows, Cols}, Ty, CI, hlslOP); |
6193 | 40 | } |
6194 | | |
6195 | | Value *TranslateNoArgTransposedMatrix3x4Operation( |
6196 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6197 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
6198 | 16 | bool &Translated) { |
6199 | 16 | hlsl::OP *hlslOP = &helper.hlslOP; |
6200 | 16 | VectorType *Ty = cast<VectorType>(CI->getType()); |
6201 | 16 | Constant *Rows, *Cols; |
6202 | 16 | GetMatrixIndices<uint8_t>(Rows, Cols, false, CI->getContext()); |
6203 | 16 | return TrivialDxilOperation(opcode, {nullptr, Rows, Cols}, Ty, CI, hlslOP); |
6204 | 16 | } |
6205 | | |
6206 | | /* |
6207 | | HLSL: |
6208 | | void ThreadNodeOutputRecords<recordType>::OutputComplete(); |
6209 | | void GroupNodeOutputRecords<recordType>::OutputComplete(); |
6210 | | DXIL: |
6211 | | void @dx.op.outputComplete(i32 %Opcode, %dx.types.NodeRecordHandle |
6212 | | %RecordHandle) |
6213 | | */ |
6214 | | Value *TranslateNodeOutputComplete(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
6215 | | HLOperationLowerHelper &helper, |
6216 | | HLObjectOperationLowerHelper *pObjHelper, |
6217 | 146 | bool &Translated) { |
6218 | 146 | hlsl::OP *OP = &helper.hlslOP; |
6219 | | |
6220 | 146 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
6221 | 146 | DXASSERT_NOMSG(handle->getType() == OP->GetNodeRecordHandleType()); |
6222 | 146 | Function *dxilFunc = OP->GetOpFunc(op, CI->getType()); |
6223 | 146 | Value *opArg = OP->GetU32Const((unsigned)op); |
6224 | | |
6225 | 146 | IRBuilder<> Builder(CI); |
6226 | 146 | return Builder.CreateCall(dxilFunc, {opArg, handle}); |
6227 | 146 | } |
6228 | | |
6229 | | Value *TranslateNoArgNoReturnPreserveOutput( |
6230 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6231 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
6232 | 144 | bool &Translated) { |
6233 | 144 | Instruction *pResult = cast<Instruction>( |
6234 | 144 | TrivialNoArgOperation(CI, IOP, opcode, helper, pObjHelper, Translated)); |
6235 | | // HL intrinsic must have had a return injected just after the call. |
6236 | | // SROA_Parameter_HLSL will copy from alloca to output just before each |
6237 | | // return. Now move call after the copy and just before the return. |
6238 | 144 | if (isa<ReturnInst>(pResult->getNextNode())) |
6239 | 0 | return pResult; |
6240 | 144 | ReturnInst *RetI = cast<ReturnInst>(pResult->getParent()->getTerminator()); |
6241 | 144 | pResult->removeFromParent(); |
6242 | 144 | pResult->insertBefore(RetI); |
6243 | 144 | return pResult; |
6244 | 144 | } |
6245 | | |
6246 | | // Special half dot2 with accumulate to float |
6247 | | Value *TranslateDot2Add(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6248 | | HLOperationLowerHelper &helper, |
6249 | | HLObjectOperationLowerHelper *pObjHelper, |
6250 | 16 | bool &Translated) { |
6251 | 16 | hlsl::OP *hlslOP = &helper.hlslOP; |
6252 | 16 | Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); |
6253 | 16 | const unsigned vecSize = 2; |
6254 | 16 | DXASSERT(src0->getType()->isVectorTy() && |
6255 | 16 | vecSize == src0->getType()->getVectorNumElements() && |
6256 | 16 | src0->getType()->getScalarType()->isHalfTy(), |
6257 | 16 | "otherwise, unexpected input dimension or component type"); |
6258 | | |
6259 | 16 | Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); |
6260 | 16 | DXASSERT(src0->getType() == src1->getType(), |
6261 | 16 | "otherwise, mismatched argument types"); |
6262 | 16 | Value *accArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); |
6263 | 16 | Type *accTy = accArg->getType(); |
6264 | 16 | DXASSERT(!accTy->isVectorTy() && accTy->isFloatTy(), |
6265 | 16 | "otherwise, unexpected accumulator type"); |
6266 | 16 | IRBuilder<> Builder(CI); |
6267 | | |
6268 | 16 | Function *dxilFunc = hlslOP->GetOpFunc(opcode, accTy); |
6269 | 16 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
6270 | | |
6271 | 16 | SmallVector<Value *, 6> args; |
6272 | 16 | args.emplace_back(opArg); |
6273 | 16 | args.emplace_back(accArg); |
6274 | 48 | for (unsigned i = 0; i < vecSize; i++32 ) |
6275 | 32 | args.emplace_back(Builder.CreateExtractElement(src0, i)); |
6276 | 48 | for (unsigned i = 0; i < vecSize; i++32 ) |
6277 | 32 | args.emplace_back(Builder.CreateExtractElement(src1, i)); |
6278 | 16 | return Builder.CreateCall(dxilFunc, args); |
6279 | 16 | } |
6280 | | |
6281 | | Value *TranslateDot4AddPacked(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6282 | | HLOperationLowerHelper &helper, |
6283 | | HLObjectOperationLowerHelper *pObjHelper, |
6284 | 32 | bool &Translated) { |
6285 | 32 | hlsl::OP *hlslOP = &helper.hlslOP; |
6286 | 32 | Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); |
6287 | 32 | DXASSERT( |
6288 | 32 | !src0->getType()->isVectorTy() && src0->getType()->isIntegerTy(32), |
6289 | 32 | "otherwise, unexpected vector support in high level intrinsic template"); |
6290 | 32 | Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); |
6291 | 32 | DXASSERT(src0->getType() == src1->getType(), |
6292 | 32 | "otherwise, mismatched argument types"); |
6293 | 32 | Value *accArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); |
6294 | 32 | Type *accTy = accArg->getType(); |
6295 | 32 | DXASSERT( |
6296 | 32 | !accTy->isVectorTy() && accTy->isIntegerTy(32), |
6297 | 32 | "otherwise, unexpected vector support in high level intrinsic template"); |
6298 | 32 | IRBuilder<> Builder(CI); |
6299 | | |
6300 | 32 | Function *dxilFunc = hlslOP->GetOpFunc(opcode, accTy); |
6301 | 32 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
6302 | 32 | return Builder.CreateCall(dxilFunc, {opArg, accArg, src0, src1}); |
6303 | 32 | } |
6304 | | |
6305 | | Value *TranslatePack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6306 | | HLOperationLowerHelper &helper, |
6307 | | HLObjectOperationLowerHelper *pObjHelper, |
6308 | 72 | bool &Translated) { |
6309 | 72 | hlsl::OP *hlslOP = &helper.hlslOP; |
6310 | | |
6311 | 72 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
6312 | 72 | Type *valTy = val->getType(); |
6313 | 72 | Type *eltTy = valTy->getScalarType(); |
6314 | | |
6315 | 72 | DXASSERT(valTy->isVectorTy() && valTy->getVectorNumElements() == 4 && |
6316 | 72 | eltTy->isIntegerTy() && |
6317 | 72 | (eltTy->getIntegerBitWidth() == 32 || |
6318 | 72 | eltTy->getIntegerBitWidth() == 16), |
6319 | 72 | "otherwise, unexpected input dimension or component type"); |
6320 | | |
6321 | 72 | DXIL::PackMode packMode = DXIL::PackMode::Trunc; |
6322 | 72 | switch (IOP) { |
6323 | 18 | case hlsl::IntrinsicOp::IOP_pack_clamp_s8: |
6324 | 18 | packMode = DXIL::PackMode::SClamp; |
6325 | 18 | break; |
6326 | 18 | case hlsl::IntrinsicOp::IOP_pack_clamp_u8: |
6327 | 18 | packMode = DXIL::PackMode::UClamp; |
6328 | 18 | break; |
6329 | 18 | case hlsl::IntrinsicOp::IOP_pack_s8: |
6330 | 36 | case hlsl::IntrinsicOp::IOP_pack_u8: |
6331 | 36 | packMode = DXIL::PackMode::Trunc; |
6332 | 36 | break; |
6333 | 0 | default: |
6334 | 0 | DXASSERT(false, "unexpected opcode"); |
6335 | 0 | break; |
6336 | 72 | } |
6337 | | |
6338 | 72 | IRBuilder<> Builder(CI); |
6339 | 72 | Function *dxilFunc = hlslOP->GetOpFunc(opcode, eltTy); |
6340 | 72 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
6341 | 72 | Constant *packModeArg = hlslOP->GetU8Const((unsigned)packMode); |
6342 | | |
6343 | 72 | Value *elt0 = Builder.CreateExtractElement(val, (uint64_t)0); |
6344 | 72 | Value *elt1 = Builder.CreateExtractElement(val, (uint64_t)1); |
6345 | 72 | Value *elt2 = Builder.CreateExtractElement(val, (uint64_t)2); |
6346 | 72 | Value *elt3 = Builder.CreateExtractElement(val, (uint64_t)3); |
6347 | 72 | return Builder.CreateCall(dxilFunc, |
6348 | 72 | {opArg, packModeArg, elt0, elt1, elt2, elt3}); |
6349 | 72 | } |
6350 | | |
6351 | | Value *TranslateUnpack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6352 | | HLOperationLowerHelper &helper, |
6353 | | HLObjectOperationLowerHelper *pObjHelper, |
6354 | 88 | bool &Translated) { |
6355 | 88 | hlsl::OP *hlslOP = &helper.hlslOP; |
6356 | | |
6357 | 88 | Value *packedVal = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
6358 | 88 | DXASSERT( |
6359 | 88 | !packedVal->getType()->isVectorTy() && |
6360 | 88 | packedVal->getType()->isIntegerTy(32), |
6361 | 88 | "otherwise, unexpected vector support in high level intrinsic template"); |
6362 | | |
6363 | 88 | Type *overloadType = nullptr; |
6364 | 88 | DXIL::UnpackMode unpackMode = DXIL::UnpackMode::Unsigned; |
6365 | 88 | switch (IOP) { |
6366 | 24 | case hlsl::IntrinsicOp::IOP_unpack_s8s32: |
6367 | 24 | unpackMode = DXIL::UnpackMode::Signed; |
6368 | 24 | overloadType = helper.i32Ty; |
6369 | 24 | break; |
6370 | 24 | case hlsl::IntrinsicOp::IOP_unpack_u8u32: |
6371 | 24 | unpackMode = DXIL::UnpackMode::Unsigned; |
6372 | 24 | overloadType = helper.i32Ty; |
6373 | 24 | break; |
6374 | 20 | case hlsl::IntrinsicOp::IOP_unpack_s8s16: |
6375 | 20 | unpackMode = DXIL::UnpackMode::Signed; |
6376 | 20 | overloadType = helper.i16Ty; |
6377 | 20 | break; |
6378 | 20 | case hlsl::IntrinsicOp::IOP_unpack_u8u16: |
6379 | 20 | unpackMode = DXIL::UnpackMode::Unsigned; |
6380 | 20 | overloadType = helper.i16Ty; |
6381 | 20 | break; |
6382 | 0 | default: |
6383 | 0 | DXASSERT(false, "unexpected opcode"); |
6384 | 0 | break; |
6385 | 88 | } |
6386 | | |
6387 | 88 | IRBuilder<> Builder(CI); |
6388 | 88 | Function *dxilFunc = hlslOP->GetOpFunc(opcode, overloadType); |
6389 | 88 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
6390 | 88 | Constant *unpackModeArg = hlslOP->GetU8Const((unsigned)unpackMode); |
6391 | 88 | Value *Res = Builder.CreateCall(dxilFunc, {opArg, unpackModeArg, packedVal}); |
6392 | | |
6393 | | // Convert the final aggregate into a vector to make the types match |
6394 | 88 | const unsigned vecSize = 4; |
6395 | 88 | Value *ResVec = UndefValue::get(CI->getType()); |
6396 | 440 | for (unsigned i = 0; i < vecSize; ++i352 ) { |
6397 | 352 | Value *Elt = Builder.CreateExtractValue(Res, i); |
6398 | 352 | ResVec = Builder.CreateInsertElement(ResVec, Elt, i); |
6399 | 352 | } |
6400 | 88 | return ResVec; |
6401 | 88 | } |
6402 | | |
6403 | | } // namespace |
6404 | | |
6405 | | // Shader Execution Reordering. |
6406 | | namespace { |
6407 | | Value *TranslateHitObjectMakeNop(CallInst *CI, IntrinsicOp IOP, |
6408 | | OP::OpCode Opcode, |
6409 | | HLOperationLowerHelper &Helper, |
6410 | | HLObjectOperationLowerHelper *ObjHelper, |
6411 | 44 | bool &Translated) { |
6412 | 44 | hlsl::OP *HlslOP = &Helper.hlslOP; |
6413 | 44 | IRBuilder<> Builder(CI); |
6414 | 44 | Value *HitObjectPtr = CI->getArgOperand(1); |
6415 | 44 | Value *HitObject = TrivialDxilOperation( |
6416 | 44 | Opcode, {nullptr}, Type::getVoidTy(CI->getContext()), CI, HlslOP); |
6417 | 44 | Builder.CreateStore(HitObject, HitObjectPtr); |
6418 | 44 | DXASSERT( |
6419 | 44 | CI->use_empty(), |
6420 | 44 | "Default ctor return type is a Clang artifact. Value must not be used"); |
6421 | 44 | return nullptr; |
6422 | 44 | } |
6423 | | |
6424 | | Value *TranslateHitObjectMakeMiss(CallInst *CI, IntrinsicOp IOP, |
6425 | | OP::OpCode Opcode, |
6426 | | HLOperationLowerHelper &Helper, |
6427 | | HLObjectOperationLowerHelper *ObjHelper, |
6428 | 24 | bool &Translated) { |
6429 | 24 | DXASSERT_NOMSG(CI->getNumArgOperands() == |
6430 | 24 | HLOperandIndex::kHitObjectMakeMiss_NumOp); |
6431 | 24 | hlsl::OP *OP = &Helper.hlslOP; |
6432 | 24 | IRBuilder<> Builder(CI); |
6433 | 24 | Value *Args[DXIL::OperandIndex::kHitObjectMakeMiss_NumOp]; |
6434 | 24 | Args[0] = nullptr; // Filled in by TrivialDxilOperation |
6435 | | |
6436 | 24 | unsigned DestIdx = 1, SrcIdx = 1; |
6437 | 24 | Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); |
6438 | 24 | Args[DestIdx++] = CI->getArgOperand(SrcIdx++); // RayFlags |
6439 | 24 | Args[DestIdx++] = CI->getArgOperand(SrcIdx++); // MissShaderIdx |
6440 | | |
6441 | 24 | DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectMakeMiss_RayDescOpIdx); |
6442 | 24 | DXASSERT_NOMSG(DestIdx == |
6443 | 24 | DXIL::OperandIndex::kHitObjectMakeMiss_RayDescOpIdx); |
6444 | 24 | TransferRayDescArgs(Args, OP, Builder, CI, DestIdx, SrcIdx); |
6445 | 24 | DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); |
6446 | 24 | DXASSERT_NOMSG(DestIdx == DXIL::OperandIndex::kHitObjectMakeMiss_NumOp); |
6447 | | |
6448 | 24 | Value *OutHitObject = |
6449 | 24 | TrivialDxilOperation(Opcode, Args, Helper.voidTy, CI, OP); |
6450 | 24 | Builder.CreateStore(OutHitObject, HitObjectPtr); |
6451 | 24 | return nullptr; |
6452 | 24 | } |
6453 | | |
6454 | | Value *TranslateMaybeReorderThread(CallInst *CI, IntrinsicOp IOP, |
6455 | | OP::OpCode OpCode, |
6456 | | HLOperationLowerHelper &Helper, |
6457 | | HLObjectOperationLowerHelper *pObjHelper, |
6458 | 36 | bool &Translated) { |
6459 | 36 | hlsl::OP *OP = &Helper.hlslOP; |
6460 | | |
6461 | | // clang-format off |
6462 | | // Match MaybeReorderThread overload variants: |
6463 | | // void MaybeReorderThread(<Op>, |
6464 | | // HitObject Hit); |
6465 | | // void MaybeReorderThread(<Op>, |
6466 | | // uint CoherenceHint, |
6467 | | // uint NumCoherenceHintBitsFromLSB ); |
6468 | | // void MaybeReorderThread(<Op>, |
6469 | | // HitObject Hit, |
6470 | | // uint CoherenceHint, |
6471 | | // uint NumCoherenceHintBitsFromLSB); |
6472 | | // clang-format on |
6473 | 36 | const unsigned NumHLArgs = CI->getNumArgOperands(); |
6474 | 36 | DXASSERT_NOMSG(NumHLArgs >= 2); |
6475 | | |
6476 | | // Use a NOP HitObject for MaybeReorderThread without HitObject. |
6477 | 36 | Value *HitObject = nullptr; |
6478 | 36 | unsigned HLIndex = 1; |
6479 | 36 | if (3 == NumHLArgs) { |
6480 | 6 | HitObject = TrivialDxilOperation(DXIL::OpCode::HitObject_MakeNop, {nullptr}, |
6481 | 6 | Type::getVoidTy(CI->getContext()), CI, OP); |
6482 | 30 | } else { |
6483 | 30 | Value *FirstParam = CI->getArgOperand(HLIndex); |
6484 | 30 | DXASSERT_NOMSG(isa<PointerType>(FirstParam->getType())); |
6485 | 30 | IRBuilder<> Builder(CI); |
6486 | 30 | HitObject = Builder.CreateLoad(FirstParam); |
6487 | 30 | HLIndex++; |
6488 | 30 | } |
6489 | | |
6490 | | // If there are trailing parameters, these have to be the two coherence bit |
6491 | | // parameters |
6492 | 36 | Value *CoherenceHint = nullptr; |
6493 | 36 | Value *NumCoherenceHintBits = nullptr; |
6494 | 36 | if (2 != NumHLArgs) { |
6495 | 12 | DXASSERT_NOMSG(HLIndex + 2 == NumHLArgs); |
6496 | 12 | CoherenceHint = CI->getArgOperand(HLIndex++); |
6497 | 12 | NumCoherenceHintBits = CI->getArgOperand(HLIndex++); |
6498 | 12 | DXASSERT_NOMSG(Helper.i32Ty == CoherenceHint->getType()); |
6499 | 12 | DXASSERT_NOMSG(Helper.i32Ty == NumCoherenceHintBits->getType()); |
6500 | 24 | } else { |
6501 | 24 | CoherenceHint = UndefValue::get(Helper.i32Ty); |
6502 | 24 | NumCoherenceHintBits = OP->GetU32Const(0); |
6503 | 24 | } |
6504 | | |
6505 | 36 | TrivialDxilOperation( |
6506 | 36 | OpCode, {nullptr, HitObject, CoherenceHint, NumCoherenceHintBits}, |
6507 | 36 | Type::getVoidTy(CI->getContext()), CI, OP); |
6508 | 36 | return nullptr; |
6509 | 36 | } |
6510 | | |
6511 | | Value *TranslateHitObjectFromRayQuery(CallInst *CI, IntrinsicOp IOP, |
6512 | | OP::OpCode OpCode, |
6513 | | HLOperationLowerHelper &Helper, |
6514 | | HLObjectOperationLowerHelper *pObjHelper, |
6515 | 8 | bool &Translated) { |
6516 | 8 | hlsl::OP *OP = &Helper.hlslOP; |
6517 | 8 | IRBuilder<> Builder(CI); |
6518 | | |
6519 | 8 | unsigned SrcIdx = 1; |
6520 | 8 | Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); |
6521 | 8 | Value *RayQuery = CI->getArgOperand(SrcIdx++); |
6522 | | |
6523 | 8 | if (CI->getNumArgOperands() == |
6524 | 8 | HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_NumOp) { |
6525 | 4 | Value *HitKind = CI->getArgOperand(SrcIdx++); |
6526 | 4 | Value *AttribSrc = CI->getArgOperand(SrcIdx++); |
6527 | 4 | DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); |
6528 | 4 | OpCode = DXIL::OpCode::HitObject_FromRayQueryWithAttrs; |
6529 | 4 | Type *AttrTy = AttribSrc->getType(); |
6530 | 4 | Value *OutHitObject = TrivialDxilOperation( |
6531 | 4 | OpCode, {nullptr, RayQuery, HitKind, AttribSrc}, AttrTy, CI, OP); |
6532 | 4 | Builder.CreateStore(OutHitObject, HitObjectPtr); |
6533 | 4 | return nullptr; |
6534 | 4 | } |
6535 | | |
6536 | 4 | DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); |
6537 | 4 | OpCode = DXIL::OpCode::HitObject_FromRayQuery; |
6538 | 4 | Value *OutHitObject = |
6539 | 4 | TrivialDxilOperation(OpCode, {nullptr, RayQuery}, Helper.voidTy, CI, OP); |
6540 | 4 | Builder.CreateStore(OutHitObject, HitObjectPtr); |
6541 | 4 | return nullptr; |
6542 | 8 | } |
6543 | | |
6544 | | Value *TranslateHitObjectTraceRay(CallInst *CI, IntrinsicOp IOP, |
6545 | | OP::OpCode OpCode, |
6546 | | HLOperationLowerHelper &Helper, |
6547 | | HLObjectOperationLowerHelper *pObjHelper, |
6548 | 10 | bool &Translated) { |
6549 | 10 | hlsl::OP *OP = &Helper.hlslOP; |
6550 | 10 | IRBuilder<> Builder(CI); |
6551 | | |
6552 | 10 | DXASSERT_NOMSG(CI->getNumArgOperands() == |
6553 | 10 | HLOperandIndex::kHitObjectTraceRay_NumOp); |
6554 | 10 | Value *Args[DXIL::OperandIndex::kHitObjectTraceRay_NumOp]; |
6555 | 10 | Value *OpArg = OP->GetU32Const(static_cast<unsigned>(OpCode)); |
6556 | 10 | Args[0] = OpArg; |
6557 | | |
6558 | 10 | unsigned DestIdx = 1, SrcIdx = 1; |
6559 | 10 | Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); |
6560 | 10 | Args[DestIdx++] = CI->getArgOperand(SrcIdx++); |
6561 | 60 | for (; SrcIdx < HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx; |
6562 | 50 | ++SrcIdx, ++DestIdx) { |
6563 | 50 | Args[DestIdx] = CI->getArgOperand(SrcIdx); |
6564 | 50 | } |
6565 | | |
6566 | 10 | DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx); |
6567 | 10 | DXASSERT_NOMSG(DestIdx == |
6568 | 10 | DXIL::OperandIndex::kHitObjectTraceRay_RayDescOpIdx); |
6569 | 10 | TransferRayDescArgs(Args, OP, Builder, CI, DestIdx, SrcIdx); |
6570 | 10 | DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands() - 1); |
6571 | 10 | DXASSERT_NOMSG(DestIdx == |
6572 | 10 | DXIL::OperandIndex::kHitObjectTraceRay_PayloadOpIdx); |
6573 | | |
6574 | 10 | Value *Payload = CI->getArgOperand(SrcIdx++); |
6575 | 10 | Args[DestIdx++] = Payload; |
6576 | | |
6577 | 10 | DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); |
6578 | 10 | DXASSERT_NOMSG(DestIdx == DXIL::OperandIndex::kHitObjectTraceRay_NumOp); |
6579 | | |
6580 | 10 | Function *F = OP->GetOpFunc(OpCode, Payload->getType()); |
6581 | | |
6582 | 10 | Value *OutHitObject = Builder.CreateCall(F, Args); |
6583 | 10 | Builder.CreateStore(OutHitObject, HitObjectPtr); |
6584 | 10 | return nullptr; |
6585 | 10 | } |
6586 | | |
6587 | | Value *TranslateHitObjectInvoke(CallInst *CI, IntrinsicOp IOP, |
6588 | | OP::OpCode OpCode, |
6589 | | HLOperationLowerHelper &Helper, |
6590 | | HLObjectOperationLowerHelper *pObjHelper, |
6591 | 4 | bool &Translated) { |
6592 | 4 | unsigned SrcIdx = 1; |
6593 | 4 | Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); |
6594 | 4 | Value *Payload = CI->getArgOperand(SrcIdx++); |
6595 | 4 | DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); |
6596 | | |
6597 | 4 | IRBuilder<> Builder(CI); |
6598 | 4 | Value *HitObject = Builder.CreateLoad(HitObjectPtr); |
6599 | 4 | TrivialDxilOperation(OpCode, {nullptr, HitObject, Payload}, |
6600 | 4 | Payload->getType(), CI, &Helper.hlslOP); |
6601 | 4 | return nullptr; |
6602 | 4 | } |
6603 | | |
6604 | | Value *TranslateHitObjectGetAttributes(CallInst *CI, IntrinsicOp IOP, |
6605 | | OP::OpCode OpCode, |
6606 | | HLOperationLowerHelper &Helper, |
6607 | | HLObjectOperationLowerHelper *pObjHelper, |
6608 | 6 | bool &Translated) { |
6609 | 6 | hlsl::OP *OP = &Helper.hlslOP; |
6610 | 6 | IRBuilder<> Builder(CI); |
6611 | | |
6612 | 6 | Value *HitObjectPtr = CI->getArgOperand(1); |
6613 | 6 | Value *HitObject = Builder.CreateLoad(HitObjectPtr); |
6614 | 6 | Value *AttrOutPtr = |
6615 | 6 | CI->getArgOperand(HLOperandIndex::kHitObjectGetAttributes_AttributeOpIdx); |
6616 | 6 | TrivialDxilOperation(OpCode, {nullptr, HitObject, AttrOutPtr}, |
6617 | 6 | AttrOutPtr->getType(), CI, OP); |
6618 | 6 | return nullptr; |
6619 | 6 | } |
6620 | | |
6621 | | Value *TranslateHitObjectScalarGetter(CallInst *CI, IntrinsicOp IOP, |
6622 | | OP::OpCode OpCode, |
6623 | | HLOperationLowerHelper &Helper, |
6624 | | HLObjectOperationLowerHelper *pObjHelper, |
6625 | 74 | bool &Translated) { |
6626 | 74 | hlsl::OP *OP = &Helper.hlslOP; |
6627 | 74 | Value *HitObjectPtr = CI->getArgOperand(1); |
6628 | 74 | IRBuilder<> Builder(CI); |
6629 | 74 | Value *HitObject = Builder.CreateLoad(HitObjectPtr); |
6630 | 74 | return TrivialDxilOperation(OpCode, {nullptr, HitObject}, CI->getType(), CI, |
6631 | 74 | OP); |
6632 | 74 | } |
6633 | | |
6634 | | Value *TranslateHitObjectVectorGetter(CallInst *CI, IntrinsicOp IOP, |
6635 | | OP::OpCode OpCode, |
6636 | | HLOperationLowerHelper &Helper, |
6637 | | HLObjectOperationLowerHelper *pObjHelper, |
6638 | 16 | bool &Translated) { |
6639 | 16 | hlsl::OP *OP = &Helper.hlslOP; |
6640 | 16 | Value *HitObjectPtr = CI->getArgOperand(1); |
6641 | 16 | IRBuilder<> Builder(CI); |
6642 | 16 | Value *HitObject = Builder.CreateLoad(HitObjectPtr); |
6643 | 16 | VectorType *Ty = cast<VectorType>(CI->getType()); |
6644 | 16 | uint32_t Vals[] = {0, 1, 2, 3}; |
6645 | 16 | Constant *Src = ConstantDataVector::get(CI->getContext(), Vals); |
6646 | 16 | return TrivialDxilOperation(OpCode, {nullptr, HitObject, Src}, Ty, CI, OP); |
6647 | 16 | } |
6648 | | |
6649 | 16 | static bool IsHitObject3x4Getter(IntrinsicOp IOP) { |
6650 | 16 | switch (IOP) { |
6651 | 8 | default: |
6652 | 8 | return false; |
6653 | 4 | case IntrinsicOp::MOP_DxHitObject_GetObjectToWorld3x4: |
6654 | 8 | case IntrinsicOp::MOP_DxHitObject_GetWorldToObject3x4: |
6655 | 8 | return true; |
6656 | 16 | } |
6657 | 16 | } |
6658 | | |
6659 | | Value *TranslateHitObjectMatrixGetter(CallInst *CI, IntrinsicOp IOP, |
6660 | | OP::OpCode OpCode, |
6661 | | HLOperationLowerHelper &Helper, |
6662 | | HLObjectOperationLowerHelper *pObjHelper, |
6663 | 16 | bool &Translated) { |
6664 | 16 | hlsl::OP *OP = &Helper.hlslOP; |
6665 | 16 | Value *HitObjectPtr = CI->getArgOperand(1); |
6666 | 16 | IRBuilder<> Builder(CI); |
6667 | 16 | Value *HitObject = Builder.CreateLoad(HitObjectPtr); |
6668 | | |
6669 | | // Create 3x4 matrix indices |
6670 | 16 | bool Is3x4 = IsHitObject3x4Getter(IOP); |
6671 | 16 | Constant *Rows, *Cols; |
6672 | 16 | GetMatrixIndices<uint32_t>(Rows, Cols, Is3x4, CI->getContext()); |
6673 | | |
6674 | 16 | VectorType *Ty = cast<VectorType>(CI->getType()); |
6675 | 16 | return TrivialDxilOperation(OpCode, {nullptr, HitObject, Rows, Cols}, Ty, CI, |
6676 | 16 | OP); |
6677 | 16 | } |
6678 | | |
6679 | | Value *TranslateHitObjectLoadLocalRootTableConstant( |
6680 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, |
6681 | | HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, |
6682 | 4 | bool &Translated) { |
6683 | 4 | hlsl::OP *OP = &Helper.hlslOP; |
6684 | 4 | IRBuilder<> Builder(CI); |
6685 | | |
6686 | 4 | Value *HitObjectPtr = CI->getArgOperand(1); |
6687 | 4 | Value *Offset = CI->getArgOperand(2); |
6688 | | |
6689 | 4 | Value *HitObject = Builder.CreateLoad(HitObjectPtr); |
6690 | 4 | return TrivialDxilOperation(OpCode, {nullptr, HitObject, Offset}, |
6691 | 4 | Helper.voidTy, CI, OP); |
6692 | 4 | } |
6693 | | |
6694 | | Value *TranslateHitObjectSetShaderTableIndex( |
6695 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, |
6696 | | HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, |
6697 | 4 | bool &Translated) { |
6698 | 4 | hlsl::OP *OP = &Helper.hlslOP; |
6699 | 4 | IRBuilder<> Builder(CI); |
6700 | | |
6701 | 4 | Value *HitObjectPtr = CI->getArgOperand(1); |
6702 | 4 | Value *ShaderTableIndex = CI->getArgOperand(2); |
6703 | | |
6704 | 4 | Value *InHitObject = Builder.CreateLoad(HitObjectPtr); |
6705 | 4 | Value *OutHitObject = TrivialDxilOperation( |
6706 | 4 | OpCode, {nullptr, InHitObject, ShaderTableIndex}, Helper.voidTy, CI, OP); |
6707 | 4 | Builder.CreateStore(OutHitObject, HitObjectPtr); |
6708 | 4 | return nullptr; |
6709 | 4 | } |
6710 | | |
6711 | | } // namespace |
6712 | | |
6713 | | // Resource Handle. |
6714 | | namespace { |
6715 | | Value *TranslateGetHandleFromHeap(CallInst *CI, IntrinsicOp IOP, |
6716 | | DXIL::OpCode opcode, |
6717 | | HLOperationLowerHelper &helper, |
6718 | | HLObjectOperationLowerHelper *pObjHelper, |
6719 | 602 | bool &Translated) { |
6720 | 602 | hlsl::OP &hlslOP = helper.hlslOP; |
6721 | 602 | Function *dxilFunc = hlslOP.GetOpFunc(opcode, helper.voidTy); |
6722 | 602 | IRBuilder<> Builder(CI); |
6723 | 602 | Value *opArg = ConstantInt::get(helper.i32Ty, (unsigned)opcode); |
6724 | 602 | return Builder.CreateCall( |
6725 | 602 | dxilFunc, {opArg, CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx), |
6726 | 602 | CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx), |
6727 | | // TODO: update nonUniformIndex later. |
6728 | 602 | Builder.getInt1(false)}); |
6729 | 602 | } |
6730 | | } // namespace |
6731 | | |
6732 | | // Translate and/or/select intrinsics |
6733 | | namespace { |
6734 | | Value *TranslateAnd(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6735 | | HLOperationLowerHelper &helper, |
6736 | | HLObjectOperationLowerHelper *pObjHelper, |
6737 | 60 | bool &Translated) { |
6738 | 60 | Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
6739 | 60 | Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
6740 | 60 | IRBuilder<> Builder(CI); |
6741 | | |
6742 | 60 | return Builder.CreateAnd(x, y); |
6743 | 60 | } |
6744 | | Value *TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6745 | | HLOperationLowerHelper &helper, |
6746 | 60 | HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { |
6747 | 60 | Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
6748 | 60 | Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
6749 | 60 | IRBuilder<> Builder(CI); |
6750 | | |
6751 | 60 | return Builder.CreateOr(x, y); |
6752 | 60 | } |
6753 | | Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6754 | | HLOperationLowerHelper &helper, |
6755 | | HLObjectOperationLowerHelper *pObjHelper, |
6756 | 30 | bool &Translated) { |
6757 | 30 | Value *cond = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); |
6758 | 30 | Value *t = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); |
6759 | 30 | Value *f = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); |
6760 | 30 | IRBuilder<> Builder(CI); |
6761 | | |
6762 | 30 | return Builder.CreateSelect(cond, t, f); |
6763 | 30 | } |
6764 | | |
6765 | | Value *TranslateLinAlgFillMatrix(CallInst *CI, IntrinsicOp IOP, |
6766 | | OP::OpCode OpCode, |
6767 | | HLOperationLowerHelper &Helper, |
6768 | | HLObjectOperationLowerHelper *ObjHelper, |
6769 | 44 | bool &Translated) { |
6770 | 44 | hlsl::OP *HlslOp = &Helper.hlslOP; |
6771 | 44 | IRBuilder<> Builder(CI); |
6772 | | |
6773 | 44 | Value *MatrixPtr = CI->getArgOperand(1); |
6774 | 44 | DXASSERT_NOMSG(isa<PointerType>(MatrixPtr->getType())); |
6775 | 44 | Type *MatrixType = MatrixPtr->getType()->getPointerElementType(); |
6776 | 44 | Value *Scalar = CI->getArgOperand(2); |
6777 | | |
6778 | 44 | Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); |
6779 | 44 | Function *DxilFunc = |
6780 | 44 | HlslOp->GetOpFunc(OpCode, {MatrixType, Scalar->getType()}); |
6781 | | |
6782 | 44 | Value *Matrix = Builder.CreateCall(DxilFunc, {OpArg, Scalar}); |
6783 | 44 | Builder.CreateStore(Matrix, MatrixPtr); |
6784 | | |
6785 | 44 | return nullptr; |
6786 | 44 | } |
6787 | | |
6788 | | Value *TranslateLinAlgMatrixAccumStoreToDescriptor( |
6789 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, |
6790 | | HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper, |
6791 | 10 | bool &Translated) { |
6792 | 10 | hlsl::OP *HlslOp = &Helper.hlslOP; |
6793 | 10 | IRBuilder<> Builder(CI); |
6794 | | |
6795 | 10 | Value *Matrix = CI->getArgOperand(1); |
6796 | 10 | Value *ResHandle = CI->getArgOperand(2); |
6797 | 10 | Value *Offset = CI->getArgOperand(3); |
6798 | 10 | Value *Stride = CI->getArgOperand(4); |
6799 | 10 | Value *Layout = CI->getArgOperand(5); |
6800 | 10 | Value *Align = CI->getArgOperand(6); |
6801 | | |
6802 | 10 | Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); |
6803 | 10 | Function *DxilFunc = HlslOp->GetOpFunc(OpCode, Matrix->getType()); |
6804 | | |
6805 | 10 | return Builder.CreateCall( |
6806 | 10 | DxilFunc, {OpArg, Matrix, ResHandle, Offset, Stride, Layout, Align}); |
6807 | 10 | } |
6808 | | |
6809 | | Value *TranslateLinAlgMatVecMul(CallInst *CI, IntrinsicOp IOP, |
6810 | | OP::OpCode OpCode, |
6811 | | HLOperationLowerHelper &Helper, |
6812 | | HLObjectOperationLowerHelper *ObjHelper, |
6813 | 4 | bool &Translated) { |
6814 | 4 | hlsl::OP *HlslOp = &Helper.hlslOP; |
6815 | 4 | IRBuilder<> Builder(CI); |
6816 | | |
6817 | 4 | Value *ReturnVecPtr = CI->getArgOperand(1); |
6818 | 4 | DXASSERT_NOMSG(isa<PointerType>(ReturnVecPtr->getType())); |
6819 | 4 | Type *ReturnVecType = ReturnVecPtr->getType()->getPointerElementType(); |
6820 | | |
6821 | 4 | Value *Matrix = CI->getArgOperand(2); |
6822 | 4 | Value *IsOutputSigned = CI->getArgOperand(3); |
6823 | 4 | Value *InputVector = CI->getArgOperand(4); |
6824 | 4 | Value *InputVectorInterp = CI->getArgOperand(5); |
6825 | | |
6826 | 4 | Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); |
6827 | 4 | Function *DxilFunc = HlslOp->GetOpFunc( |
6828 | 4 | OpCode, {ReturnVecType, Matrix->getType(), InputVector->getType()}); |
6829 | | |
6830 | 4 | Value *ReturnVec = |
6831 | 4 | Builder.CreateCall(DxilFunc, {OpArg, Matrix, IsOutputSigned, InputVector, |
6832 | 4 | InputVectorInterp}); |
6833 | 4 | Builder.CreateStore(ReturnVec, ReturnVecPtr); |
6834 | | |
6835 | 4 | return nullptr; |
6836 | 4 | } |
6837 | | |
6838 | | Value *TranslateLinAlgMatVecMulAdd(CallInst *CI, IntrinsicOp IOP, |
6839 | | OP::OpCode OpCode, |
6840 | | HLOperationLowerHelper &Helper, |
6841 | | HLObjectOperationLowerHelper *ObjHelper, |
6842 | 14 | bool &Translated) { |
6843 | 14 | hlsl::OP *HlslOp = &Helper.hlslOP; |
6844 | 14 | IRBuilder<> Builder(CI); |
6845 | | |
6846 | 14 | Value *ReturnVecPtr = CI->getArgOperand(1); |
6847 | 14 | DXASSERT_NOMSG(isa<PointerType>(ReturnVecPtr->getType())); |
6848 | 14 | Type *ReturnVecType = ReturnVecPtr->getType()->getPointerElementType(); |
6849 | | |
6850 | 14 | Value *Matrix = CI->getArgOperand(2); |
6851 | 14 | Value *IsOutputSigned = CI->getArgOperand(3); |
6852 | 14 | Value *InputVector = CI->getArgOperand(4); |
6853 | 14 | Value *InputVectorInterp = CI->getArgOperand(5); |
6854 | 14 | Value *BiasVector = CI->getArgOperand(6); |
6855 | 14 | Value *BiasVectorInterp = CI->getArgOperand(7); |
6856 | | |
6857 | 14 | Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); |
6858 | 14 | Function *DxilFunc = HlslOp->GetOpFunc( |
6859 | 14 | OpCode, {ReturnVecType, Matrix->getType(), InputVector->getType(), |
6860 | 14 | BiasVector->getType()}); |
6861 | | |
6862 | 14 | Value *ReturnVec = Builder.CreateCall( |
6863 | 14 | DxilFunc, {OpArg, Matrix, IsOutputSigned, InputVector, InputVectorInterp, |
6864 | 14 | BiasVector, BiasVectorInterp}); |
6865 | 14 | Builder.CreateStore(ReturnVec, ReturnVecPtr); |
6866 | | |
6867 | 14 | return nullptr; |
6868 | 14 | } |
6869 | | |
6870 | | Value *TranslateLinAlgMatrixLoadFromDescriptor( |
6871 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, |
6872 | | HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper, |
6873 | 10 | bool &Translated) { |
6874 | 10 | hlsl::OP *HlslOp = &Helper.hlslOP; |
6875 | 10 | IRBuilder<> Builder(CI); |
6876 | | |
6877 | 10 | Value *MatrixPtr = CI->getArgOperand(1); |
6878 | 10 | DXASSERT_NOMSG(isa<PointerType>(MatrixPtr->getType())); |
6879 | 10 | Type *MatrixType = MatrixPtr->getType()->getPointerElementType(); |
6880 | | |
6881 | 10 | Value *ResHandle = CI->getArgOperand(2); |
6882 | 10 | Value *Offset = CI->getArgOperand(3); |
6883 | 10 | Value *Stride = CI->getArgOperand(4); |
6884 | 10 | Value *Layout = CI->getArgOperand(5); |
6885 | 10 | Value *Align = CI->getArgOperand(6); |
6886 | | |
6887 | 10 | Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); |
6888 | 10 | Function *DxilFunc = HlslOp->GetOpFunc(OpCode, MatrixType); |
6889 | | |
6890 | 10 | Value *Matrix = Builder.CreateCall( |
6891 | 10 | DxilFunc, {OpArg, ResHandle, Offset, Stride, Layout, Align}); |
6892 | 10 | Builder.CreateStore(Matrix, MatrixPtr); |
6893 | | |
6894 | 10 | return nullptr; |
6895 | 10 | } |
6896 | | |
6897 | | Value *TranslateLinAlgMatrixOuterProduct( |
6898 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, |
6899 | | HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper, |
6900 | 12 | bool &Translated) { |
6901 | 12 | hlsl::OP *HlslOp = &Helper.hlslOP; |
6902 | 12 | IRBuilder<> Builder(CI); |
6903 | | |
6904 | 12 | Value *MatrixPtr = CI->getArgOperand(1); |
6905 | 12 | DXASSERT_NOMSG(isa<PointerType>(MatrixPtr->getType())); |
6906 | 12 | Type *MatrixType = MatrixPtr->getType()->getPointerElementType(); |
6907 | 12 | Value *VecA = CI->getArgOperand(2); |
6908 | 12 | Value *VecB = CI->getArgOperand(3); |
6909 | | |
6910 | 12 | Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); |
6911 | 12 | Function *DxilFunc = |
6912 | 12 | HlslOp->GetOpFunc(OpCode, {MatrixType, VecA->getType(), VecB->getType()}); |
6913 | | |
6914 | 12 | Value *Matrix = Builder.CreateCall(DxilFunc, {OpArg, VecA, VecB}); |
6915 | 12 | Builder.CreateStore(Matrix, MatrixPtr); |
6916 | | |
6917 | 12 | return nullptr; |
6918 | 12 | } |
6919 | | |
6920 | | Value *TranslateLinAlgMatrixAccumulate(CallInst *CI, IntrinsicOp IOP, |
6921 | | OP::OpCode OpCode, |
6922 | | HLOperationLowerHelper &Helper, |
6923 | | HLObjectOperationLowerHelper *ObjHelper, |
6924 | 6 | bool &Translated) { |
6925 | 6 | hlsl::OP *HlslOp = &Helper.hlslOP; |
6926 | 6 | IRBuilder<> Builder(CI); |
6927 | | |
6928 | 6 | Value *MatrixCPtr = CI->getArgOperand(1); |
6929 | 6 | DXASSERT_NOMSG(isa<PointerType>(MatrixCPtr->getType())); |
6930 | 6 | Type *MatrixCType = MatrixCPtr->getType()->getPointerElementType(); |
6931 | | |
6932 | 6 | Value *MatrixLHS = CI->getArgOperand(2); |
6933 | 6 | Value *MatrixRHS = CI->getArgOperand(3); |
6934 | | |
6935 | 6 | Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); |
6936 | 6 | Function *DxilFunc = HlslOp->GetOpFunc( |
6937 | 6 | OpCode, {MatrixCType, MatrixLHS->getType(), MatrixRHS->getType()}); |
6938 | | |
6939 | 6 | Value *MatrixC = Builder.CreateCall(DxilFunc, {OpArg, MatrixLHS, MatrixRHS}); |
6940 | 6 | Builder.CreateStore(MatrixC, MatrixCPtr); |
6941 | | |
6942 | 6 | return nullptr; |
6943 | 6 | } |
6944 | | |
6945 | | Value *TranslateLinAlgMatrixGetCoordinate( |
6946 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, |
6947 | | HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper, |
6948 | 4 | bool &Translated) { |
6949 | 4 | hlsl::OP *HlslOp = &Helper.hlslOP; |
6950 | 4 | IRBuilder<> Builder(CI); |
6951 | | |
6952 | 4 | Value *Matrix = CI->getArgOperand(1); |
6953 | 4 | Value *Index = CI->getArgOperand(2); |
6954 | | |
6955 | 4 | Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); |
6956 | 4 | Function *DxilFunc = HlslOp->GetOpFunc(OpCode, Matrix->getType()); |
6957 | | |
6958 | 4 | return Builder.CreateCall(DxilFunc, {OpArg, Matrix, Index}); |
6959 | 4 | } |
6960 | | |
6961 | | Value *TranslateLinAlgMatrixGetElement(CallInst *CI, IntrinsicOp IOP, |
6962 | | OP::OpCode OpCode, |
6963 | | HLOperationLowerHelper &Helper, |
6964 | | HLObjectOperationLowerHelper *ObjHelper, |
6965 | 10 | bool &Translated) { |
6966 | 10 | hlsl::OP *HlslOp = &Helper.hlslOP; |
6967 | 10 | IRBuilder<> Builder(CI); |
6968 | | |
6969 | 10 | Value *RetElemPtr = CI->getArgOperand(1); |
6970 | 10 | DXASSERT_NOMSG(isa<PointerType>(RetElemPtr->getType())); |
6971 | 10 | Type *RetTy = RetElemPtr->getType()->getPointerElementType(); |
6972 | | |
6973 | 10 | Value *Matrix = CI->getArgOperand(2); |
6974 | 10 | Value *Index = CI->getArgOperand(3); |
6975 | | |
6976 | 10 | Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); |
6977 | 10 | Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {RetTy, Matrix->getType()}); |
6978 | | |
6979 | 10 | Value *RetElem = Builder.CreateCall(DxilFunc, {OpArg, Matrix, Index}); |
6980 | 10 | Builder.CreateStore(RetElem, RetElemPtr); |
6981 | | |
6982 | 10 | return nullptr; |
6983 | 10 | } |
6984 | | |
6985 | | Value *TranslateLinAlgMatrixSetElement(CallInst *CI, IntrinsicOp IOP, |
6986 | | OP::OpCode OpCode, |
6987 | | HLOperationLowerHelper &Helper, |
6988 | | HLObjectOperationLowerHelper *ObjHelper, |
6989 | 4 | bool &Translated) { |
6990 | 4 | hlsl::OP *HlslOp = &Helper.hlslOP; |
6991 | 4 | IRBuilder<> Builder(CI); |
6992 | | |
6993 | 4 | Value *RetMatrixPtr = CI->getArgOperand(1); |
6994 | 4 | DXASSERT_NOMSG(isa<PointerType>(RetMatrixPtr->getType())); |
6995 | 4 | Type *RetMatrixTy = RetMatrixPtr->getType()->getPointerElementType(); |
6996 | | |
6997 | 4 | Value *InMatrix = CI->getArgOperand(2); |
6998 | 4 | Value *Index = CI->getArgOperand(3); |
6999 | 4 | Value *NewVal = CI->getArgOperand(4); |
7000 | | |
7001 | 4 | Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); |
7002 | 4 | Function *DxilFunc = HlslOp->GetOpFunc( |
7003 | 4 | OpCode, {RetMatrixTy, InMatrix->getType(), NewVal->getType()}); |
7004 | | |
7005 | 4 | Value *RetMatrix = |
7006 | 4 | Builder.CreateCall(DxilFunc, {OpArg, InMatrix, Index, NewVal}); |
7007 | 4 | Builder.CreateStore(RetMatrix, RetMatrixPtr); |
7008 | | |
7009 | 4 | return nullptr; |
7010 | 4 | } |
7011 | | |
7012 | | Value *TranslateLinAlgMatrixMatrixMultiply( |
7013 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, |
7014 | | HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper, |
7015 | 10 | bool &Translated) { |
7016 | 10 | hlsl::OP *HlslOp = &Helper.hlslOP; |
7017 | 10 | IRBuilder<> Builder(CI); |
7018 | | |
7019 | 10 | Value *MatrixCPtr = CI->getArgOperand(1); |
7020 | 10 | DXASSERT_NOMSG(isa<PointerType>(MatrixCPtr->getType())); |
7021 | 10 | Type *MatrixCTy = MatrixCPtr->getType()->getPointerElementType(); |
7022 | | |
7023 | 10 | Value *MatrixA = CI->getArgOperand(2); |
7024 | 10 | Value *MatrixB = CI->getArgOperand(3); |
7025 | | |
7026 | 10 | Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); |
7027 | 10 | Function *DxilFunc = HlslOp->GetOpFunc( |
7028 | 10 | OpCode, {MatrixCTy, MatrixA->getType(), MatrixB->getType()}); |
7029 | | |
7030 | 10 | Value *MatrixC = Builder.CreateCall(DxilFunc, {OpArg, MatrixA, MatrixB}); |
7031 | 10 | Builder.CreateStore(MatrixC, MatrixCPtr); |
7032 | | |
7033 | 10 | return nullptr; |
7034 | 10 | } |
7035 | | |
7036 | | Value *TranslateLinAlgMatrixMatrixMultiplyAccumulate( |
7037 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, |
7038 | | HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper, |
7039 | 4 | bool &Translated) { |
7040 | 4 | hlsl::OP *HlslOp = &Helper.hlslOP; |
7041 | 4 | IRBuilder<> Builder(CI); |
7042 | | |
7043 | 4 | Value *MatrixRPtr = CI->getArgOperand(1); |
7044 | 4 | DXASSERT_NOMSG(isa<PointerType>(MatrixRPtr->getType())); |
7045 | 4 | Type *MatrixRTy = MatrixRPtr->getType()->getPointerElementType(); |
7046 | | |
7047 | 4 | Value *MatrixA = CI->getArgOperand(2); |
7048 | 4 | Value *MatrixB = CI->getArgOperand(3); |
7049 | 4 | Value *MatrixC = CI->getArgOperand(4); |
7050 | | |
7051 | 4 | Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); |
7052 | 4 | Function *DxilFunc = |
7053 | 4 | HlslOp->GetOpFunc(OpCode, {MatrixRTy, MatrixA->getType(), |
7054 | 4 | MatrixB->getType(), MatrixC->getType()}); |
7055 | | |
7056 | 4 | Value *MatrixR = |
7057 | 4 | Builder.CreateCall(DxilFunc, {OpArg, MatrixA, MatrixB, MatrixC}); |
7058 | 4 | Builder.CreateStore(MatrixR, MatrixRPtr); |
7059 | | |
7060 | 4 | return nullptr; |
7061 | 4 | } |
7062 | | |
7063 | | Value *TranslateLinAlgCopyConvertMatrix(CallInst *CI, IntrinsicOp IOP, |
7064 | | OP::OpCode OpCode, |
7065 | | HLOperationLowerHelper &Helper, |
7066 | | HLObjectOperationLowerHelper *ObjHelper, |
7067 | 6 | bool &Translated) { |
7068 | 6 | hlsl::OP *HlslOp = &Helper.hlslOP; |
7069 | 6 | IRBuilder<> Builder(CI); |
7070 | | |
7071 | 6 | Value *MatrixRPtr = CI->getArgOperand(1); |
7072 | 6 | DXASSERT_NOMSG(isa<PointerType>(MatrixRPtr->getType())); |
7073 | 6 | Type *MatrixRTy = MatrixRPtr->getType()->getPointerElementType(); |
7074 | | |
7075 | 6 | Value *MatrixSrc = CI->getArgOperand(2); |
7076 | 6 | Value *Transpose = CI->getArgOperand(3); |
7077 | | |
7078 | 6 | Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); |
7079 | 6 | Function *DxilFunc = |
7080 | 6 | HlslOp->GetOpFunc(OpCode, {MatrixRTy, MatrixSrc->getType()}); |
7081 | | |
7082 | 6 | Value *MatrixR = Builder.CreateCall(DxilFunc, {OpArg, MatrixSrc, Transpose}); |
7083 | 6 | Builder.CreateStore(MatrixR, MatrixRPtr); |
7084 | | |
7085 | 6 | return nullptr; |
7086 | 6 | } |
7087 | | |
7088 | | Value *TranslateLinAlgMatrixLoadFromMemory( |
7089 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, |
7090 | | HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper, |
7091 | 4 | bool &Translated) { |
7092 | 4 | hlsl::OP *HlslOp = &Helper.hlslOP; |
7093 | 4 | IRBuilder<> Builder(CI); |
7094 | | |
7095 | 4 | Value *MatrixPtr = CI->getArgOperand(1); |
7096 | 4 | DXASSERT_NOMSG(isa<PointerType>(MatrixPtr->getType())); |
7097 | 4 | Type *MatrixType = MatrixPtr->getType()->getPointerElementType(); |
7098 | | |
7099 | 4 | Value *Arr = CI->getArgOperand(2); |
7100 | 4 | Value *Offset = CI->getArgOperand(3); |
7101 | 4 | Value *Stride = CI->getArgOperand(4); |
7102 | 4 | Value *Layout = CI->getArgOperand(5); |
7103 | | |
7104 | 4 | Value *Zero = Builder.getInt32(0); |
7105 | 4 | Value *ArrPtr = Builder.CreateGEP(Arr, {Zero, Zero}); |
7106 | 4 | Type *ArrEltTy = ArrPtr->getType()->getPointerElementType(); |
7107 | | |
7108 | 4 | Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); |
7109 | 4 | Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {MatrixType, ArrEltTy}); |
7110 | | |
7111 | 4 | Value *Matrix = |
7112 | 4 | Builder.CreateCall(DxilFunc, {OpArg, ArrPtr, Offset, Stride, Layout}); |
7113 | 4 | Builder.CreateStore(Matrix, MatrixPtr); |
7114 | | |
7115 | 4 | return nullptr; |
7116 | 4 | } |
7117 | | |
7118 | | Value *TranslateLinAlgMatrixAccumStoreToMemory( |
7119 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, |
7120 | | HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *ObjHelper, |
7121 | 8 | bool &Translated) { |
7122 | 8 | hlsl::OP *HlslOp = &Helper.hlslOP; |
7123 | 8 | IRBuilder<> Builder(CI); |
7124 | | |
7125 | 8 | Value *Matrix = CI->getArgOperand(1); |
7126 | 8 | Value *Arr = CI->getArgOperand(2); |
7127 | 8 | Value *Offset = CI->getArgOperand(3); |
7128 | 8 | Value *Stride = CI->getArgOperand(4); |
7129 | 8 | Value *Layout = CI->getArgOperand(5); |
7130 | | |
7131 | 8 | Value *Zero = Builder.getInt32(0); |
7132 | 8 | Value *ArrPtr = Builder.CreateGEP(Arr, {Zero, Zero}); |
7133 | 8 | Type *ArrEltTy = ArrPtr->getType()->getPointerElementType(); |
7134 | | |
7135 | 8 | Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); |
7136 | 8 | Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {Matrix->getType(), ArrEltTy}); |
7137 | | |
7138 | 8 | return Builder.CreateCall(DxilFunc, |
7139 | 8 | {OpArg, Matrix, ArrPtr, Offset, Stride, Layout}); |
7140 | 8 | } |
7141 | | |
7142 | | Value *TranslateLinAlgConvert(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, |
7143 | | HLOperationLowerHelper &Helper, |
7144 | | HLObjectOperationLowerHelper *ObjHelper, |
7145 | 8 | bool &Translated) { |
7146 | 8 | hlsl::OP *HlslOp = &Helper.hlslOP; |
7147 | 8 | IRBuilder<> Builder(CI); |
7148 | | |
7149 | 8 | Value *OutVecPtr = CI->getArgOperand(1); |
7150 | 8 | DXASSERT_NOMSG(isa<PointerType>(OutVecPtr->getType())); |
7151 | 8 | Type *OutVecTy = OutVecPtr->getType()->getPointerElementType(); |
7152 | 8 | Value *InVec = CI->getArgOperand(2); |
7153 | 8 | Value *InInterp = CI->getArgOperand(3); |
7154 | 8 | Value *OutInterp = CI->getArgOperand(4); |
7155 | | |
7156 | 8 | Constant *OpArg = HlslOp->GetU32Const((unsigned)OpCode); |
7157 | 8 | Function *DxilFunc = HlslOp->GetOpFunc(OpCode, {OutVecTy, InVec->getType()}); |
7158 | | |
7159 | 8 | Value *OutVec = |
7160 | 8 | Builder.CreateCall(DxilFunc, {OpArg, InVec, InInterp, OutInterp}); |
7161 | 8 | Builder.CreateStore(OutVec, OutVecPtr); |
7162 | | |
7163 | 8 | return nullptr; |
7164 | 8 | } |
7165 | | |
7166 | | } // namespace |
7167 | | |
7168 | | // Lower table. |
7169 | | namespace { |
7170 | | |
7171 | | Value *EmptyLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, |
7172 | | HLOperationLowerHelper &helper, |
7173 | 6 | HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { |
7174 | 6 | Translated = false; |
7175 | 6 | dxilutil::EmitErrorOnInstruction(CI, "Unsupported intrinsic."); |
7176 | 6 | return nullptr; |
7177 | 6 | } |
7178 | | |
7179 | | // SPIRV change starts |
7180 | | Value *UnsupportedVulkanIntrinsic(CallInst *CI, IntrinsicOp IOP, |
7181 | | DXIL::OpCode opcode, |
7182 | | HLOperationLowerHelper &helper, |
7183 | | HLObjectOperationLowerHelper *pObjHelper, |
7184 | 0 | bool &Translated) { |
7185 | 0 | Translated = false; |
7186 | 0 | dxilutil::EmitErrorOnInstruction(CI, "Unsupported Vulkan intrinsic."); |
7187 | 0 | return nullptr; |
7188 | 0 | } |
7189 | | // SPIRV change ends |
7190 | | |
7191 | | Value *StreamOutputLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, |
7192 | | HLOperationLowerHelper &helper, |
7193 | | HLObjectOperationLowerHelper *pObjHelper, |
7194 | 0 | bool &Translated) { |
7195 | | // Translated in DxilGenerationPass::GenerateStreamOutputOperation. |
7196 | | // Do nothing here. |
7197 | | // Mark not translated. |
7198 | 0 | Translated = false; |
7199 | 0 | return nullptr; |
7200 | 0 | } |
7201 | | |
7202 | | // This table has to match IntrinsicOp orders |
7203 | | constexpr IntrinsicLower gLowerTable[] = { |
7204 | | {IntrinsicOp::IOP_AcceptHitAndEndSearch, |
7205 | | TranslateNoArgNoReturnPreserveOutput, DXIL::OpCode::AcceptHitAndEndSearch}, |
7206 | | {IntrinsicOp::IOP_AddUint64, TranslateAddUint64, DXIL::OpCode::UAddc}, |
7207 | | {IntrinsicOp::IOP_AllMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier}, |
7208 | | {IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync, TrivialBarrier, |
7209 | | DXIL::OpCode::Barrier}, |
7210 | | {IntrinsicOp::IOP_AllocateRayQuery, TranslateAllocateRayQuery, |
7211 | | DXIL::OpCode::AllocateRayQuery}, |
7212 | | {IntrinsicOp::IOP_Barrier, TranslateBarrier, DXIL::OpCode::NumOpCodes}, |
7213 | | {IntrinsicOp::IOP_CallShader, TranslateCallShader, |
7214 | | DXIL::OpCode::CallShader}, |
7215 | | {IntrinsicOp::IOP_CheckAccessFullyMapped, TranslateCheckAccess, |
7216 | | DXIL::OpCode::CheckAccessFullyMapped}, |
7217 | | {IntrinsicOp::IOP_CreateResourceFromHeap, TranslateGetHandleFromHeap, |
7218 | | DXIL::OpCode::CreateHandleFromHeap}, |
7219 | | {IntrinsicOp::IOP_D3DCOLORtoUBYTE4, TranslateD3DColorToUByte4, |
7220 | | DXIL::OpCode::NumOpCodes}, |
7221 | | {IntrinsicOp::IOP_DeviceMemoryBarrier, TrivialBarrier, |
7222 | | DXIL::OpCode::Barrier}, |
7223 | | {IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync, TrivialBarrier, |
7224 | | DXIL::OpCode::Barrier}, |
7225 | | {IntrinsicOp::IOP_DispatchMesh, TrivialDispatchMesh, |
7226 | | DXIL::OpCode::DispatchMesh}, |
7227 | | {IntrinsicOp::IOP_DispatchRaysDimensions, TranslateNoArgVectorOperation, |
7228 | | DXIL::OpCode::DispatchRaysDimensions}, |
7229 | | {IntrinsicOp::IOP_DispatchRaysIndex, TranslateNoArgVectorOperation, |
7230 | | DXIL::OpCode::DispatchRaysIndex}, |
7231 | | {IntrinsicOp::IOP_EvaluateAttributeAtSample, TranslateEvalSample, |
7232 | | DXIL::OpCode::NumOpCodes}, |
7233 | | {IntrinsicOp::IOP_EvaluateAttributeCentroid, TranslateEvalCentroid, |
7234 | | DXIL::OpCode::EvalCentroid}, |
7235 | | {IntrinsicOp::IOP_EvaluateAttributeSnapped, TranslateEvalSnapped, |
7236 | | DXIL::OpCode::NumOpCodes}, |
7237 | | {IntrinsicOp::IOP_GeometryIndex, TrivialNoArgWithRetOperation, |
7238 | | DXIL::OpCode::GeometryIndex}, |
7239 | | {IntrinsicOp::IOP_GetAttributeAtVertex, TranslateGetAttributeAtVertex, |
7240 | | DXIL::OpCode::AttributeAtVertex}, |
7241 | | {IntrinsicOp::IOP_GetRemainingRecursionLevels, TrivialNoArgOperation, |
7242 | | DXIL::OpCode::GetRemainingRecursionLevels}, |
7243 | | {IntrinsicOp::IOP_GetRenderTargetSampleCount, TrivialNoArgOperation, |
7244 | | DXIL::OpCode::RenderTargetGetSampleCount}, |
7245 | | {IntrinsicOp::IOP_GetRenderTargetSamplePosition, TranslateGetRTSamplePos, |
7246 | | DXIL::OpCode::NumOpCodes}, |
7247 | | {IntrinsicOp::IOP_GroupMemoryBarrier, TrivialBarrier, |
7248 | | DXIL::OpCode::Barrier}, |
7249 | | {IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync, TrivialBarrier, |
7250 | | DXIL::OpCode::Barrier}, |
7251 | | {IntrinsicOp::IOP_HitKind, TrivialNoArgWithRetOperation, |
7252 | | DXIL::OpCode::HitKind}, |
7253 | | {IntrinsicOp::IOP_IgnoreHit, TranslateNoArgNoReturnPreserveOutput, |
7254 | | DXIL::OpCode::IgnoreHit}, |
7255 | | {IntrinsicOp::IOP_InstanceID, TrivialNoArgWithRetOperation, |
7256 | | DXIL::OpCode::InstanceID}, |
7257 | | {IntrinsicOp::IOP_InstanceIndex, TrivialNoArgWithRetOperation, |
7258 | | DXIL::OpCode::InstanceIndex}, |
7259 | | {IntrinsicOp::IOP_InterlockedAdd, TranslateIopAtomicBinaryOperation, |
7260 | | DXIL::OpCode::NumOpCodes}, |
7261 | | {IntrinsicOp::IOP_InterlockedAnd, TranslateIopAtomicBinaryOperation, |
7262 | | DXIL::OpCode::NumOpCodes}, |
7263 | | {IntrinsicOp::IOP_InterlockedCompareExchange, TranslateIopAtomicCmpXChg, |
7264 | | DXIL::OpCode::NumOpCodes}, |
7265 | | {IntrinsicOp::IOP_InterlockedCompareExchangeFloatBitwise, |
7266 | | TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes}, |
7267 | | {IntrinsicOp::IOP_InterlockedCompareStore, TranslateIopAtomicCmpXChg, |
7268 | | DXIL::OpCode::NumOpCodes}, |
7269 | | {IntrinsicOp::IOP_InterlockedCompareStoreFloatBitwise, |
7270 | | TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes}, |
7271 | | {IntrinsicOp::IOP_InterlockedExchange, TranslateIopAtomicBinaryOperation, |
7272 | | DXIL::OpCode::NumOpCodes}, |
7273 | | {IntrinsicOp::IOP_InterlockedMax, TranslateIopAtomicBinaryOperation, |
7274 | | DXIL::OpCode::NumOpCodes}, |
7275 | | {IntrinsicOp::IOP_InterlockedMin, TranslateIopAtomicBinaryOperation, |
7276 | | DXIL::OpCode::NumOpCodes}, |
7277 | | {IntrinsicOp::IOP_InterlockedOr, TranslateIopAtomicBinaryOperation, |
7278 | | DXIL::OpCode::NumOpCodes}, |
7279 | | {IntrinsicOp::IOP_InterlockedXor, TranslateIopAtomicBinaryOperation, |
7280 | | DXIL::OpCode::NumOpCodes}, |
7281 | | {IntrinsicOp::IOP_IsHelperLane, TrivialNoArgWithRetOperation, |
7282 | | DXIL::OpCode::IsHelperLane}, |
7283 | | {IntrinsicOp::IOP_NonUniformResourceIndex, TranslateNonUniformResourceIndex, |
7284 | | DXIL::OpCode::NumOpCodes}, |
7285 | | {IntrinsicOp::IOP_ObjectRayDirection, TranslateNoArgVectorOperation, |
7286 | | DXIL::OpCode::ObjectRayDirection}, |
7287 | | {IntrinsicOp::IOP_ObjectRayOrigin, TranslateNoArgVectorOperation, |
7288 | | DXIL::OpCode::ObjectRayOrigin}, |
7289 | | {IntrinsicOp::IOP_ObjectToWorld, TranslateNoArgMatrix3x4Operation, |
7290 | | DXIL::OpCode::ObjectToWorld}, |
7291 | | {IntrinsicOp::IOP_ObjectToWorld3x4, TranslateNoArgMatrix3x4Operation, |
7292 | | DXIL::OpCode::ObjectToWorld}, |
7293 | | {IntrinsicOp::IOP_ObjectToWorld4x3, |
7294 | | TranslateNoArgTransposedMatrix3x4Operation, DXIL::OpCode::ObjectToWorld}, |
7295 | | {IntrinsicOp::IOP_PrimitiveIndex, TrivialNoArgWithRetOperation, |
7296 | | DXIL::OpCode::PrimitiveIndex}, |
7297 | | {IntrinsicOp::IOP_Process2DQuadTessFactorsAvg, TranslateProcessTessFactors, |
7298 | | DXIL::OpCode::NumOpCodes}, |
7299 | | {IntrinsicOp::IOP_Process2DQuadTessFactorsMax, TranslateProcessTessFactors, |
7300 | | DXIL::OpCode::NumOpCodes}, |
7301 | | {IntrinsicOp::IOP_Process2DQuadTessFactorsMin, TranslateProcessTessFactors, |
7302 | | DXIL::OpCode::NumOpCodes}, |
7303 | | {IntrinsicOp::IOP_ProcessIsolineTessFactors, |
7304 | | TranslateProcessIsolineTessFactors, DXIL::OpCode::NumOpCodes}, |
7305 | | {IntrinsicOp::IOP_ProcessQuadTessFactorsAvg, TranslateProcessTessFactors, |
7306 | | DXIL::OpCode::NumOpCodes}, |
7307 | | {IntrinsicOp::IOP_ProcessQuadTessFactorsMax, TranslateProcessTessFactors, |
7308 | | DXIL::OpCode::NumOpCodes}, |
7309 | | {IntrinsicOp::IOP_ProcessQuadTessFactorsMin, TranslateProcessTessFactors, |
7310 | | DXIL::OpCode::NumOpCodes}, |
7311 | | {IntrinsicOp::IOP_ProcessTriTessFactorsAvg, TranslateProcessTessFactors, |
7312 | | DXIL::OpCode::NumOpCodes}, |
7313 | | {IntrinsicOp::IOP_ProcessTriTessFactorsMax, TranslateProcessTessFactors, |
7314 | | DXIL::OpCode::NumOpCodes}, |
7315 | | {IntrinsicOp::IOP_ProcessTriTessFactorsMin, TranslateProcessTessFactors, |
7316 | | DXIL::OpCode::NumOpCodes}, |
7317 | | {IntrinsicOp::IOP_QuadAll, TranslateQuadAnyAll, DXIL::OpCode::QuadVote}, |
7318 | | {IntrinsicOp::IOP_QuadAny, TranslateQuadAnyAll, DXIL::OpCode::QuadVote}, |
7319 | | {IntrinsicOp::IOP_QuadReadAcrossDiagonal, TranslateQuadReadAcross, |
7320 | | DXIL::OpCode::QuadOp}, |
7321 | | {IntrinsicOp::IOP_QuadReadAcrossX, TranslateQuadReadAcross, |
7322 | | DXIL::OpCode::QuadOp}, |
7323 | | {IntrinsicOp::IOP_QuadReadAcrossY, TranslateQuadReadAcross, |
7324 | | DXIL::OpCode::QuadOp}, |
7325 | | {IntrinsicOp::IOP_QuadReadLaneAt, TranslateQuadReadLaneAt, |
7326 | | DXIL::OpCode::NumOpCodes}, |
7327 | | {IntrinsicOp::IOP_RayFlags, TrivialNoArgWithRetOperation, |
7328 | | DXIL::OpCode::RayFlags}, |
7329 | | {IntrinsicOp::IOP_RayTCurrent, TrivialNoArgWithRetOperation, |
7330 | | DXIL::OpCode::RayTCurrent}, |
7331 | | {IntrinsicOp::IOP_RayTMin, TrivialNoArgWithRetOperation, |
7332 | | DXIL::OpCode::RayTMin}, |
7333 | | {IntrinsicOp::IOP_ReportHit, TranslateReportIntersection, |
7334 | | DXIL::OpCode::ReportHit}, |
7335 | | {IntrinsicOp::IOP_SetMeshOutputCounts, TrivialSetMeshOutputCounts, |
7336 | | DXIL::OpCode::SetMeshOutputCounts}, |
7337 | | {IntrinsicOp::IOP_TraceRay, TranslateTraceRay, DXIL::OpCode::TraceRay}, |
7338 | | {IntrinsicOp::IOP_WaveActiveAllEqual, TranslateWaveAllEqual, |
7339 | | DXIL::OpCode::WaveActiveAllEqual}, |
7340 | | {IntrinsicOp::IOP_WaveActiveAllTrue, TranslateWaveA2B, |
7341 | | DXIL::OpCode::WaveAllTrue}, |
7342 | | {IntrinsicOp::IOP_WaveActiveAnyTrue, TranslateWaveA2B, |
7343 | | DXIL::OpCode::WaveAnyTrue}, |
7344 | | {IntrinsicOp::IOP_WaveActiveBallot, TranslateWaveBallot, |
7345 | | DXIL::OpCode::WaveActiveBallot}, |
7346 | | {IntrinsicOp::IOP_WaveActiveBitAnd, TranslateWaveA2A, |
7347 | | DXIL::OpCode::WaveActiveBit}, |
7348 | | {IntrinsicOp::IOP_WaveActiveBitOr, TranslateWaveA2A, |
7349 | | DXIL::OpCode::WaveActiveBit}, |
7350 | | {IntrinsicOp::IOP_WaveActiveBitXor, TranslateWaveA2A, |
7351 | | DXIL::OpCode::WaveActiveBit}, |
7352 | | {IntrinsicOp::IOP_WaveActiveCountBits, TranslateWaveA2B, |
7353 | | DXIL::OpCode::WaveAllBitCount}, |
7354 | | {IntrinsicOp::IOP_WaveActiveMax, TranslateWaveA2A, |
7355 | | DXIL::OpCode::WaveActiveOp}, |
7356 | | {IntrinsicOp::IOP_WaveActiveMin, TranslateWaveA2A, |
7357 | | DXIL::OpCode::WaveActiveOp}, |
7358 | | {IntrinsicOp::IOP_WaveActiveProduct, TranslateWaveA2A, |
7359 | | DXIL::OpCode::WaveActiveOp}, |
7360 | | {IntrinsicOp::IOP_WaveActiveSum, TranslateWaveA2A, |
7361 | | DXIL::OpCode::WaveActiveOp}, |
7362 | | {IntrinsicOp::IOP_WaveGetLaneCount, TranslateWaveToVal, |
7363 | | DXIL::OpCode::WaveGetLaneCount}, |
7364 | | {IntrinsicOp::IOP_WaveGetLaneIndex, TranslateWaveToVal, |
7365 | | DXIL::OpCode::WaveGetLaneIndex}, |
7366 | | {IntrinsicOp::IOP_WaveIsFirstLane, TranslateWaveToVal, |
7367 | | DXIL::OpCode::WaveIsFirstLane}, |
7368 | | {IntrinsicOp::IOP_WaveMatch, TranslateWaveMatch, DXIL::OpCode::WaveMatch}, |
7369 | | {IntrinsicOp::IOP_WaveMultiPrefixBitAnd, TranslateWaveMultiPrefix, |
7370 | | DXIL::OpCode::WaveMultiPrefixOp}, |
7371 | | {IntrinsicOp::IOP_WaveMultiPrefixBitOr, TranslateWaveMultiPrefix, |
7372 | | DXIL::OpCode::WaveMultiPrefixOp}, |
7373 | | {IntrinsicOp::IOP_WaveMultiPrefixBitXor, TranslateWaveMultiPrefix, |
7374 | | DXIL::OpCode::WaveMultiPrefixOp}, |
7375 | | {IntrinsicOp::IOP_WaveMultiPrefixCountBits, |
7376 | | TranslateWaveMultiPrefixBitCount, DXIL::OpCode::WaveMultiPrefixBitCount}, |
7377 | | {IntrinsicOp::IOP_WaveMultiPrefixProduct, TranslateWaveMultiPrefix, |
7378 | | DXIL::OpCode::WaveMultiPrefixOp}, |
7379 | | {IntrinsicOp::IOP_WaveMultiPrefixSum, TranslateWaveMultiPrefix, |
7380 | | DXIL::OpCode::WaveMultiPrefixOp}, |
7381 | | {IntrinsicOp::IOP_WavePrefixCountBits, TranslateWaveA2B, |
7382 | | DXIL::OpCode::WavePrefixBitCount}, |
7383 | | {IntrinsicOp::IOP_WavePrefixProduct, TranslateWaveA2A, |
7384 | | DXIL::OpCode::WavePrefixOp}, |
7385 | | {IntrinsicOp::IOP_WavePrefixSum, TranslateWaveA2A, |
7386 | | DXIL::OpCode::WavePrefixOp}, |
7387 | | {IntrinsicOp::IOP_WaveReadLaneAt, TranslateWaveReadLaneAt, |
7388 | | DXIL::OpCode::WaveReadLaneAt}, |
7389 | | {IntrinsicOp::IOP_WaveReadLaneFirst, TranslateWaveReadLaneFirst, |
7390 | | DXIL::OpCode::WaveReadLaneFirst}, |
7391 | | {IntrinsicOp::IOP_WorldRayDirection, TranslateNoArgVectorOperation, |
7392 | | DXIL::OpCode::WorldRayDirection}, |
7393 | | {IntrinsicOp::IOP_WorldRayOrigin, TranslateNoArgVectorOperation, |
7394 | | DXIL::OpCode::WorldRayOrigin}, |
7395 | | {IntrinsicOp::IOP_WorldToObject, TranslateNoArgMatrix3x4Operation, |
7396 | | DXIL::OpCode::WorldToObject}, |
7397 | | {IntrinsicOp::IOP_WorldToObject3x4, TranslateNoArgMatrix3x4Operation, |
7398 | | DXIL::OpCode::WorldToObject}, |
7399 | | {IntrinsicOp::IOP_WorldToObject4x3, |
7400 | | TranslateNoArgTransposedMatrix3x4Operation, DXIL::OpCode::WorldToObject}, |
7401 | | {IntrinsicOp::IOP_abort, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7402 | | {IntrinsicOp::IOP_abs, TranslateAbs, DXIL::OpCode::NumOpCodes}, |
7403 | | {IntrinsicOp::IOP_acos, TrivialUnaryOperation, DXIL::OpCode::Acos}, |
7404 | | {IntrinsicOp::IOP_all, TranslateAll, DXIL::OpCode::NumOpCodes}, |
7405 | | {IntrinsicOp::IOP_and, TranslateAnd, DXIL::OpCode::NumOpCodes}, |
7406 | | {IntrinsicOp::IOP_any, TranslateAny, DXIL::OpCode::NumOpCodes}, |
7407 | | {IntrinsicOp::IOP_asdouble, TranslateAsDouble, DXIL::OpCode::MakeDouble}, |
7408 | | {IntrinsicOp::IOP_asfloat, TranslateBitcast, DXIL::OpCode::NumOpCodes}, |
7409 | | {IntrinsicOp::IOP_asfloat16, TranslateBitcast, DXIL::OpCode::NumOpCodes}, |
7410 | | {IntrinsicOp::IOP_asin, TrivialUnaryOperation, DXIL::OpCode::Asin}, |
7411 | | {IntrinsicOp::IOP_asint, TranslateBitcast, DXIL::OpCode::NumOpCodes}, |
7412 | | {IntrinsicOp::IOP_asint16, TranslateBitcast, DXIL::OpCode::NumOpCodes}, |
7413 | | {IntrinsicOp::IOP_asuint, TranslateAsUint, DXIL::OpCode::SplitDouble}, |
7414 | | {IntrinsicOp::IOP_asuint16, TranslateAsUint, DXIL::OpCode::NumOpCodes}, |
7415 | | {IntrinsicOp::IOP_atan, TrivialUnaryOperation, DXIL::OpCode::Atan}, |
7416 | | {IntrinsicOp::IOP_atan2, TranslateAtan2, DXIL::OpCode::NumOpCodes}, |
7417 | | {IntrinsicOp::IOP_ceil, TrivialUnaryOperation, DXIL::OpCode::Round_pi}, |
7418 | | {IntrinsicOp::IOP_clamp, TranslateClamp, DXIL::OpCode::NumOpCodes}, |
7419 | | {IntrinsicOp::IOP_clip, TranslateClip, DXIL::OpCode::NumOpCodes}, |
7420 | | {IntrinsicOp::IOP_cos, TrivialUnaryOperation, DXIL::OpCode::Cos}, |
7421 | | {IntrinsicOp::IOP_cosh, TrivialUnaryOperation, DXIL::OpCode::Hcos}, |
7422 | | {IntrinsicOp::IOP_countbits, TrivialUnaryOperationRet, |
7423 | | DXIL::OpCode::Countbits}, |
7424 | | {IntrinsicOp::IOP_cross, TranslateCross, DXIL::OpCode::NumOpCodes}, |
7425 | | {IntrinsicOp::IOP_ddx, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX}, |
7426 | | {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperation, |
7427 | | DXIL::OpCode::DerivCoarseX}, |
7428 | | {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperation, |
7429 | | DXIL::OpCode::DerivFineX}, |
7430 | | {IntrinsicOp::IOP_ddy, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY}, |
7431 | | {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperation, |
7432 | | DXIL::OpCode::DerivCoarseY}, |
7433 | | {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperation, |
7434 | | DXIL::OpCode::DerivFineY}, |
7435 | | {IntrinsicOp::IOP_degrees, TranslateDegrees, DXIL::OpCode::NumOpCodes}, |
7436 | | {IntrinsicOp::IOP_determinant, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7437 | | {IntrinsicOp::IOP_distance, TranslateDistance, DXIL::OpCode::NumOpCodes}, |
7438 | | {IntrinsicOp::IOP_dot, TranslateDot, DXIL::OpCode::NumOpCodes}, |
7439 | | {IntrinsicOp::IOP_dot2add, TranslateDot2Add, DXIL::OpCode::Dot2AddHalf}, |
7440 | | {IntrinsicOp::IOP_dot4add_i8packed, TranslateDot4AddPacked, |
7441 | | DXIL::OpCode::Dot4AddI8Packed}, |
7442 | | {IntrinsicOp::IOP_dot4add_u8packed, TranslateDot4AddPacked, |
7443 | | DXIL::OpCode::Dot4AddU8Packed}, |
7444 | | {IntrinsicOp::IOP_dst, TranslateDst, DXIL::OpCode::NumOpCodes}, |
7445 | | {IntrinsicOp::IOP_exp, TranslateExp, DXIL::OpCode::NumOpCodes}, |
7446 | | {IntrinsicOp::IOP_exp2, TrivialUnaryOperation, DXIL::OpCode::Exp}, |
7447 | | {IntrinsicOp::IOP_f16tof32, TranslateF16ToF32, |
7448 | | DXIL::OpCode::LegacyF16ToF32}, |
7449 | | {IntrinsicOp::IOP_f32tof16, TranslateF32ToF16, |
7450 | | DXIL::OpCode::LegacyF32ToF16}, |
7451 | | {IntrinsicOp::IOP_faceforward, TranslateFaceforward, |
7452 | | DXIL::OpCode::NumOpCodes}, |
7453 | | {IntrinsicOp::IOP_firstbithigh, TranslateFirstbitHi, |
7454 | | DXIL::OpCode::FirstbitSHi}, |
7455 | | {IntrinsicOp::IOP_firstbitlow, TranslateFirstbitLo, |
7456 | | DXIL::OpCode::FirstbitLo}, |
7457 | | {IntrinsicOp::IOP_floor, TrivialUnaryOperation, DXIL::OpCode::Round_ni}, |
7458 | | {IntrinsicOp::IOP_fma, TrivialTrinaryOperation, DXIL::OpCode::Fma}, |
7459 | | {IntrinsicOp::IOP_fmod, TranslateFMod, DXIL::OpCode::NumOpCodes}, |
7460 | | {IntrinsicOp::IOP_frac, TrivialUnaryOperation, DXIL::OpCode::Frc}, |
7461 | | {IntrinsicOp::IOP_frexp, TranslateFrexp, DXIL::OpCode::NumOpCodes}, |
7462 | | {IntrinsicOp::IOP_fwidth, TranslateFWidth, DXIL::OpCode::NumOpCodes}, |
7463 | | {IntrinsicOp::IOP_isfinite, TrivialIsSpecialFloat, DXIL::OpCode::IsFinite}, |
7464 | | {IntrinsicOp::IOP_isinf, TrivialIsSpecialFloat, DXIL::OpCode::IsInf}, |
7465 | | {IntrinsicOp::IOP_isnan, TrivialIsSpecialFloat, DXIL::OpCode::IsNaN}, |
7466 | | {IntrinsicOp::IOP_ldexp, TranslateLdExp, DXIL::OpCode::NumOpCodes}, |
7467 | | {IntrinsicOp::IOP_length, TranslateLength, DXIL::OpCode::NumOpCodes}, |
7468 | | {IntrinsicOp::IOP_lerp, TranslateLerp, DXIL::OpCode::NumOpCodes}, |
7469 | | {IntrinsicOp::IOP_lit, TranslateLit, DXIL::OpCode::NumOpCodes}, |
7470 | | {IntrinsicOp::IOP_log, TranslateLog, DXIL::OpCode::NumOpCodes}, |
7471 | | {IntrinsicOp::IOP_log10, TranslateLog10, DXIL::OpCode::NumOpCodes}, |
7472 | | {IntrinsicOp::IOP_log2, TrivialUnaryOperation, DXIL::OpCode::Log}, |
7473 | | {IntrinsicOp::IOP_mad, TranslateFUITrinary, DXIL::OpCode::IMad}, |
7474 | | {IntrinsicOp::IOP_max, TranslateFUIBinary, DXIL::OpCode::IMax}, |
7475 | | {IntrinsicOp::IOP_min, TranslateFUIBinary, DXIL::OpCode::IMin}, |
7476 | | {IntrinsicOp::IOP_modf, TranslateModF, DXIL::OpCode::NumOpCodes}, |
7477 | | {IntrinsicOp::IOP_msad4, TranslateMSad4, DXIL::OpCode::NumOpCodes}, |
7478 | | {IntrinsicOp::IOP_mul, TranslateMul, DXIL::OpCode::NumOpCodes}, |
7479 | | {IntrinsicOp::IOP_normalize, TranslateNormalize, DXIL::OpCode::NumOpCodes}, |
7480 | | {IntrinsicOp::IOP_or, TranslateOr, DXIL::OpCode::NumOpCodes}, |
7481 | | {IntrinsicOp::IOP_pack_clamp_s8, TranslatePack, DXIL::OpCode::Pack4x8}, |
7482 | | {IntrinsicOp::IOP_pack_clamp_u8, TranslatePack, DXIL::OpCode::Pack4x8}, |
7483 | | {IntrinsicOp::IOP_pack_s8, TranslatePack, DXIL::OpCode::Pack4x8}, |
7484 | | {IntrinsicOp::IOP_pack_u8, TranslatePack, DXIL::OpCode::Pack4x8}, |
7485 | | {IntrinsicOp::IOP_pow, TranslatePow, DXIL::OpCode::NumOpCodes}, |
7486 | | {IntrinsicOp::IOP_printf, TranslatePrintf, DXIL::OpCode::NumOpCodes}, |
7487 | | {IntrinsicOp::IOP_radians, TranslateRadians, DXIL::OpCode::NumOpCodes}, |
7488 | | {IntrinsicOp::IOP_rcp, TranslateRCP, DXIL::OpCode::NumOpCodes}, |
7489 | | {IntrinsicOp::IOP_reflect, TranslateReflect, DXIL::OpCode::NumOpCodes}, |
7490 | | {IntrinsicOp::IOP_refract, TranslateRefract, DXIL::OpCode::NumOpCodes}, |
7491 | | {IntrinsicOp::IOP_reversebits, TrivialUnaryOperation, DXIL::OpCode::Bfrev}, |
7492 | | {IntrinsicOp::IOP_round, TrivialUnaryOperation, DXIL::OpCode::Round_ne}, |
7493 | | {IntrinsicOp::IOP_rsqrt, TrivialUnaryOperation, DXIL::OpCode::Rsqrt}, |
7494 | | {IntrinsicOp::IOP_saturate, TrivialUnaryOperation, DXIL::OpCode::Saturate}, |
7495 | | {IntrinsicOp::IOP_select, TranslateSelect, DXIL::OpCode::NumOpCodes}, |
7496 | | {IntrinsicOp::IOP_sign, TranslateSign, DXIL::OpCode::NumOpCodes}, |
7497 | | {IntrinsicOp::IOP_sin, TrivialUnaryOperation, DXIL::OpCode::Sin}, |
7498 | | {IntrinsicOp::IOP_sincos, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7499 | | {IntrinsicOp::IOP_sinh, TrivialUnaryOperation, DXIL::OpCode::Hsin}, |
7500 | | {IntrinsicOp::IOP_smoothstep, TranslateSmoothStep, |
7501 | | DXIL::OpCode::NumOpCodes}, |
7502 | | {IntrinsicOp::IOP_source_mark, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7503 | | {IntrinsicOp::IOP_sqrt, TrivialUnaryOperation, DXIL::OpCode::Sqrt}, |
7504 | | {IntrinsicOp::IOP_step, TranslateStep, DXIL::OpCode::NumOpCodes}, |
7505 | | {IntrinsicOp::IOP_tan, TrivialUnaryOperation, DXIL::OpCode::Tan}, |
7506 | | {IntrinsicOp::IOP_tanh, TrivialUnaryOperation, DXIL::OpCode::Htan}, |
7507 | | {IntrinsicOp::IOP_tex1D, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7508 | | {IntrinsicOp::IOP_tex1Dbias, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7509 | | {IntrinsicOp::IOP_tex1Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7510 | | {IntrinsicOp::IOP_tex1Dlod, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7511 | | {IntrinsicOp::IOP_tex1Dproj, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7512 | | {IntrinsicOp::IOP_tex2D, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7513 | | {IntrinsicOp::IOP_tex2Dbias, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7514 | | {IntrinsicOp::IOP_tex2Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7515 | | {IntrinsicOp::IOP_tex2Dlod, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7516 | | {IntrinsicOp::IOP_tex2Dproj, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7517 | | {IntrinsicOp::IOP_tex3D, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7518 | | {IntrinsicOp::IOP_tex3Dbias, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7519 | | {IntrinsicOp::IOP_tex3Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7520 | | {IntrinsicOp::IOP_tex3Dlod, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7521 | | {IntrinsicOp::IOP_tex3Dproj, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7522 | | {IntrinsicOp::IOP_texCUBE, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7523 | | {IntrinsicOp::IOP_texCUBEbias, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7524 | | {IntrinsicOp::IOP_texCUBEgrad, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7525 | | {IntrinsicOp::IOP_texCUBElod, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7526 | | {IntrinsicOp::IOP_texCUBEproj, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7527 | | {IntrinsicOp::IOP_transpose, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7528 | | {IntrinsicOp::IOP_trunc, TrivialUnaryOperation, DXIL::OpCode::Round_z}, |
7529 | | {IntrinsicOp::IOP_unpack_s8s16, TranslateUnpack, DXIL::OpCode::Unpack4x8}, |
7530 | | {IntrinsicOp::IOP_unpack_s8s32, TranslateUnpack, DXIL::OpCode::Unpack4x8}, |
7531 | | {IntrinsicOp::IOP_unpack_u8u16, TranslateUnpack, DXIL::OpCode::Unpack4x8}, |
7532 | | {IntrinsicOp::IOP_unpack_u8u32, TranslateUnpack, DXIL::OpCode::Unpack4x8}, |
7533 | | {IntrinsicOp::IOP_VkRawBufferLoad, UnsupportedVulkanIntrinsic, |
7534 | | DXIL::OpCode::NumOpCodes}, |
7535 | | {IntrinsicOp::IOP_VkRawBufferStore, UnsupportedVulkanIntrinsic, |
7536 | | DXIL::OpCode::NumOpCodes}, |
7537 | | {IntrinsicOp::IOP_VkReadClock, UnsupportedVulkanIntrinsic, |
7538 | | DXIL::OpCode::NumOpCodes}, |
7539 | | {IntrinsicOp::IOP_Vkext_execution_mode, UnsupportedVulkanIntrinsic, |
7540 | | DXIL::OpCode::NumOpCodes}, |
7541 | | {IntrinsicOp::IOP_Vkext_execution_mode_id, UnsupportedVulkanIntrinsic, |
7542 | | DXIL::OpCode::NumOpCodes}, |
7543 | | {IntrinsicOp::MOP_Append, StreamOutputLower, DXIL::OpCode::EmitStream}, |
7544 | | {IntrinsicOp::MOP_RestartStrip, StreamOutputLower, DXIL::OpCode::CutStream}, |
7545 | | {IntrinsicOp::MOP_CalculateLevelOfDetail, TranslateCalculateLOD, |
7546 | | DXIL::OpCode::NumOpCodes}, |
7547 | | {IntrinsicOp::MOP_CalculateLevelOfDetailUnclamped, TranslateCalculateLOD, |
7548 | | DXIL::OpCode::NumOpCodes}, |
7549 | | {IntrinsicOp::MOP_GetDimensions, TranslateGetDimensions, |
7550 | | DXIL::OpCode::NumOpCodes}, |
7551 | | {IntrinsicOp::MOP_Load, TranslateResourceLoad, DXIL::OpCode::NumOpCodes}, |
7552 | | {IntrinsicOp::MOP_Sample, TranslateSample, DXIL::OpCode::Sample}, |
7553 | | {IntrinsicOp::MOP_SampleBias, TranslateSample, DXIL::OpCode::SampleBias}, |
7554 | | {IntrinsicOp::MOP_SampleCmp, TranslateSample, DXIL::OpCode::SampleCmp}, |
7555 | | {IntrinsicOp::MOP_SampleCmpBias, TranslateSample, |
7556 | | DXIL::OpCode::SampleCmpBias}, |
7557 | | {IntrinsicOp::MOP_SampleCmpGrad, TranslateSample, |
7558 | | DXIL::OpCode::SampleCmpGrad}, |
7559 | | {IntrinsicOp::MOP_SampleCmpLevel, TranslateSample, |
7560 | | DXIL::OpCode::SampleCmpLevel}, |
7561 | | {IntrinsicOp::MOP_SampleCmpLevelZero, TranslateSample, |
7562 | | DXIL::OpCode::SampleCmpLevelZero}, |
7563 | | {IntrinsicOp::MOP_SampleGrad, TranslateSample, DXIL::OpCode::SampleGrad}, |
7564 | | {IntrinsicOp::MOP_SampleLevel, TranslateSample, DXIL::OpCode::SampleLevel}, |
7565 | | {IntrinsicOp::MOP_Gather, TranslateGather, DXIL::OpCode::TextureGather}, |
7566 | | {IntrinsicOp::MOP_GatherAlpha, TranslateGather, |
7567 | | DXIL::OpCode::TextureGather}, |
7568 | | {IntrinsicOp::MOP_GatherBlue, TranslateGather, DXIL::OpCode::TextureGather}, |
7569 | | {IntrinsicOp::MOP_GatherCmp, TranslateGather, |
7570 | | DXIL::OpCode::TextureGatherCmp}, |
7571 | | {IntrinsicOp::MOP_GatherCmpAlpha, TranslateGather, |
7572 | | DXIL::OpCode::TextureGatherCmp}, |
7573 | | {IntrinsicOp::MOP_GatherCmpBlue, TranslateGather, |
7574 | | DXIL::OpCode::TextureGatherCmp}, |
7575 | | {IntrinsicOp::MOP_GatherCmpGreen, TranslateGather, |
7576 | | DXIL::OpCode::TextureGatherCmp}, |
7577 | | {IntrinsicOp::MOP_GatherCmpRed, TranslateGather, |
7578 | | DXIL::OpCode::TextureGatherCmp}, |
7579 | | {IntrinsicOp::MOP_GatherGreen, TranslateGather, |
7580 | | DXIL::OpCode::TextureGather}, |
7581 | | {IntrinsicOp::MOP_GatherRaw, TranslateGather, |
7582 | | DXIL::OpCode::TextureGatherRaw}, |
7583 | | {IntrinsicOp::MOP_GatherRed, TranslateGather, DXIL::OpCode::TextureGather}, |
7584 | | {IntrinsicOp::MOP_GetSamplePosition, TranslateGetSamplePosition, |
7585 | | DXIL::OpCode::NumOpCodes}, |
7586 | | {IntrinsicOp::MOP_Load2, TranslateResourceLoad, DXIL::OpCode::NumOpCodes}, |
7587 | | {IntrinsicOp::MOP_Load3, TranslateResourceLoad, DXIL::OpCode::NumOpCodes}, |
7588 | | {IntrinsicOp::MOP_Load4, TranslateResourceLoad, DXIL::OpCode::NumOpCodes}, |
7589 | | {IntrinsicOp::MOP_InterlockedAdd, TranslateMopAtomicBinaryOperation, |
7590 | | DXIL::OpCode::NumOpCodes}, |
7591 | | {IntrinsicOp::MOP_InterlockedAdd64, TranslateMopAtomicBinaryOperation, |
7592 | | DXIL::OpCode::NumOpCodes}, |
7593 | | {IntrinsicOp::MOP_InterlockedAnd, TranslateMopAtomicBinaryOperation, |
7594 | | DXIL::OpCode::NumOpCodes}, |
7595 | | {IntrinsicOp::MOP_InterlockedAnd64, TranslateMopAtomicBinaryOperation, |
7596 | | DXIL::OpCode::NumOpCodes}, |
7597 | | {IntrinsicOp::MOP_InterlockedCompareExchange, TranslateMopAtomicCmpXChg, |
7598 | | DXIL::OpCode::NumOpCodes}, |
7599 | | {IntrinsicOp::MOP_InterlockedCompareExchange64, TranslateMopAtomicCmpXChg, |
7600 | | DXIL::OpCode::NumOpCodes}, |
7601 | | {IntrinsicOp::MOP_InterlockedCompareExchangeFloatBitwise, |
7602 | | TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes}, |
7603 | | {IntrinsicOp::MOP_InterlockedCompareStore, TranslateMopAtomicCmpXChg, |
7604 | | DXIL::OpCode::NumOpCodes}, |
7605 | | {IntrinsicOp::MOP_InterlockedCompareStore64, TranslateMopAtomicCmpXChg, |
7606 | | DXIL::OpCode::NumOpCodes}, |
7607 | | {IntrinsicOp::MOP_InterlockedCompareStoreFloatBitwise, |
7608 | | TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes}, |
7609 | | {IntrinsicOp::MOP_InterlockedExchange, TranslateMopAtomicBinaryOperation, |
7610 | | DXIL::OpCode::NumOpCodes}, |
7611 | | {IntrinsicOp::MOP_InterlockedExchange64, TranslateMopAtomicBinaryOperation, |
7612 | | DXIL::OpCode::NumOpCodes}, |
7613 | | {IntrinsicOp::MOP_InterlockedExchangeFloat, |
7614 | | TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes}, |
7615 | | {IntrinsicOp::MOP_InterlockedMax, TranslateMopAtomicBinaryOperation, |
7616 | | DXIL::OpCode::NumOpCodes}, |
7617 | | {IntrinsicOp::MOP_InterlockedMax64, TranslateMopAtomicBinaryOperation, |
7618 | | DXIL::OpCode::NumOpCodes}, |
7619 | | {IntrinsicOp::MOP_InterlockedMin, TranslateMopAtomicBinaryOperation, |
7620 | | DXIL::OpCode::NumOpCodes}, |
7621 | | {IntrinsicOp::MOP_InterlockedMin64, TranslateMopAtomicBinaryOperation, |
7622 | | DXIL::OpCode::NumOpCodes}, |
7623 | | {IntrinsicOp::MOP_InterlockedOr, TranslateMopAtomicBinaryOperation, |
7624 | | DXIL::OpCode::NumOpCodes}, |
7625 | | {IntrinsicOp::MOP_InterlockedOr64, TranslateMopAtomicBinaryOperation, |
7626 | | DXIL::OpCode::NumOpCodes}, |
7627 | | {IntrinsicOp::MOP_InterlockedXor, TranslateMopAtomicBinaryOperation, |
7628 | | DXIL::OpCode::NumOpCodes}, |
7629 | | {IntrinsicOp::MOP_InterlockedXor64, TranslateMopAtomicBinaryOperation, |
7630 | | DXIL::OpCode::NumOpCodes}, |
7631 | | {IntrinsicOp::MOP_Store, TranslateResourceStore, DXIL::OpCode::NumOpCodes}, |
7632 | | {IntrinsicOp::MOP_Store2, TranslateResourceStore, DXIL::OpCode::NumOpCodes}, |
7633 | | {IntrinsicOp::MOP_Store3, TranslateResourceStore, DXIL::OpCode::NumOpCodes}, |
7634 | | {IntrinsicOp::MOP_Store4, TranslateResourceStore, DXIL::OpCode::NumOpCodes}, |
7635 | | {IntrinsicOp::MOP_DecrementCounter, GenerateUpdateCounter, |
7636 | | DXIL::OpCode::NumOpCodes}, |
7637 | | {IntrinsicOp::MOP_IncrementCounter, GenerateUpdateCounter, |
7638 | | DXIL::OpCode::NumOpCodes}, |
7639 | | {IntrinsicOp::MOP_Consume, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7640 | | {IntrinsicOp::MOP_WriteSamplerFeedback, TranslateWriteSamplerFeedback, |
7641 | | DXIL::OpCode::WriteSamplerFeedback}, |
7642 | | {IntrinsicOp::MOP_WriteSamplerFeedbackBias, TranslateWriteSamplerFeedback, |
7643 | | DXIL::OpCode::WriteSamplerFeedbackBias}, |
7644 | | {IntrinsicOp::MOP_WriteSamplerFeedbackGrad, TranslateWriteSamplerFeedback, |
7645 | | DXIL::OpCode::WriteSamplerFeedbackGrad}, |
7646 | | {IntrinsicOp::MOP_WriteSamplerFeedbackLevel, TranslateWriteSamplerFeedback, |
7647 | | DXIL::OpCode::WriteSamplerFeedbackLevel}, |
7648 | | |
7649 | | {IntrinsicOp::MOP_Abort, TranslateGenericRayQueryMethod, |
7650 | | DXIL::OpCode::RayQuery_Abort}, |
7651 | | {IntrinsicOp::MOP_CandidateGeometryIndex, TranslateGenericRayQueryMethod, |
7652 | | DXIL::OpCode::RayQuery_CandidateGeometryIndex}, |
7653 | | {IntrinsicOp::MOP_CandidateInstanceContributionToHitGroupIndex, |
7654 | | TranslateGenericRayQueryMethod, |
7655 | | DXIL::OpCode::RayQuery_CandidateInstanceContributionToHitGroupIndex}, |
7656 | | {IntrinsicOp::MOP_CandidateInstanceID, TranslateGenericRayQueryMethod, |
7657 | | DXIL::OpCode::RayQuery_CandidateInstanceID}, |
7658 | | {IntrinsicOp::MOP_CandidateInstanceIndex, TranslateGenericRayQueryMethod, |
7659 | | DXIL::OpCode::RayQuery_CandidateInstanceIndex}, |
7660 | | {IntrinsicOp::MOP_CandidateObjectRayDirection, |
7661 | | TranslateRayQueryFloat3Getter, |
7662 | | DXIL::OpCode::RayQuery_CandidateObjectRayDirection}, |
7663 | | {IntrinsicOp::MOP_CandidateObjectRayOrigin, TranslateRayQueryFloat3Getter, |
7664 | | DXIL::OpCode::RayQuery_CandidateObjectRayOrigin}, |
7665 | | {IntrinsicOp::MOP_CandidateObjectToWorld3x4, |
7666 | | TranslateRayQueryMatrix3x4Operation, |
7667 | | DXIL::OpCode::RayQuery_CandidateObjectToWorld3x4}, |
7668 | | {IntrinsicOp::MOP_CandidateObjectToWorld4x3, |
7669 | | TranslateRayQueryTransposedMatrix3x4Operation, |
7670 | | DXIL::OpCode::RayQuery_CandidateObjectToWorld3x4}, |
7671 | | {IntrinsicOp::MOP_CandidatePrimitiveIndex, TranslateGenericRayQueryMethod, |
7672 | | DXIL::OpCode::RayQuery_CandidatePrimitiveIndex}, |
7673 | | {IntrinsicOp::MOP_CandidateProceduralPrimitiveNonOpaque, |
7674 | | TranslateGenericRayQueryMethod, |
7675 | | DXIL::OpCode::RayQuery_CandidateProceduralPrimitiveNonOpaque}, |
7676 | | {IntrinsicOp::MOP_CandidateTriangleBarycentrics, |
7677 | | TranslateRayQueryFloat2Getter, |
7678 | | DXIL::OpCode::RayQuery_CandidateTriangleBarycentrics}, |
7679 | | {IntrinsicOp::MOP_CandidateTriangleFrontFace, |
7680 | | TranslateGenericRayQueryMethod, |
7681 | | DXIL::OpCode::RayQuery_CandidateTriangleFrontFace}, |
7682 | | {IntrinsicOp::MOP_CandidateTriangleRayT, TranslateGenericRayQueryMethod, |
7683 | | DXIL::OpCode::RayQuery_CandidateTriangleRayT}, |
7684 | | {IntrinsicOp::MOP_CandidateType, TranslateGenericRayQueryMethod, |
7685 | | DXIL::OpCode::RayQuery_CandidateType}, |
7686 | | {IntrinsicOp::MOP_CandidateWorldToObject3x4, |
7687 | | TranslateRayQueryMatrix3x4Operation, |
7688 | | DXIL::OpCode::RayQuery_CandidateWorldToObject3x4}, |
7689 | | {IntrinsicOp::MOP_CandidateWorldToObject4x3, |
7690 | | TranslateRayQueryTransposedMatrix3x4Operation, |
7691 | | DXIL::OpCode::RayQuery_CandidateWorldToObject3x4}, |
7692 | | {IntrinsicOp::MOP_CommitNonOpaqueTriangleHit, |
7693 | | TranslateGenericRayQueryMethod, |
7694 | | DXIL::OpCode::RayQuery_CommitNonOpaqueTriangleHit}, |
7695 | | {IntrinsicOp::MOP_CommitProceduralPrimitiveHit, |
7696 | | TranslateCommitProceduralPrimitiveHit, |
7697 | | DXIL::OpCode::RayQuery_CommitProceduralPrimitiveHit}, |
7698 | | {IntrinsicOp::MOP_CommittedGeometryIndex, TranslateGenericRayQueryMethod, |
7699 | | DXIL::OpCode::RayQuery_CommittedGeometryIndex}, |
7700 | | {IntrinsicOp::MOP_CommittedInstanceContributionToHitGroupIndex, |
7701 | | TranslateGenericRayQueryMethod, |
7702 | | DXIL::OpCode::RayQuery_CommittedInstanceContributionToHitGroupIndex}, |
7703 | | {IntrinsicOp::MOP_CommittedInstanceID, TranslateGenericRayQueryMethod, |
7704 | | DXIL::OpCode::RayQuery_CommittedInstanceID}, |
7705 | | {IntrinsicOp::MOP_CommittedInstanceIndex, TranslateGenericRayQueryMethod, |
7706 | | DXIL::OpCode::RayQuery_CommittedInstanceIndex}, |
7707 | | {IntrinsicOp::MOP_CommittedObjectRayDirection, |
7708 | | TranslateRayQueryFloat3Getter, |
7709 | | DXIL::OpCode::RayQuery_CommittedObjectRayDirection}, |
7710 | | {IntrinsicOp::MOP_CommittedObjectRayOrigin, TranslateRayQueryFloat3Getter, |
7711 | | DXIL::OpCode::RayQuery_CommittedObjectRayOrigin}, |
7712 | | {IntrinsicOp::MOP_CommittedObjectToWorld3x4, |
7713 | | TranslateRayQueryMatrix3x4Operation, |
7714 | | DXIL::OpCode::RayQuery_CommittedObjectToWorld3x4}, |
7715 | | {IntrinsicOp::MOP_CommittedObjectToWorld4x3, |
7716 | | TranslateRayQueryTransposedMatrix3x4Operation, |
7717 | | DXIL::OpCode::RayQuery_CommittedObjectToWorld3x4}, |
7718 | | {IntrinsicOp::MOP_CommittedPrimitiveIndex, TranslateGenericRayQueryMethod, |
7719 | | DXIL::OpCode::RayQuery_CommittedPrimitiveIndex}, |
7720 | | {IntrinsicOp::MOP_CommittedRayT, TranslateGenericRayQueryMethod, |
7721 | | DXIL::OpCode::RayQuery_CommittedRayT}, |
7722 | | {IntrinsicOp::MOP_CommittedStatus, TranslateGenericRayQueryMethod, |
7723 | | DXIL::OpCode::RayQuery_CommittedStatus}, |
7724 | | {IntrinsicOp::MOP_CommittedTriangleBarycentrics, |
7725 | | TranslateRayQueryFloat2Getter, |
7726 | | DXIL::OpCode::RayQuery_CommittedTriangleBarycentrics}, |
7727 | | {IntrinsicOp::MOP_CommittedTriangleFrontFace, |
7728 | | TranslateGenericRayQueryMethod, |
7729 | | DXIL::OpCode::RayQuery_CommittedTriangleFrontFace}, |
7730 | | {IntrinsicOp::MOP_CommittedWorldToObject3x4, |
7731 | | TranslateRayQueryMatrix3x4Operation, |
7732 | | DXIL::OpCode::RayQuery_CommittedWorldToObject3x4}, |
7733 | | {IntrinsicOp::MOP_CommittedWorldToObject4x3, |
7734 | | TranslateRayQueryTransposedMatrix3x4Operation, |
7735 | | DXIL::OpCode::RayQuery_CommittedWorldToObject3x4}, |
7736 | | {IntrinsicOp::MOP_Proceed, TranslateGenericRayQueryMethod, |
7737 | | DXIL::OpCode::RayQuery_Proceed}, |
7738 | | {IntrinsicOp::MOP_RayFlags, TranslateGenericRayQueryMethod, |
7739 | | DXIL::OpCode::RayQuery_RayFlags}, |
7740 | | {IntrinsicOp::MOP_RayTMin, TranslateGenericRayQueryMethod, |
7741 | | DXIL::OpCode::RayQuery_RayTMin}, |
7742 | | {IntrinsicOp::MOP_TraceRayInline, TranslateTraceRayInline, |
7743 | | DXIL::OpCode::RayQuery_TraceRayInline}, |
7744 | | {IntrinsicOp::MOP_WorldRayDirection, TranslateRayQueryFloat3Getter, |
7745 | | DXIL::OpCode::RayQuery_WorldRayDirection}, |
7746 | | {IntrinsicOp::MOP_WorldRayOrigin, TranslateRayQueryFloat3Getter, |
7747 | | DXIL::OpCode::RayQuery_WorldRayOrigin}, |
7748 | | {IntrinsicOp::MOP_Count, TranslateNodeGetInputRecordCount, |
7749 | | DXIL::OpCode::GetInputRecordCount}, |
7750 | | {IntrinsicOp::MOP_FinishedCrossGroupSharing, |
7751 | | TranslateNodeFinishedCrossGroupSharing, |
7752 | | DXIL::OpCode::FinishedCrossGroupSharing}, |
7753 | | {IntrinsicOp::MOP_GetGroupNodeOutputRecords, |
7754 | | TranslateGetGroupNodeOutputRecords, |
7755 | | DXIL::OpCode::AllocateNodeOutputRecords}, |
7756 | | {IntrinsicOp::MOP_GetThreadNodeOutputRecords, |
7757 | | TranslateGetThreadNodeOutputRecords, |
7758 | | DXIL::OpCode::AllocateNodeOutputRecords}, |
7759 | | {IntrinsicOp::MOP_IsValid, TranslateNodeOutputIsValid, |
7760 | | DXIL::OpCode::NodeOutputIsValid}, |
7761 | | {IntrinsicOp::MOP_GroupIncrementOutputCount, |
7762 | | TranslateNodeGroupIncrementOutputCount, |
7763 | | DXIL::OpCode::IncrementOutputCount}, |
7764 | | {IntrinsicOp::MOP_ThreadIncrementOutputCount, |
7765 | | TranslateNodeThreadIncrementOutputCount, |
7766 | | DXIL::OpCode::IncrementOutputCount}, |
7767 | | {IntrinsicOp::MOP_OutputComplete, TranslateNodeOutputComplete, |
7768 | | DXIL::OpCode::OutputComplete}, |
7769 | | |
7770 | | // SPIRV change starts |
7771 | | {IntrinsicOp::MOP_SubpassLoad, UnsupportedVulkanIntrinsic, |
7772 | | DXIL::OpCode::NumOpCodes}, |
7773 | | // SPIRV change ends |
7774 | | |
7775 | | // Manually added part. |
7776 | | {IntrinsicOp::IOP_InterlockedUMax, TranslateIopAtomicBinaryOperation, |
7777 | | DXIL::OpCode::NumOpCodes}, |
7778 | | {IntrinsicOp::IOP_InterlockedUMin, TranslateIopAtomicBinaryOperation, |
7779 | | DXIL::OpCode::NumOpCodes}, |
7780 | | {IntrinsicOp::IOP_WaveActiveUMax, TranslateWaveA2A, |
7781 | | DXIL::OpCode::WaveActiveOp}, |
7782 | | {IntrinsicOp::IOP_WaveActiveUMin, TranslateWaveA2A, |
7783 | | DXIL::OpCode::WaveActiveOp}, |
7784 | | {IntrinsicOp::IOP_WaveActiveUProduct, TranslateWaveA2A, |
7785 | | DXIL::OpCode::WaveActiveOp}, |
7786 | | {IntrinsicOp::IOP_WaveActiveUSum, TranslateWaveA2A, |
7787 | | DXIL::OpCode::WaveActiveOp}, |
7788 | | {IntrinsicOp::IOP_WaveMultiPrefixUProduct, TranslateWaveMultiPrefix, |
7789 | | DXIL::OpCode::WaveMultiPrefixOp}, |
7790 | | {IntrinsicOp::IOP_WaveMultiPrefixUSum, TranslateWaveMultiPrefix, |
7791 | | DXIL::OpCode::WaveMultiPrefixOp}, |
7792 | | {IntrinsicOp::IOP_WavePrefixUProduct, TranslateWaveA2A, |
7793 | | DXIL::OpCode::WavePrefixOp}, |
7794 | | {IntrinsicOp::IOP_WavePrefixUSum, TranslateWaveA2A, |
7795 | | DXIL::OpCode::WavePrefixOp}, |
7796 | | {IntrinsicOp::IOP_uabs, TranslateUAbs, DXIL::OpCode::NumOpCodes}, |
7797 | | {IntrinsicOp::IOP_uclamp, TranslateClamp, DXIL::OpCode::NumOpCodes}, |
7798 | | {IntrinsicOp::IOP_udot, TranslateDot, DXIL::OpCode::NumOpCodes}, |
7799 | | {IntrinsicOp::IOP_ufirstbithigh, TranslateFirstbitHi, |
7800 | | DXIL::OpCode::FirstbitHi}, |
7801 | | {IntrinsicOp::IOP_umad, TranslateFUITrinary, DXIL::OpCode::UMad}, |
7802 | | {IntrinsicOp::IOP_umax, TranslateFUIBinary, DXIL::OpCode::UMax}, |
7803 | | {IntrinsicOp::IOP_umin, TranslateFUIBinary, DXIL::OpCode::UMin}, |
7804 | | {IntrinsicOp::IOP_umul, TranslateMul, DXIL::OpCode::UMul}, |
7805 | | {IntrinsicOp::IOP_usign, TranslateUSign, DXIL::OpCode::UMax}, |
7806 | | {IntrinsicOp::MOP_InterlockedUMax, TranslateMopAtomicBinaryOperation, |
7807 | | DXIL::OpCode::NumOpCodes}, |
7808 | | {IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, |
7809 | | DXIL::OpCode::NumOpCodes}, |
7810 | | {IntrinsicOp::MOP_DxHitObject_MakeNop, TranslateHitObjectMakeNop, |
7811 | | DXIL::OpCode::HitObject_MakeNop}, |
7812 | | {IntrinsicOp::IOP_DxMaybeReorderThread, TranslateMaybeReorderThread, |
7813 | | DXIL::OpCode::MaybeReorderThread}, |
7814 | | {IntrinsicOp::IOP_Vkreinterpret_pointer_cast, UnsupportedVulkanIntrinsic, |
7815 | | DXIL::OpCode::NumOpCodes}, |
7816 | | {IntrinsicOp::IOP_Vkstatic_pointer_cast, UnsupportedVulkanIntrinsic, |
7817 | | DXIL::OpCode::NumOpCodes}, |
7818 | | {IntrinsicOp::MOP_GetBufferContents, UnsupportedVulkanIntrinsic, |
7819 | | DXIL::OpCode::NumOpCodes}, |
7820 | | {IntrinsicOp::MOP_DxHitObject_FromRayQuery, TranslateHitObjectFromRayQuery, |
7821 | | DXIL::OpCode::HitObject_FromRayQuery}, |
7822 | | {IntrinsicOp::MOP_DxHitObject_GetAttributes, |
7823 | | TranslateHitObjectGetAttributes, DXIL::OpCode::HitObject_Attributes}, |
7824 | | {IntrinsicOp::MOP_DxHitObject_GetGeometryIndex, |
7825 | | TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_GeometryIndex}, |
7826 | | {IntrinsicOp::MOP_DxHitObject_GetHitKind, TranslateHitObjectScalarGetter, |
7827 | | DXIL::OpCode::HitObject_HitKind}, |
7828 | | {IntrinsicOp::MOP_DxHitObject_GetInstanceID, TranslateHitObjectScalarGetter, |
7829 | | DXIL::OpCode::HitObject_InstanceID}, |
7830 | | {IntrinsicOp::MOP_DxHitObject_GetInstanceIndex, |
7831 | | TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_InstanceIndex}, |
7832 | | {IntrinsicOp::MOP_DxHitObject_GetObjectRayDirection, |
7833 | | TranslateHitObjectVectorGetter, |
7834 | | DXIL::OpCode::HitObject_ObjectRayDirection}, |
7835 | | {IntrinsicOp::MOP_DxHitObject_GetObjectRayOrigin, |
7836 | | TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_ObjectRayOrigin}, |
7837 | | {IntrinsicOp::MOP_DxHitObject_GetObjectToWorld3x4, |
7838 | | TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_ObjectToWorld3x4}, |
7839 | | {IntrinsicOp::MOP_DxHitObject_GetObjectToWorld4x3, |
7840 | | TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_ObjectToWorld3x4}, |
7841 | | {IntrinsicOp::MOP_DxHitObject_GetPrimitiveIndex, |
7842 | | TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_PrimitiveIndex}, |
7843 | | {IntrinsicOp::MOP_DxHitObject_GetRayFlags, TranslateHitObjectScalarGetter, |
7844 | | DXIL::OpCode::HitObject_RayFlags}, |
7845 | | {IntrinsicOp::MOP_DxHitObject_GetRayTCurrent, |
7846 | | TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_RayTCurrent}, |
7847 | | {IntrinsicOp::MOP_DxHitObject_GetRayTMin, TranslateHitObjectScalarGetter, |
7848 | | DXIL::OpCode::HitObject_RayTMin}, |
7849 | | {IntrinsicOp::MOP_DxHitObject_GetShaderTableIndex, |
7850 | | TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_ShaderTableIndex}, |
7851 | | {IntrinsicOp::MOP_DxHitObject_GetWorldRayDirection, |
7852 | | TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_WorldRayDirection}, |
7853 | | {IntrinsicOp::MOP_DxHitObject_GetWorldRayOrigin, |
7854 | | TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_WorldRayOrigin}, |
7855 | | {IntrinsicOp::MOP_DxHitObject_GetWorldToObject3x4, |
7856 | | TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_WorldToObject3x4}, |
7857 | | {IntrinsicOp::MOP_DxHitObject_GetWorldToObject4x3, |
7858 | | TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_WorldToObject3x4}, |
7859 | | {IntrinsicOp::MOP_DxHitObject_Invoke, TranslateHitObjectInvoke, |
7860 | | DXIL::OpCode::HitObject_Invoke}, |
7861 | | {IntrinsicOp::MOP_DxHitObject_IsHit, TranslateHitObjectScalarGetter, |
7862 | | DXIL::OpCode::HitObject_IsHit}, |
7863 | | {IntrinsicOp::MOP_DxHitObject_IsMiss, TranslateHitObjectScalarGetter, |
7864 | | DXIL::OpCode::HitObject_IsMiss}, |
7865 | | {IntrinsicOp::MOP_DxHitObject_IsNop, TranslateHitObjectScalarGetter, |
7866 | | DXIL::OpCode::HitObject_IsNop}, |
7867 | | {IntrinsicOp::MOP_DxHitObject_LoadLocalRootTableConstant, |
7868 | | TranslateHitObjectLoadLocalRootTableConstant, |
7869 | | DXIL::OpCode::HitObject_LoadLocalRootTableConstant}, |
7870 | | {IntrinsicOp::MOP_DxHitObject_MakeMiss, TranslateHitObjectMakeMiss, |
7871 | | DXIL::OpCode::HitObject_MakeMiss}, |
7872 | | {IntrinsicOp::MOP_DxHitObject_SetShaderTableIndex, |
7873 | | TranslateHitObjectSetShaderTableIndex, |
7874 | | DXIL::OpCode::HitObject_SetShaderTableIndex}, |
7875 | | {IntrinsicOp::MOP_DxHitObject_TraceRay, TranslateHitObjectTraceRay, |
7876 | | DXIL::OpCode::HitObject_TraceRay}, |
7877 | | |
7878 | | {IntrinsicOp::IOP_isnormal, TrivialIsSpecialFloat, DXIL::OpCode::IsNormal}, |
7879 | | |
7880 | | {IntrinsicOp::IOP_GetGroupWaveCount, TranslateWaveToVal, |
7881 | | DXIL::OpCode::GetGroupWaveCount}, |
7882 | | {IntrinsicOp::IOP_GetGroupWaveIndex, TranslateWaveToVal, |
7883 | | DXIL::OpCode::GetGroupWaveIndex}, |
7884 | | |
7885 | | {IntrinsicOp::IOP_ClusterID, TrivialNoArgWithRetNoOverloadOperation, |
7886 | | DXIL::OpCode::ClusterID}, |
7887 | | {IntrinsicOp::MOP_CandidateClusterID, TranslateGenericRayQueryMethod, |
7888 | | DXIL::OpCode::RayQuery_CandidateClusterID}, |
7889 | | {IntrinsicOp::MOP_CommittedClusterID, TranslateGenericRayQueryMethod, |
7890 | | DXIL::OpCode::RayQuery_CommittedClusterID}, |
7891 | | {IntrinsicOp::MOP_DxHitObject_GetClusterID, TranslateHitObjectScalarGetter, |
7892 | | DXIL::OpCode::HitObject_ClusterID}, |
7893 | | |
7894 | | {IntrinsicOp::IOP_TriangleObjectPositions, TranslateTriangleObjectPositions, |
7895 | | DXIL::OpCode::TriangleObjectPosition}, |
7896 | | {IntrinsicOp::MOP_CandidateTriangleObjectPositions, |
7897 | | TranslateRayQueryTriangleObjectPositions, |
7898 | | DXIL::OpCode::RayQuery_CandidateTriangleObjectPosition}, |
7899 | | {IntrinsicOp::MOP_CommittedTriangleObjectPositions, |
7900 | | TranslateRayQueryTriangleObjectPositions, |
7901 | | DXIL::OpCode::RayQuery_CommittedTriangleObjectPosition}, |
7902 | | {IntrinsicOp::MOP_DxHitObject_TriangleObjectPositions, |
7903 | | TranslateHitObjectTriangleObjectPositions, |
7904 | | DXIL::OpCode::HitObject_TriangleObjectPosition}, |
7905 | | |
7906 | | {IntrinsicOp::IOP___builtin_LinAlg_CopyConvertMatrix, |
7907 | | TranslateLinAlgCopyConvertMatrix, DXIL::OpCode::LinAlgCopyConvertMatrix}, |
7908 | | {IntrinsicOp::IOP___builtin_LinAlg_FillMatrix, TranslateLinAlgFillMatrix, |
7909 | | DXIL::OpCode::LinAlgFillMatrix}, |
7910 | | {IntrinsicOp::IOP___builtin_LinAlg_MatrixGetCoordinate, |
7911 | | TranslateLinAlgMatrixGetCoordinate, |
7912 | | DXIL::OpCode::LinAlgMatrixGetCoordinate}, |
7913 | | {IntrinsicOp::IOP___builtin_LinAlg_MatrixGetElement, |
7914 | | TranslateLinAlgMatrixGetElement, DXIL::OpCode::LinAlgMatrixGetElement}, |
7915 | | {IntrinsicOp::IOP___builtin_LinAlg_MatrixLength, TrivialUnaryOperation, |
7916 | | DXIL::OpCode::LinAlgMatrixLength}, |
7917 | | {IntrinsicOp::IOP___builtin_LinAlg_MatrixLoadFromDescriptor, |
7918 | | TranslateLinAlgMatrixLoadFromDescriptor, |
7919 | | DXIL::OpCode::LinAlgMatrixLoadFromDescriptor}, |
7920 | | {IntrinsicOp::IOP___builtin_LinAlg_MatrixLoadFromMemory, |
7921 | | TranslateLinAlgMatrixLoadFromMemory, |
7922 | | DXIL::OpCode::LinAlgMatrixLoadFromMemory}, |
7923 | | {IntrinsicOp::IOP___builtin_LinAlg_MatrixSetElement, |
7924 | | TranslateLinAlgMatrixSetElement, DXIL::OpCode::LinAlgMatrixSetElement}, |
7925 | | {IntrinsicOp::IOP___builtin_LinAlg_MatrixStoreToDescriptor, |
7926 | | TranslateLinAlgMatrixAccumStoreToDescriptor, |
7927 | | DXIL::OpCode::LinAlgMatrixStoreToDescriptor}, |
7928 | | {IntrinsicOp::IOP___builtin_LinAlg_MatrixStoreToMemory, |
7929 | | TranslateLinAlgMatrixAccumStoreToMemory, |
7930 | | DXIL::OpCode::LinAlgMatrixStoreToMemory}, |
7931 | | {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulate, |
7932 | | TranslateLinAlgMatrixAccumulate, DXIL::OpCode::LinAlgMatrixAccumulate}, |
7933 | | {IntrinsicOp::IOP___builtin_LinAlg_MatrixMatrixMultiply, |
7934 | | TranslateLinAlgMatrixMatrixMultiply, DXIL::OpCode::LinAlgMatrixMultiply}, |
7935 | | {IntrinsicOp::IOP___builtin_LinAlg_MatrixMatrixMultiplyAccumulate, |
7936 | | TranslateLinAlgMatrixMatrixMultiplyAccumulate, |
7937 | | DXIL::OpCode::LinAlgMatrixMultiplyAccumulate}, |
7938 | | {IntrinsicOp::IOP___builtin_LinAlg_MatrixQueryAccumulatorLayout, |
7939 | | TrivialNoArgOperation, DXIL::OpCode::LinAlgMatrixQueryAccumulatorLayout}, |
7940 | | {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulateToDescriptor, |
7941 | | TranslateLinAlgMatrixAccumStoreToDescriptor, |
7942 | | DXIL::OpCode::LinAlgMatrixAccumulateToDescriptor}, |
7943 | | {IntrinsicOp::IOP___builtin_LinAlg_MatrixAccumulateToMemory, |
7944 | | TranslateLinAlgMatrixAccumStoreToMemory, |
7945 | | DXIL::OpCode::LinAlgMatrixAccumulateToMemory}, |
7946 | | {IntrinsicOp::IOP___builtin_LinAlg_MatrixOuterProduct, |
7947 | | TranslateLinAlgMatrixOuterProduct, DXIL::OpCode::LinAlgMatrixOuterProduct}, |
7948 | | {IntrinsicOp::IOP___builtin_LinAlg_MatrixVectorMultiply, |
7949 | | TranslateLinAlgMatVecMul, DXIL::OpCode::LinAlgMatVecMul}, |
7950 | | {IntrinsicOp::IOP___builtin_LinAlg_MatrixVectorMultiplyAdd, |
7951 | | TranslateLinAlgMatVecMulAdd, DXIL::OpCode::LinAlgMatVecMulAdd}, |
7952 | | |
7953 | | {IntrinsicOp::IOP_DebugBreak, TrivialNoArgOperation, |
7954 | | DXIL::OpCode::DebugBreak}, |
7955 | | {IntrinsicOp::IOP_DxIsDebuggerPresent, TranslateWaveToVal, |
7956 | | DXIL::OpCode::IsDebuggerPresent}, |
7957 | | |
7958 | | {IntrinsicOp::IOP___builtin_LinAlg_Convert, TranslateLinAlgConvert, |
7959 | | DXIL::OpCode::LinAlgConvert}, |
7960 | | }; |
7961 | | constexpr size_t NumLowerTableEntries = |
7962 | | sizeof(gLowerTable) / sizeof(gLowerTable[0]); |
7963 | | static_assert( |
7964 | | NumLowerTableEntries == static_cast<size_t>(IntrinsicOp::Num_Intrinsics), |
7965 | | "Intrinsic lowering table must be updated to account for new intrinsics."); |
7966 | | |
7967 | | // Make table-order failures report the bad index via template instantiation |
7968 | | // parameter in the diagnostic. |
7969 | | // On failure, use hlsl_intrinsic_opcodes.json to find the mismatch. |
7970 | | template <size_t I> struct ValidateLowerTableEntry { |
7971 | | // Instantiate a type that fails if the opcode doesn't match the index. |
7972 | | static_assert( |
7973 | | I == static_cast<size_t>(gLowerTable[I].IntriOpcode), |
7974 | | "Intrinsic lowering table is out of order. " |
7975 | | "See ValidateLowerTableEntry<I> template instantiation for Index."); |
7976 | | static constexpr bool Value = |
7977 | | I == static_cast<size_t>(gLowerTable[I].IntriOpcode); |
7978 | | }; |
7979 | | |
7980 | | template <size_t I, size_t N> struct ValidateLowerTableImpl { |
7981 | | static constexpr bool Value = ValidateLowerTableEntry<I>::Value && |
7982 | | ValidateLowerTableImpl<I + 1, N>::Value; |
7983 | | }; |
7984 | | |
7985 | | template <size_t N> struct ValidateLowerTableImpl<N, N> { |
7986 | | static constexpr bool Value = true; |
7987 | | }; |
7988 | | |
7989 | | static_assert(ValidateLowerTableImpl<0, NumLowerTableEntries>::Value, |
7990 | | "Intrinsic lowering table is out of order."); |
7991 | | } // namespace |
7992 | | |
7993 | | static void TranslateBuiltinIntrinsic(CallInst *CI, |
7994 | | HLOperationLowerHelper &helper, |
7995 | | HLObjectOperationLowerHelper *pObjHelper, |
7996 | 65.8k | bool &Translated) { |
7997 | 65.8k | unsigned opcode = hlsl::GetHLOpcode(CI); |
7998 | 65.8k | const IntrinsicLower &lower = gLowerTable[opcode]; |
7999 | 65.8k | DXASSERT((unsigned)lower.IntriOpcode == opcode, |
8000 | 65.8k | "Intrinsic lowering table index must match intrinsic opcode."); |
8001 | 65.8k | Value *Result = lower.LowerFunc(CI, lower.IntriOpcode, lower.DxilOpcode, |
8002 | 65.8k | helper, pObjHelper, Translated); |
8003 | 65.8k | if (Result) |
8004 | 40.7k | CI->replaceAllUsesWith(Result); |
8005 | 65.8k | } |
8006 | | |
8007 | | // SharedMem. |
8008 | | namespace { |
8009 | | |
8010 | 498 | bool IsSharedMemPtr(Value *Ptr) { |
8011 | 498 | return Ptr->getType()->getPointerAddressSpace() == DXIL::kTGSMAddrSpace; |
8012 | 498 | } |
8013 | | |
8014 | 498 | bool IsLocalVariablePtr(Value *Ptr) { |
8015 | 1.10k | while (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { |
8016 | 610 | Ptr = GEP->getPointerOperand(); |
8017 | 610 | } |
8018 | 498 | bool isAlloca = isa<AllocaInst>(Ptr); |
8019 | 498 | if (isAlloca) |
8020 | 0 | return true; |
8021 | | |
8022 | 498 | GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr); |
8023 | 498 | if (!GV) |
8024 | 498 | return false; |
8025 | | |
8026 | 0 | return GV->getLinkage() == GlobalValue::LinkageTypes::InternalLinkage; |
8027 | 498 | } |
8028 | | |
8029 | | } // namespace |
8030 | | |
8031 | | // Constant buffer. |
8032 | | namespace { |
8033 | 2.31k | unsigned GetEltTypeByteSizeForConstBuf(Type *EltType, const DataLayout &DL) { |
8034 | 2.31k | DXASSERT(EltType->isIntegerTy() || EltType->isFloatingPointTy(), |
8035 | 2.31k | "not an element type"); |
8036 | | // TODO: Use real size after change constant buffer into linear layout. |
8037 | 2.31k | if (DL.getTypeSizeInBits(EltType) <= 32) { |
8038 | | // Constant buffer is 4 bytes align. |
8039 | 2.26k | return 4; |
8040 | 2.26k | } |
8041 | | |
8042 | 48 | return 8; |
8043 | 2.31k | } |
8044 | | |
8045 | | Value *GenerateCBLoad(Value *handle, Value *offset, Type *EltTy, OP *hlslOP, |
8046 | 0 | IRBuilder<> &Builder) { |
8047 | 0 | Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoad); |
8048 | 0 |
|
8049 | 0 | DXASSERT(!EltTy->isIntegerTy(1), |
8050 | 0 | "Bools should not be loaded as their register representation."); |
8051 | 0 |
|
8052 | 0 | // Align to 8 bytes for now. |
8053 | 0 | Constant *align = hlslOP->GetU32Const(8); |
8054 | 0 | Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoad, EltTy); |
8055 | 0 | return Builder.CreateCall(CBLoad, {OpArg, handle, offset, align}); |
8056 | 0 | } |
8057 | | |
8058 | | Value *TranslateConstBufMatLd(Type *matType, Value *handle, Value *offset, |
8059 | | bool colMajor, OP *OP, const DataLayout &DL, |
8060 | 0 | IRBuilder<> &Builder) { |
8061 | 0 | HLMatrixType MatTy = HLMatrixType::cast(matType); |
8062 | 0 | Type *EltTy = MatTy.getElementTypeForMem(); |
8063 | 0 | unsigned matSize = MatTy.getNumElements(); |
8064 | 0 | std::vector<Value *> elts(matSize); |
8065 | 0 | Value *EltByteSize = ConstantInt::get( |
8066 | 0 | offset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL)); |
8067 | 0 |
|
8068 | 0 | // TODO: use real size after change constant buffer into linear layout. |
8069 | 0 | Value *baseOffset = offset; |
8070 | 0 | for (unsigned i = 0; i < matSize; i++) { |
8071 | 0 | elts[i] = GenerateCBLoad(handle, baseOffset, EltTy, OP, Builder); |
8072 | 0 | baseOffset = Builder.CreateAdd(baseOffset, EltByteSize); |
8073 | 0 | } |
8074 | 0 |
|
8075 | 0 | Value *Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder); |
8076 | 0 | Vec = MatTy.emitLoweredMemToReg(Vec, Builder); |
8077 | 0 | return Vec; |
8078 | 0 | } |
8079 | | |
8080 | | void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset, |
8081 | | hlsl::OP *hlslOP, IRBuilder<> &Builder, |
8082 | | DxilFieldAnnotation *prevFieldAnnotation, |
8083 | | const DataLayout &DL, DxilTypeSystem &dxilTypeSys, |
8084 | | HLObjectOperationLowerHelper *pObjHelper); |
8085 | | |
8086 | | Value *GenerateVecEltFromGEP(Value *ldData, GetElementPtrInst *GEP, |
8087 | 104 | IRBuilder<> &Builder, bool bInsertLdNextToGEP) { |
8088 | 104 | DXASSERT(GEP->getNumIndices() == 2, "must have 2 level"); |
8089 | 104 | Value *baseIdx = (GEP->idx_begin())->get(); |
8090 | 104 | Value *zeroIdx = Builder.getInt32(0); |
8091 | 104 | DXASSERT_LOCALVAR(baseIdx && zeroIdx, baseIdx == zeroIdx, |
8092 | 104 | "base index must be 0"); |
8093 | 104 | Value *idx = (GEP->idx_begin() + 1)->get(); |
8094 | 104 | if (dyn_cast<ConstantInt>(idx)) { |
8095 | 56 | return Builder.CreateExtractElement(ldData, idx); |
8096 | 56 | } |
8097 | | |
8098 | | // Dynamic indexing. |
8099 | | // Copy vec to array. |
8100 | 48 | Type *Ty = ldData->getType(); |
8101 | 48 | Type *EltTy = Ty->getVectorElementType(); |
8102 | 48 | unsigned vecSize = Ty->getVectorNumElements(); |
8103 | 48 | ArrayType *AT = ArrayType::get(EltTy, vecSize); |
8104 | 48 | IRBuilder<> AllocaBuilder( |
8105 | 48 | GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt()); |
8106 | 48 | Value *tempArray = AllocaBuilder.CreateAlloca(AT); |
8107 | 48 | Value *zero = Builder.getInt32(0); |
8108 | 240 | for (unsigned int i = 0; i < vecSize; i++192 ) { |
8109 | 192 | Value *Elt = Builder.CreateExtractElement(ldData, Builder.getInt32(i)); |
8110 | 192 | Value *Ptr = |
8111 | 192 | Builder.CreateInBoundsGEP(tempArray, {zero, Builder.getInt32(i)}); |
8112 | 192 | Builder.CreateStore(Elt, Ptr); |
8113 | 192 | } |
8114 | | // Load from temp array. |
8115 | 48 | if (bInsertLdNextToGEP) { |
8116 | | // Insert the new GEP just before the old and to-be-deleted GEP |
8117 | 32 | Builder.SetInsertPoint(GEP); |
8118 | 32 | } |
8119 | 48 | Value *EltGEP = Builder.CreateInBoundsGEP(tempArray, {zero, idx}); |
8120 | 48 | return Builder.CreateLoad(EltGEP); |
8121 | 104 | } |
8122 | | |
8123 | | void TranslateResourceInCB(LoadInst *LI, |
8124 | | HLObjectOperationLowerHelper *pObjHelper, |
8125 | 314 | GlobalVariable *CbGV) { |
8126 | 314 | if (LI->user_empty()) { |
8127 | 0 | LI->eraseFromParent(); |
8128 | 0 | return; |
8129 | 0 | } |
8130 | | |
8131 | 314 | GetElementPtrInst *Ptr = cast<GetElementPtrInst>(LI->getPointerOperand()); |
8132 | 314 | CallInst *CI = cast<CallInst>(LI->user_back()); |
8133 | 314 | CallInst *Anno = cast<CallInst>(CI->user_back()); |
8134 | 314 | DxilResourceProperties RP = pObjHelper->GetResPropsFromAnnotateHandle(Anno); |
8135 | 314 | Value *ResPtr = pObjHelper->GetOrCreateResourceForCbPtr(Ptr, CbGV, RP); |
8136 | | |
8137 | | // Lower Ptr to GV base Ptr. |
8138 | 314 | Value *GvPtr = pObjHelper->LowerCbResourcePtr(Ptr, ResPtr); |
8139 | 314 | IRBuilder<> Builder(LI); |
8140 | 314 | Value *GvLd = Builder.CreateLoad(GvPtr); |
8141 | 314 | LI->replaceAllUsesWith(GvLd); |
8142 | 314 | LI->eraseFromParent(); |
8143 | 314 | } |
8144 | | |
8145 | | void TranslateCBAddressUser(Instruction *user, Value *handle, Value *baseOffset, |
8146 | | hlsl::OP *hlslOP, |
8147 | | DxilFieldAnnotation *prevFieldAnnotation, |
8148 | | DxilTypeSystem &dxilTypeSys, const DataLayout &DL, |
8149 | 0 | HLObjectOperationLowerHelper *pObjHelper) { |
8150 | 0 | IRBuilder<> Builder(user); |
8151 | 0 | if (CallInst *CI = dyn_cast<CallInst>(user)) { |
8152 | 0 | HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction()); |
8153 | 0 | unsigned opcode = GetHLOpcode(CI); |
8154 | 0 | if (group == HLOpcodeGroup::HLMatLoadStore) { |
8155 | 0 | HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode); |
8156 | 0 | bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad; |
8157 | 0 | DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad || |
8158 | 0 | matOp == HLMatLoadStoreOpcode::RowMatLoad, |
8159 | 0 | "No store on cbuffer"); |
8160 | 0 | Type *matType = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx) |
8161 | 0 | ->getType() |
8162 | 0 | ->getPointerElementType(); |
8163 | 0 | Value *newLd = TranslateConstBufMatLd(matType, handle, baseOffset, |
8164 | 0 | colMajor, hlslOP, DL, Builder); |
8165 | 0 | CI->replaceAllUsesWith(newLd); |
8166 | 0 | CI->eraseFromParent(); |
8167 | 0 | } else if (group == HLOpcodeGroup::HLSubscript) { |
8168 | 0 | HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode); |
8169 | 0 | Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx); |
8170 | 0 | HLMatrixType MatTy = |
8171 | 0 | HLMatrixType::cast(basePtr->getType()->getPointerElementType()); |
8172 | 0 | Type *EltTy = MatTy.getElementTypeForReg(); |
8173 | 0 |
|
8174 | 0 | Value *EltByteSize = ConstantInt::get( |
8175 | 0 | baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL)); |
8176 | 0 |
|
8177 | 0 | Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx); |
8178 | 0 |
|
8179 | 0 | Type *resultType = CI->getType()->getPointerElementType(); |
8180 | 0 | unsigned resultSize = 1; |
8181 | 0 | if (resultType->isVectorTy()) |
8182 | 0 | resultSize = resultType->getVectorNumElements(); |
8183 | 0 | DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix"); |
8184 | 0 | assert(resultSize <= 16); |
8185 | 0 | Value *idxList[16]; |
8186 | 0 |
|
8187 | 0 | switch (subOp) { |
8188 | 0 | case HLSubscriptOpcode::ColMatSubscript: |
8189 | 0 | case HLSubscriptOpcode::RowMatSubscript: { |
8190 | 0 | for (unsigned i = 0; i < resultSize; i++) { |
8191 | 0 | Value *idx = |
8192 | 0 | CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i); |
8193 | 0 | Value *offset = Builder.CreateMul(idx, EltByteSize); |
8194 | 0 | idxList[i] = Builder.CreateAdd(baseOffset, offset); |
8195 | 0 | } |
8196 | 0 |
|
8197 | 0 | } break; |
8198 | 0 | case HLSubscriptOpcode::RowMatElement: |
8199 | 0 | case HLSubscriptOpcode::ColMatElement: { |
8200 | 0 | Constant *EltIdxs = cast<Constant>(idx); |
8201 | 0 | for (unsigned i = 0; i < resultSize; i++) { |
8202 | 0 | Value *offset = |
8203 | 0 | Builder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize); |
8204 | 0 | idxList[i] = Builder.CreateAdd(baseOffset, offset); |
8205 | 0 | } |
8206 | 0 | } break; |
8207 | 0 | default: |
8208 | 0 | DXASSERT(0, "invalid operation on const buffer"); |
8209 | 0 | break; |
8210 | 0 | } |
8211 | 0 |
|
8212 | 0 | Value *ldData = UndefValue::get(resultType); |
8213 | 0 | if (resultType->isVectorTy()) { |
8214 | 0 | for (unsigned i = 0; i < resultSize; i++) { |
8215 | 0 | Value *eltData = |
8216 | 0 | GenerateCBLoad(handle, idxList[i], EltTy, hlslOP, Builder); |
8217 | 0 | ldData = Builder.CreateInsertElement(ldData, eltData, i); |
8218 | 0 | } |
8219 | 0 | } else { |
8220 | 0 | ldData = GenerateCBLoad(handle, idxList[0], EltTy, hlslOP, Builder); |
8221 | 0 | } |
8222 | 0 |
|
8223 | 0 | for (auto U = CI->user_begin(); U != CI->user_end();) { |
8224 | 0 | Value *subsUser = *(U++); |
8225 | 0 | if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) { |
8226 | 0 | Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder, |
8227 | 0 | /*bInsertLdNextToGEP*/ true); |
8228 | 0 |
|
8229 | 0 | for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) { |
8230 | 0 | Value *gepUser = *(gepU++); |
8231 | 0 | // Must be load here; |
8232 | 0 | LoadInst *ldUser = cast<LoadInst>(gepUser); |
8233 | 0 | ldUser->replaceAllUsesWith(subData); |
8234 | 0 | ldUser->eraseFromParent(); |
8235 | 0 | } |
8236 | 0 | GEP->eraseFromParent(); |
8237 | 0 | } else { |
8238 | 0 | // Must be load here. |
8239 | 0 | LoadInst *ldUser = cast<LoadInst>(subsUser); |
8240 | 0 | ldUser->replaceAllUsesWith(ldData); |
8241 | 0 | ldUser->eraseFromParent(); |
8242 | 0 | } |
8243 | 0 | } |
8244 | 0 |
|
8245 | 0 | CI->eraseFromParent(); |
8246 | 0 | } else { |
8247 | 0 | DXASSERT(0, "not implemented yet"); |
8248 | 0 | } |
8249 | 0 | } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) { |
8250 | 0 | Type *Ty = ldInst->getType(); |
8251 | 0 | Type *EltTy = Ty->getScalarType(); |
8252 | 0 | // Resource inside cbuffer is lowered after GenerateDxilOperations. |
8253 | 0 | if (dxilutil::IsHLSLObjectType(Ty)) { |
8254 | 0 | CallInst *CI = cast<CallInst>(handle); |
8255 | 0 | // CI should be annotate handle. |
8256 | 0 | // Need createHandle here. |
8257 | 0 | if (GetHLOpcodeGroup(CI->getCalledFunction()) == |
8258 | 0 | HLOpcodeGroup::HLAnnotateHandle) |
8259 | 0 | CI = cast<CallInst>(CI->getArgOperand(HLOperandIndex::kHandleOpIdx)); |
8260 | 0 | GlobalVariable *CbGV = cast<GlobalVariable>( |
8261 | 0 | CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx)); |
8262 | 0 | TranslateResourceInCB(ldInst, pObjHelper, CbGV); |
8263 | 0 | return; |
8264 | 0 | } |
8265 | 0 | DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass"); |
8266 | 0 |
|
8267 | 0 | unsigned EltByteSize = GetEltTypeByteSizeForConstBuf(EltTy, DL); |
8268 | 0 |
|
8269 | 0 | Value *newLd = GenerateCBLoad(handle, baseOffset, EltTy, hlslOP, Builder); |
8270 | 0 | if (Ty->isVectorTy()) { |
8271 | 0 | Value *result = UndefValue::get(Ty); |
8272 | 0 | result = Builder.CreateInsertElement(result, newLd, (uint64_t)0); |
8273 | 0 | // Update offset by 4 bytes. |
8274 | 0 | Value *offset = |
8275 | 0 | Builder.CreateAdd(baseOffset, hlslOP->GetU32Const(EltByteSize)); |
8276 | 0 | for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) { |
8277 | 0 | Value *elt = GenerateCBLoad(handle, offset, EltTy, hlslOP, Builder); |
8278 | 0 | result = Builder.CreateInsertElement(result, elt, i); |
8279 | 0 | // Update offset by 4 bytes. |
8280 | 0 | offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(EltByteSize)); |
8281 | 0 | } |
8282 | 0 | newLd = result; |
8283 | 0 | } |
8284 | 0 |
|
8285 | 0 | ldInst->replaceAllUsesWith(newLd); |
8286 | 0 | ldInst->eraseFromParent(); |
8287 | 0 | } else { |
8288 | 0 | // Must be GEP here |
8289 | 0 | GetElementPtrInst *GEP = cast<GetElementPtrInst>(user); |
8290 | 0 | TranslateCBGep(GEP, handle, baseOffset, hlslOP, Builder, |
8291 | 0 | prevFieldAnnotation, DL, dxilTypeSys, pObjHelper); |
8292 | 0 | GEP->eraseFromParent(); |
8293 | 0 | } |
8294 | 0 | } |
8295 | | |
8296 | | void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset, |
8297 | | hlsl::OP *hlslOP, IRBuilder<> &Builder, |
8298 | | DxilFieldAnnotation *prevFieldAnnotation, |
8299 | | const DataLayout &DL, DxilTypeSystem &dxilTypeSys, |
8300 | 0 | HLObjectOperationLowerHelper *pObjHelper) { |
8301 | 0 | SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end()); |
8302 | 0 |
|
8303 | 0 | Value *offset = baseOffset; |
8304 | 0 | // update offset |
8305 | 0 | DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation; |
8306 | 0 |
|
8307 | 0 | gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP); |
8308 | 0 |
|
8309 | 0 | for (; GEPIt != E; GEPIt++) { |
8310 | 0 | Value *idx = GEPIt.getOperand(); |
8311 | 0 | unsigned immIdx = 0; |
8312 | 0 | bool bImmIdx = false; |
8313 | 0 | if (Constant *constIdx = dyn_cast<Constant>(idx)) { |
8314 | 0 | immIdx = constIdx->getUniqueInteger().getLimitedValue(); |
8315 | 0 | bImmIdx = true; |
8316 | 0 | } |
8317 | 0 |
|
8318 | 0 | if (GEPIt->isPointerTy()) { |
8319 | 0 | Type *EltTy = GEPIt->getPointerElementType(); |
8320 | 0 | unsigned size = 0; |
8321 | 0 | if (StructType *ST = dyn_cast<StructType>(EltTy)) { |
8322 | 0 | DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST); |
8323 | 0 | size = annotation->GetCBufferSize(); |
8324 | 0 | } else { |
8325 | 0 | DXASSERT(fieldAnnotation, "must be a field"); |
8326 | 0 | if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) { |
8327 | 0 | unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize( |
8328 | 0 | *fieldAnnotation, EltTy, dxilTypeSys); |
8329 | 0 |
|
8330 | 0 | // Decide the nested array size. |
8331 | 0 | unsigned nestedArraySize = 1; |
8332 | 0 |
|
8333 | 0 | Type *EltTy = AT->getArrayElementType(); |
8334 | 0 | // support multi level of array |
8335 | 0 | while (EltTy->isArrayTy()) { |
8336 | 0 | ArrayType *EltAT = cast<ArrayType>(EltTy); |
8337 | 0 | nestedArraySize *= EltAT->getNumElements(); |
8338 | 0 | EltTy = EltAT->getElementType(); |
8339 | 0 | } |
8340 | 0 | // Align to 4 * 4 bytes. |
8341 | 0 | unsigned alignedSize = (EltSize + 15) & 0xfffffff0; |
8342 | 0 | size = nestedArraySize * alignedSize; |
8343 | 0 | } else { |
8344 | 0 | size = DL.getTypeAllocSize(EltTy); |
8345 | 0 | } |
8346 | 0 | } |
8347 | 0 | // Align to 4 * 4 bytes. |
8348 | 0 | size = (size + 15) & 0xfffffff0; |
8349 | 0 | if (bImmIdx) { |
8350 | 0 | unsigned tempOffset = size * immIdx; |
8351 | 0 | offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset)); |
8352 | 0 | } else { |
8353 | 0 | Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size)); |
8354 | 0 | offset = Builder.CreateAdd(offset, tempOffset); |
8355 | 0 | } |
8356 | 0 | } else if (GEPIt->isStructTy()) { |
8357 | 0 | StructType *ST = cast<StructType>(*GEPIt); |
8358 | 0 | DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST); |
8359 | 0 | fieldAnnotation = &annotation->GetFieldAnnotation(immIdx); |
8360 | 0 | unsigned structOffset = fieldAnnotation->GetCBufferOffset(); |
8361 | 0 | offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(structOffset)); |
8362 | 0 | } else if (GEPIt->isArrayTy()) { |
8363 | 0 | DXASSERT(fieldAnnotation != nullptr, "must a field"); |
8364 | 0 | unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize( |
8365 | 0 | *fieldAnnotation, *GEPIt, dxilTypeSys); |
8366 | 0 | // Decide the nested array size. |
8367 | 0 | unsigned nestedArraySize = 1; |
8368 | 0 |
|
8369 | 0 | Type *EltTy = GEPIt->getArrayElementType(); |
8370 | 0 | // support multi level of array |
8371 | 0 | while (EltTy->isArrayTy()) { |
8372 | 0 | ArrayType *EltAT = cast<ArrayType>(EltTy); |
8373 | 0 | nestedArraySize *= EltAT->getNumElements(); |
8374 | 0 | EltTy = EltAT->getElementType(); |
8375 | 0 | } |
8376 | 0 | // Align to 4 * 4 bytes. |
8377 | 0 | unsigned alignedSize = (EltSize + 15) & 0xfffffff0; |
8378 | 0 | unsigned size = nestedArraySize * alignedSize; |
8379 | 0 | if (bImmIdx) { |
8380 | 0 | unsigned tempOffset = size * immIdx; |
8381 | 0 | offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset)); |
8382 | 0 | } else { |
8383 | 0 | Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size)); |
8384 | 0 | offset = Builder.CreateAdd(offset, tempOffset); |
8385 | 0 | } |
8386 | 0 | } else if (GEPIt->isVectorTy()) { |
8387 | 0 | unsigned size = DL.getTypeAllocSize(GEPIt->getVectorElementType()); |
8388 | 0 | if (bImmIdx) { |
8389 | 0 | unsigned tempOffset = size * immIdx; |
8390 | 0 | offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset)); |
8391 | 0 | } else { |
8392 | 0 | Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size)); |
8393 | 0 | offset = Builder.CreateAdd(offset, tempOffset); |
8394 | 0 | } |
8395 | 0 | } else { |
8396 | 0 | gep_type_iterator temp = GEPIt; |
8397 | 0 | temp++; |
8398 | 0 | DXASSERT(temp == E, "scalar type must be the last"); |
8399 | 0 | } |
8400 | 0 | } |
8401 | 0 |
|
8402 | 0 | for (auto U = GEP->user_begin(); U != GEP->user_end();) { |
8403 | 0 | Instruction *user = cast<Instruction>(*(U++)); |
8404 | 0 |
|
8405 | 0 | TranslateCBAddressUser(user, handle, offset, hlslOP, fieldAnnotation, |
8406 | 0 | dxilTypeSys, DL, pObjHelper); |
8407 | 0 | } |
8408 | 0 | } |
8409 | | |
8410 | | Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx, |
8411 | | unsigned channelOffset, Type *EltTy, OP *hlslOP, |
8412 | 15.1k | IRBuilder<> &Builder) { |
8413 | 15.1k | Constant *OpArg = |
8414 | 15.1k | hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy); |
8415 | | |
8416 | 15.1k | DXASSERT(!EltTy->isIntegerTy(1), |
8417 | 15.1k | "Bools should not be loaded as their register representation."); |
8418 | | |
8419 | 15.1k | Type *doubleTy = Type::getDoubleTy(EltTy->getContext()); |
8420 | 15.1k | Type *halfTy = Type::getHalfTy(EltTy->getContext()); |
8421 | 15.1k | Type *i64Ty = Type::getInt64Ty(EltTy->getContext()); |
8422 | 15.1k | Type *i16Ty = Type::getInt16Ty(EltTy->getContext()); |
8423 | | |
8424 | 15.1k | bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty); |
8425 | 15.1k | bool is16 = (EltTy == halfTy || EltTy == i16Ty14.5k ) && !hlslOP->UseMinPrecision()762 ; |
8426 | 15.1k | DXASSERT_LOCALVAR(is16, (is16 && channelOffset < 8) || channelOffset < 4, |
8427 | 15.1k | "legacy cbuffer don't across 16 bytes register."); |
8428 | 15.1k | if (is64) { |
8429 | 428 | Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy); |
8430 | 428 | Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx}); |
8431 | 428 | DXASSERT((channelOffset & 1) == 0, |
8432 | 428 | "channel offset must be even for double"); |
8433 | 428 | unsigned eltIdx = channelOffset >> 1; |
8434 | 428 | Value *Result = Builder.CreateExtractValue(loadLegacy, eltIdx); |
8435 | 428 | return Result; |
8436 | 428 | } |
8437 | | |
8438 | 14.6k | Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy); |
8439 | 14.6k | Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx}); |
8440 | 14.6k | return Builder.CreateExtractValue(loadLegacy, channelOffset); |
8441 | 15.1k | } |
8442 | | |
8443 | | Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx, |
8444 | | unsigned channelOffset, Type *EltTy, |
8445 | | unsigned vecSize, OP *hlslOP, |
8446 | 14.7k | IRBuilder<> &Builder) { |
8447 | 14.7k | Constant *OpArg = |
8448 | 14.7k | hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy); |
8449 | | |
8450 | 14.7k | DXASSERT(!EltTy->isIntegerTy(1), |
8451 | 14.7k | "Bools should not be loaded as their register representation."); |
8452 | | |
8453 | 14.7k | Type *doubleTy = Type::getDoubleTy(EltTy->getContext()); |
8454 | 14.7k | Type *i64Ty = Type::getInt64Ty(EltTy->getContext()); |
8455 | 14.7k | Type *halfTy = Type::getHalfTy(EltTy->getContext()); |
8456 | 14.7k | Type *shortTy = Type::getInt16Ty(EltTy->getContext()); |
8457 | | |
8458 | 14.7k | bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty); |
8459 | 14.7k | bool is16 = |
8460 | 14.7k | (EltTy == shortTy || EltTy == halfTy14.5k ) && !hlslOP->UseMinPrecision()898 ; |
8461 | 14.7k | DXASSERT((is16 && channelOffset + vecSize <= 8) || |
8462 | 14.7k | (channelOffset + vecSize) <= 4, |
8463 | 14.7k | "legacy cbuffer don't across 16 bytes register."); |
8464 | 14.7k | if (is16) { |
8465 | 536 | Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy); |
8466 | 536 | Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx}); |
8467 | 536 | Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize)); |
8468 | 2.06k | for (unsigned i = 0; i < vecSize; ++i1.53k ) { |
8469 | 1.53k | Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i); |
8470 | 1.53k | Result = Builder.CreateInsertElement(Result, NewElt, i); |
8471 | 1.53k | } |
8472 | 536 | return Result; |
8473 | 536 | } |
8474 | | |
8475 | 14.2k | if (is64) { |
8476 | 76 | Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy); |
8477 | 76 | Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx}); |
8478 | 76 | Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize)); |
8479 | 76 | unsigned smallVecSize = 2; |
8480 | 76 | if (vecSize < smallVecSize) |
8481 | 0 | smallVecSize = vecSize; |
8482 | 228 | for (unsigned i = 0; i < smallVecSize; ++i152 ) { |
8483 | 152 | Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i); |
8484 | 152 | Result = Builder.CreateInsertElement(Result, NewElt, i); |
8485 | 152 | } |
8486 | 76 | if (vecSize > 2) { |
8487 | | // Got to next cb register. |
8488 | 68 | legacyIdx = Builder.CreateAdd(legacyIdx, hlslOP->GetU32Const(1)); |
8489 | 68 | Value *loadLegacy = |
8490 | 68 | Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx}); |
8491 | 204 | for (unsigned i = 2; i < vecSize; ++i136 ) { |
8492 | 136 | Value *NewElt = Builder.CreateExtractValue(loadLegacy, i - 2); |
8493 | 136 | Result = Builder.CreateInsertElement(Result, NewElt, i); |
8494 | 136 | } |
8495 | 68 | } |
8496 | 76 | return Result; |
8497 | 76 | } |
8498 | | |
8499 | 14.1k | Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy); |
8500 | 14.1k | Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx}); |
8501 | 14.1k | Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize)); |
8502 | 62.5k | for (unsigned i = 0; i < vecSize; ++i48.3k ) { |
8503 | 48.3k | Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i); |
8504 | 48.3k | Result = Builder.CreateInsertElement(Result, NewElt, i); |
8505 | 48.3k | } |
8506 | 14.1k | return Result; |
8507 | 14.2k | } |
8508 | | |
8509 | | Value *TranslateConstBufMatLdLegacy(HLMatrixType MatTy, Value *handle, |
8510 | | Value *legacyIdx, bool colMajor, OP *OP, |
8511 | | bool memElemRepr, const DataLayout &DL, |
8512 | 2.17k | IRBuilder<> &Builder) { |
8513 | 2.17k | Type *EltTy = MatTy.getElementTypeForMem(); |
8514 | | |
8515 | 2.17k | unsigned matSize = MatTy.getNumElements(); |
8516 | 2.17k | std::vector<Value *> elts(matSize); |
8517 | 2.17k | unsigned EltByteSize = GetEltTypeByteSizeForConstBuf(EltTy, DL); |
8518 | 2.17k | if (colMajor) { |
8519 | 1.72k | unsigned colByteSize = 4 * EltByteSize; |
8520 | 1.72k | unsigned colRegSize = (colByteSize + 15) >> 4; |
8521 | 7.72k | for (unsigned c = 0; c < MatTy.getNumColumns(); c++6.00k ) { |
8522 | 6.00k | Value *col = GenerateCBLoadLegacy(handle, legacyIdx, /*channelOffset*/ 0, |
8523 | 6.00k | EltTy, MatTy.getNumRows(), OP, Builder); |
8524 | | |
8525 | 27.6k | for (unsigned r = 0; r < MatTy.getNumRows(); r++21.6k ) { |
8526 | 21.6k | unsigned matIdx = MatTy.getColumnMajorIndex(r, c); |
8527 | 21.6k | elts[matIdx] = Builder.CreateExtractElement(col, r); |
8528 | 21.6k | } |
8529 | | // Update offset for a column. |
8530 | 6.00k | legacyIdx = Builder.CreateAdd(legacyIdx, OP->GetU32Const(colRegSize)); |
8531 | 6.00k | } |
8532 | 1.72k | } else { |
8533 | 448 | unsigned rowByteSize = 4 * EltByteSize; |
8534 | 448 | unsigned rowRegSize = (rowByteSize + 15) >> 4; |
8535 | 1.73k | for (unsigned r = 0; r < MatTy.getNumRows(); r++1.28k ) { |
8536 | 1.28k | Value *row = |
8537 | 1.28k | GenerateCBLoadLegacy(handle, legacyIdx, /*channelOffset*/ 0, EltTy, |
8538 | 1.28k | MatTy.getNumColumns(), OP, Builder); |
8539 | 5.32k | for (unsigned c = 0; c < MatTy.getNumColumns(); c++4.03k ) { |
8540 | 4.03k | unsigned matIdx = MatTy.getRowMajorIndex(r, c); |
8541 | 4.03k | elts[matIdx] = Builder.CreateExtractElement(row, c); |
8542 | 4.03k | } |
8543 | | // Update offset for a row. |
8544 | 1.28k | legacyIdx = Builder.CreateAdd(legacyIdx, OP->GetU32Const(rowRegSize)); |
8545 | 1.28k | } |
8546 | 448 | } |
8547 | | |
8548 | 2.17k | Value *Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder); |
8549 | 2.17k | if (!memElemRepr) |
8550 | 1.86k | Vec = MatTy.emitLoweredMemToReg(Vec, Builder); |
8551 | 2.17k | return Vec; |
8552 | 2.17k | } |
8553 | | |
8554 | | void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle, |
8555 | | Value *legacyIdx, unsigned channelOffset, |
8556 | | hlsl::OP *hlslOP, IRBuilder<> &Builder, |
8557 | | DxilFieldAnnotation *prevFieldAnnotation, |
8558 | | const DataLayout &DL, DxilTypeSystem &dxilTypeSys, |
8559 | | HLObjectOperationLowerHelper *pObjHelper); |
8560 | | |
8561 | | void TranslateCBAddressUserLegacy(Instruction *user, Value *handle, |
8562 | | Value *legacyIdx, unsigned channelOffset, |
8563 | | hlsl::OP *hlslOP, |
8564 | | DxilFieldAnnotation *prevFieldAnnotation, |
8565 | | DxilTypeSystem &dxilTypeSys, |
8566 | | const DataLayout &DL, |
8567 | 43.0k | HLObjectOperationLowerHelper *pObjHelper) { |
8568 | 43.0k | IRBuilder<> Builder(user); |
8569 | 43.0k | if (CallInst *CI = dyn_cast<CallInst>(user)) { |
8570 | 2.24k | HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction()); |
8571 | 2.24k | if (group == HLOpcodeGroup::HLMatLoadStore) { |
8572 | 1.86k | unsigned opcode = GetHLOpcode(CI); |
8573 | 1.86k | HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode); |
8574 | 1.86k | bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad; |
8575 | 1.86k | DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad || |
8576 | 1.86k | matOp == HLMatLoadStoreOpcode::RowMatLoad, |
8577 | 1.86k | "No store on cbuffer"); |
8578 | 1.86k | HLMatrixType MatTy = |
8579 | 1.86k | HLMatrixType::cast(CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx) |
8580 | 1.86k | ->getType() |
8581 | 1.86k | ->getPointerElementType()); |
8582 | | // This will replace a call, so we should use the register representation |
8583 | | // of elements |
8584 | 1.86k | Value *newLd = TranslateConstBufMatLdLegacy( |
8585 | 1.86k | MatTy, handle, legacyIdx, colMajor, hlslOP, /*memElemRepr*/ false, DL, |
8586 | 1.86k | Builder); |
8587 | 1.86k | CI->replaceAllUsesWith(newLd); |
8588 | 1.86k | dxilutil::TryScatterDebugValueToVectorElements(newLd); |
8589 | 1.86k | CI->eraseFromParent(); |
8590 | 1.86k | } else if (372 group == HLOpcodeGroup::HLSubscript372 ) { |
8591 | 352 | unsigned opcode = GetHLOpcode(CI); |
8592 | 352 | HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode); |
8593 | 352 | Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx); |
8594 | 352 | HLMatrixType MatTy = |
8595 | 352 | HLMatrixType::cast(basePtr->getType()->getPointerElementType()); |
8596 | 352 | Type *EltTy = MatTy.getElementTypeForReg(); |
8597 | | |
8598 | 352 | Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx); |
8599 | | |
8600 | 352 | Type *resultType = CI->getType()->getPointerElementType(); |
8601 | 352 | unsigned resultSize = 1; |
8602 | 352 | if (resultType->isVectorTy()) |
8603 | 256 | resultSize = resultType->getVectorNumElements(); |
8604 | 352 | DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix"); |
8605 | 352 | assert(resultSize <= 16); |
8606 | 352 | Value *idxList[16]; |
8607 | 352 | bool colMajor = subOp == HLSubscriptOpcode::ColMatSubscript || |
8608 | 352 | subOp == HLSubscriptOpcode::ColMatElement178 ; |
8609 | 352 | bool dynamicIndexing = !isa<ConstantInt>(idx) && |
8610 | 352 | !isa<ConstantAggregateZero>(idx)162 && |
8611 | 352 | !isa<ConstantDataSequential>(idx)138 ; |
8612 | | |
8613 | 352 | Value *ldData = UndefValue::get(resultType); |
8614 | 352 | if (!dynamicIndexing) { |
8615 | | // This will replace a load or GEP, so we should use the memory |
8616 | | // representation of elements |
8617 | 302 | Value *matLd = TranslateConstBufMatLdLegacy( |
8618 | 302 | MatTy, handle, legacyIdx, colMajor, hlslOP, /*memElemRepr*/ true, |
8619 | 302 | DL, Builder); |
8620 | | // The matLd is keep original layout, just use the idx calc in |
8621 | | // EmitHLSLMatrixElement and EmitHLSLMatrixSubscript. |
8622 | 302 | switch (subOp) { |
8623 | 50 | case HLSubscriptOpcode::RowMatSubscript: |
8624 | 190 | case HLSubscriptOpcode::ColMatSubscript: { |
8625 | 830 | for (unsigned i = 0; i < resultSize; i++640 ) { |
8626 | 640 | idxList[i] = |
8627 | 640 | CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i); |
8628 | 640 | } |
8629 | 190 | } break; |
8630 | 32 | case HLSubscriptOpcode::RowMatElement: |
8631 | 112 | case HLSubscriptOpcode::ColMatElement: { |
8632 | 112 | Constant *EltIdxs = cast<Constant>(idx); |
8633 | 264 | for (unsigned i = 0; i < resultSize; i++152 ) { |
8634 | 152 | idxList[i] = EltIdxs->getAggregateElement(i); |
8635 | 152 | } |
8636 | 112 | } break; |
8637 | 0 | default: |
8638 | 0 | DXASSERT(0, "invalid operation on const buffer"); |
8639 | 0 | break; |
8640 | 302 | } |
8641 | | |
8642 | 302 | if (resultType->isVectorTy()) { |
8643 | 902 | for (unsigned i = 0; i < resultSize; i++696 ) { |
8644 | 696 | Value *eltData = Builder.CreateExtractElement(matLd, idxList[i]); |
8645 | 696 | ldData = Builder.CreateInsertElement(ldData, eltData, i); |
8646 | 696 | } |
8647 | 206 | } else { |
8648 | 96 | Value *eltData = Builder.CreateExtractElement(matLd, idxList[0]); |
8649 | 96 | ldData = eltData; |
8650 | 96 | } |
8651 | 302 | } else { |
8652 | | // Must be matSub here. |
8653 | 50 | Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx); |
8654 | | |
8655 | 50 | if (colMajor) { |
8656 | | // idx is c * row + r. |
8657 | | // For first col, c is 0, so idx is r. |
8658 | 34 | Value *one = Builder.getInt32(1); |
8659 | | // row.x = c[0].[idx] |
8660 | | // row.y = c[1].[idx] |
8661 | | // row.z = c[2].[idx] |
8662 | | // row.w = c[3].[idx] |
8663 | 34 | Value *Elts[4]; |
8664 | 34 | ArrayType *AT = ArrayType::get(EltTy, MatTy.getNumRows()); |
8665 | | |
8666 | 34 | IRBuilder<> AllocaBuilder(user->getParent() |
8667 | 34 | ->getParent() |
8668 | 34 | ->getEntryBlock() |
8669 | 34 | .getFirstInsertionPt()); |
8670 | | |
8671 | 34 | Value *tempArray = AllocaBuilder.CreateAlloca(AT); |
8672 | 34 | Value *zero = AllocaBuilder.getInt32(0); |
8673 | 34 | Value *cbufIdx = legacyIdx; |
8674 | 158 | for (unsigned int c = 0; c < MatTy.getNumColumns(); c++124 ) { |
8675 | 124 | Value *ColVal = GenerateCBLoadLegacy( |
8676 | 124 | handle, cbufIdx, /*channelOffset*/ 0, EltTy, MatTy.getNumRows(), |
8677 | 124 | hlslOP, Builder); |
8678 | | // Convert ColVal to array for indexing. |
8679 | 592 | for (unsigned int r = 0; r < MatTy.getNumRows(); r++468 ) { |
8680 | 468 | Value *Elt = |
8681 | 468 | Builder.CreateExtractElement(ColVal, Builder.getInt32(r)); |
8682 | 468 | Value *Ptr = Builder.CreateInBoundsGEP( |
8683 | 468 | tempArray, {zero, Builder.getInt32(r)}); |
8684 | 468 | Builder.CreateStore(Elt, Ptr); |
8685 | 468 | } |
8686 | | |
8687 | 124 | Value *Ptr = Builder.CreateInBoundsGEP(tempArray, {zero, idx}); |
8688 | 124 | Elts[c] = Builder.CreateLoad(Ptr); |
8689 | | // Update cbufIdx. |
8690 | 124 | cbufIdx = Builder.CreateAdd(cbufIdx, one); |
8691 | 124 | } |
8692 | 34 | if (resultType->isVectorTy()) { |
8693 | 158 | for (unsigned int c = 0; c < MatTy.getNumColumns(); c++124 ) { |
8694 | 124 | ldData = Builder.CreateInsertElement(ldData, Elts[c], c); |
8695 | 124 | } |
8696 | 34 | } else { |
8697 | 0 | ldData = Elts[0]; |
8698 | 0 | } |
8699 | 34 | } else { |
8700 | | // idx is r * col + c; |
8701 | | // r = idx / col; |
8702 | 16 | Value *cCol = ConstantInt::get(idx->getType(), MatTy.getNumColumns()); |
8703 | 16 | idx = Builder.CreateUDiv(idx, cCol); |
8704 | 16 | idx = Builder.CreateAdd(idx, legacyIdx); |
8705 | | // Just return a row; 'col' is the number of columns in the row. |
8706 | 16 | ldData = GenerateCBLoadLegacy(handle, idx, /*channelOffset*/ 0, EltTy, |
8707 | 16 | MatTy.getNumColumns(), hlslOP, Builder); |
8708 | 16 | } |
8709 | 50 | if (!resultType->isVectorTy()) { |
8710 | 0 | ldData = Builder.CreateExtractElement(ldData, Builder.getInt32(0)); |
8711 | 0 | } |
8712 | 50 | } |
8713 | | |
8714 | 704 | for (auto U = CI->user_begin(); 352 U != CI->user_end();) { |
8715 | 352 | Value *subsUser = *(U++); |
8716 | 352 | if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) { |
8717 | 80 | Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder, |
8718 | 80 | /*bInsertLdNextToGEP*/ true); |
8719 | 160 | for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) { |
8720 | 80 | Value *gepUser = *(gepU++); |
8721 | | // Must be load here; |
8722 | 80 | LoadInst *ldUser = cast<LoadInst>(gepUser); |
8723 | 80 | ldUser->replaceAllUsesWith(subData); |
8724 | 80 | ldUser->eraseFromParent(); |
8725 | 80 | } |
8726 | 80 | GEP->eraseFromParent(); |
8727 | 272 | } else { |
8728 | | // Must be load here. |
8729 | 272 | LoadInst *ldUser = cast<LoadInst>(subsUser); |
8730 | 272 | ldUser->replaceAllUsesWith(ldData); |
8731 | 272 | ldUser->eraseFromParent(); |
8732 | 272 | } |
8733 | 352 | } |
8734 | | |
8735 | 352 | CI->eraseFromParent(); |
8736 | 352 | } else if (IntrinsicInst *20 II20 = dyn_cast<IntrinsicInst>(user)) { |
8737 | 20 | if (II->getIntrinsicID() == Intrinsic::lifetime_start || |
8738 | 20 | II->getIntrinsicID() == Intrinsic::lifetime_end10 ) { |
8739 | 20 | DXASSERT(II->use_empty(), "lifetime intrinsic can't have uses"); |
8740 | 20 | II->eraseFromParent(); |
8741 | 20 | } else { |
8742 | 0 | DXASSERT(0, "not implemented yet"); |
8743 | 0 | } |
8744 | 20 | } else { |
8745 | 0 | DXASSERT(0, "not implemented yet"); |
8746 | 0 | } |
8747 | 40.8k | } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) { |
8748 | 22.7k | Type *Ty = ldInst->getType(); |
8749 | 22.7k | Type *EltTy = Ty->getScalarType(); |
8750 | | // Resource inside cbuffer is lowered after GenerateDxilOperations. |
8751 | 22.7k | if (dxilutil::IsHLSLObjectType(Ty)) { |
8752 | 314 | CallInst *CI = cast<CallInst>(handle); |
8753 | | // CI should be annotate handle. |
8754 | | // Need createHandle here. |
8755 | 314 | if (GetHLOpcodeGroup(CI->getCalledFunction()) == |
8756 | 314 | HLOpcodeGroup::HLAnnotateHandle) |
8757 | 314 | CI = cast<CallInst>(CI->getArgOperand(HLOperandIndex::kHandleOpIdx)); |
8758 | | |
8759 | 314 | GlobalVariable *CbGV = cast<GlobalVariable>( |
8760 | 314 | CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx)); |
8761 | 314 | TranslateResourceInCB(ldInst, pObjHelper, CbGV); |
8762 | 314 | return; |
8763 | 314 | } |
8764 | 22.4k | DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass"); |
8765 | | |
8766 | 22.4k | Value *newLd = nullptr; |
8767 | | |
8768 | 22.4k | if (Ty->isVectorTy()) |
8769 | 7.31k | newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy, |
8770 | 7.31k | Ty->getVectorNumElements(), hlslOP, Builder); |
8771 | 15.1k | else |
8772 | 15.1k | newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy, |
8773 | 15.1k | hlslOP, Builder); |
8774 | | |
8775 | 22.4k | ldInst->replaceAllUsesWith(newLd); |
8776 | 22.4k | dxilutil::TryScatterDebugValueToVectorElements(newLd); |
8777 | 22.4k | ldInst->eraseFromParent(); |
8778 | 22.4k | } else if (BitCastInst *18.1k BCI18.1k = dyn_cast<BitCastInst>(user)) { |
8779 | 64 | for (auto it = BCI->user_begin(); it != BCI->user_end();) { |
8780 | 36 | Instruction *I = cast<Instruction>(*it++); |
8781 | 36 | TranslateCBAddressUserLegacy(I, handle, legacyIdx, channelOffset, hlslOP, |
8782 | 36 | prevFieldAnnotation, dxilTypeSys, DL, |
8783 | 36 | pObjHelper); |
8784 | 36 | } |
8785 | 28 | BCI->eraseFromParent(); |
8786 | 18.0k | } else { |
8787 | | // Must be GEP here |
8788 | 18.0k | GetElementPtrInst *GEP = cast<GetElementPtrInst>(user); |
8789 | 18.0k | TranslateCBGepLegacy(GEP, handle, legacyIdx, channelOffset, hlslOP, Builder, |
8790 | 18.0k | prevFieldAnnotation, DL, dxilTypeSys, pObjHelper); |
8791 | 18.0k | GEP->eraseFromParent(); |
8792 | 18.0k | } |
8793 | 43.0k | } |
8794 | | |
8795 | | void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle, |
8796 | | Value *legacyIndex, unsigned channel, |
8797 | | hlsl::OP *hlslOP, IRBuilder<> &Builder, |
8798 | | DxilFieldAnnotation *prevFieldAnnotation, |
8799 | | const DataLayout &DL, DxilTypeSystem &dxilTypeSys, |
8800 | 18.0k | HLObjectOperationLowerHelper *pObjHelper) { |
8801 | 18.0k | SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end()); |
8802 | | |
8803 | | // update offset |
8804 | 18.0k | DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation; |
8805 | | |
8806 | 18.0k | gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP); |
8807 | | |
8808 | 62.2k | for (; GEPIt != E; GEPIt++44.1k ) { |
8809 | 44.2k | Value *idx = GEPIt.getOperand(); |
8810 | 44.2k | unsigned immIdx = 0; |
8811 | 44.2k | bool bImmIdx = false; |
8812 | 44.2k | if (Constant *constIdx = dyn_cast<Constant>(idx)) { |
8813 | 41.4k | immIdx = constIdx->getUniqueInteger().getLimitedValue(); |
8814 | 41.4k | bImmIdx = true; |
8815 | 41.4k | } |
8816 | | |
8817 | 44.2k | if (GEPIt->isPointerTy()) { |
8818 | 18.0k | Type *EltTy = GEPIt->getPointerElementType(); |
8819 | 18.0k | unsigned size = 0; |
8820 | 18.0k | if (StructType *ST = dyn_cast<StructType>(EltTy)) { |
8821 | 18.0k | DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST); |
8822 | 18.0k | size = annotation->GetCBufferSize(); |
8823 | 18.0k | } else { |
8824 | 32 | DXASSERT(fieldAnnotation, "must be a field"); |
8825 | 32 | if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) { |
8826 | 32 | unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize( |
8827 | 32 | *fieldAnnotation, EltTy, dxilTypeSys); |
8828 | | |
8829 | | // Decide the nested array size. |
8830 | 32 | unsigned nestedArraySize = 1; |
8831 | | |
8832 | 32 | Type *EltTy = AT->getArrayElementType(); |
8833 | | // support multi level of array |
8834 | 40 | while (EltTy->isArrayTy()) { |
8835 | 8 | ArrayType *EltAT = cast<ArrayType>(EltTy); |
8836 | 8 | nestedArraySize *= EltAT->getNumElements(); |
8837 | 8 | EltTy = EltAT->getElementType(); |
8838 | 8 | } |
8839 | | // Align to 4 * 4 bytes. |
8840 | 32 | unsigned alignedSize = (EltSize + 15) & 0xfffffff0; |
8841 | 32 | size = nestedArraySize * alignedSize; |
8842 | 32 | } else { |
8843 | 0 | size = DL.getTypeAllocSize(EltTy); |
8844 | 0 | } |
8845 | 32 | } |
8846 | | // Skip 0 idx. |
8847 | 18.0k | if (bImmIdx && immIdx == 0) |
8848 | 18.0k | continue; |
8849 | | // Align to 4 * 4 bytes. |
8850 | 0 | size = (size + 15) & 0xfffffff0; |
8851 | | |
8852 | | // Take this as array idxing. |
8853 | 0 | if (bImmIdx) { |
8854 | 0 | unsigned tempOffset = size * immIdx; |
8855 | 0 | unsigned idxInc = tempOffset >> 4; |
8856 | 0 | legacyIndex = |
8857 | 0 | Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc)); |
8858 | 0 | } else { |
8859 | 0 | Value *idxInc = Builder.CreateMul(idx, hlslOP->GetU32Const(size >> 4)); |
8860 | 0 | legacyIndex = Builder.CreateAdd(legacyIndex, idxInc); |
8861 | 0 | } |
8862 | | |
8863 | | // Array always start from x channel. |
8864 | 0 | channel = 0; |
8865 | 26.1k | } else if (GEPIt->isStructTy()) { |
8866 | 21.4k | StructType *ST = cast<StructType>(*GEPIt); |
8867 | 21.4k | DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST); |
8868 | 21.4k | fieldAnnotation = &annotation->GetFieldAnnotation(immIdx); |
8869 | | |
8870 | 21.4k | unsigned idxInc = 0; |
8871 | 21.4k | unsigned structOffset = 0; |
8872 | 21.4k | if (fieldAnnotation->GetCompType().Is16Bit() && |
8873 | 21.4k | !hlslOP->UseMinPrecision()1.10k ) { |
8874 | 764 | structOffset = fieldAnnotation->GetCBufferOffset() >> 1; |
8875 | 764 | channel += structOffset; |
8876 | 764 | idxInc = channel >> 3; |
8877 | 764 | channel = channel & 0x7; |
8878 | 20.7k | } else { |
8879 | 20.7k | structOffset = fieldAnnotation->GetCBufferOffset() >> 2; |
8880 | 20.7k | channel += structOffset; |
8881 | 20.7k | idxInc = channel >> 2; |
8882 | 20.7k | channel = channel & 0x3; |
8883 | 20.7k | } |
8884 | 21.4k | if (idxInc) |
8885 | 8.27k | legacyIndex = |
8886 | 8.27k | Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc)); |
8887 | 21.4k | } else if (4.64k GEPIt->isArrayTy()4.64k ) { |
8888 | 4.17k | DXASSERT(fieldAnnotation != nullptr, "must a field"); |
8889 | 4.17k | unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize( |
8890 | 4.17k | *fieldAnnotation, *GEPIt, dxilTypeSys); |
8891 | | // Decide the nested array size. |
8892 | 4.17k | unsigned nestedArraySize = 1; |
8893 | | |
8894 | 4.17k | Type *EltTy = GEPIt->getArrayElementType(); |
8895 | | // support multi level of array |
8896 | 4.78k | while (EltTy->isArrayTy()) { |
8897 | 606 | ArrayType *EltAT = cast<ArrayType>(EltTy); |
8898 | 606 | nestedArraySize *= EltAT->getNumElements(); |
8899 | 606 | EltTy = EltAT->getElementType(); |
8900 | 606 | } |
8901 | | // Align to 4 * 4 bytes. |
8902 | 4.17k | unsigned alignedSize = (EltSize + 15) & 0xfffffff0; |
8903 | 4.17k | unsigned size = nestedArraySize * alignedSize; |
8904 | 4.17k | if (bImmIdx) { |
8905 | 1.41k | unsigned tempOffset = size * immIdx; |
8906 | 1.41k | unsigned idxInc = tempOffset >> 4; |
8907 | 1.41k | legacyIndex = |
8908 | 1.41k | Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc)); |
8909 | 2.76k | } else { |
8910 | 2.76k | Value *idxInc = Builder.CreateMul(idx, hlslOP->GetU32Const(size >> 4)); |
8911 | 2.76k | legacyIndex = Builder.CreateAdd(legacyIndex, idxInc); |
8912 | 2.76k | } |
8913 | | |
8914 | | // Array always start from x channel. |
8915 | 4.17k | channel = 0; |
8916 | 4.17k | } else if (470 GEPIt->isVectorTy()470 ) { |
8917 | | // Indexing on vector. |
8918 | 470 | if (bImmIdx) { |
8919 | 422 | if (immIdx < GEPIt->getVectorNumElements()) { |
8920 | 394 | const unsigned vectorElmSize = |
8921 | 394 | DL.getTypeAllocSize(GEPIt->getVectorElementType()); |
8922 | 394 | const bool bIs16bitType = vectorElmSize == 2; |
8923 | 394 | const unsigned tempOffset = vectorElmSize * immIdx; |
8924 | 394 | const unsigned numChannelsPerRow = bIs16bitType ? 832 : 4362 ; |
8925 | 394 | const unsigned channelInc = |
8926 | 394 | bIs16bitType ? tempOffset >> 132 : tempOffset >> 2362 ; |
8927 | | |
8928 | 394 | DXASSERT((channel + channelInc) < numChannelsPerRow, |
8929 | 394 | "vector should not cross cb register"); |
8930 | 394 | channel += channelInc; |
8931 | 394 | if (channel == numChannelsPerRow) { |
8932 | | // Get to another row. |
8933 | | // Update index and channel. |
8934 | 0 | channel = 0; |
8935 | 0 | legacyIndex = Builder.CreateAdd(legacyIndex, Builder.getInt32(1)); |
8936 | 0 | } |
8937 | 394 | } else { |
8938 | 28 | StringRef resName = "(unknown)"; |
8939 | 28 | if (DxilResourceBase *Res = |
8940 | 28 | pObjHelper->FindCBufferResourceFromHandle(handle)) { |
8941 | 28 | resName = Res->GetGlobalName(); |
8942 | 28 | } |
8943 | 28 | legacyIndex = hlsl::CreatePoisonValue( |
8944 | 28 | legacyIndex->getType(), |
8945 | 28 | Twine("Out of bounds index (") + Twine(immIdx) + |
8946 | 28 | Twine(") in CBuffer '") + Twine(resName) + ("'"), |
8947 | 28 | GEP->getDebugLoc(), GEP); |
8948 | 28 | channel = 0; |
8949 | 28 | } |
8950 | 422 | } else { |
8951 | 48 | Type *EltTy = GEPIt->getVectorElementType(); |
8952 | 48 | unsigned vecSize = GEPIt->getVectorNumElements(); |
8953 | | |
8954 | | // Load the whole register. |
8955 | 48 | Value *newLd = |
8956 | 48 | GenerateCBLoadLegacy(handle, legacyIndex, |
8957 | 48 | /*channelOffset*/ channel, EltTy, |
8958 | 48 | /*vecSize*/ vecSize, hlslOP, Builder); |
8959 | | // Copy to array. |
8960 | 48 | IRBuilder<> AllocaBuilder(GEP->getParent() |
8961 | 48 | ->getParent() |
8962 | 48 | ->getEntryBlock() |
8963 | 48 | .getFirstInsertionPt()); |
8964 | 48 | Value *tempArray = |
8965 | 48 | AllocaBuilder.CreateAlloca(ArrayType::get(EltTy, vecSize)); |
8966 | 48 | Value *zeroIdx = hlslOP->GetU32Const(0); |
8967 | 216 | for (unsigned i = 0; i < vecSize; i++168 ) { |
8968 | 168 | Value *Elt = Builder.CreateExtractElement(newLd, i); |
8969 | 168 | Value *EltGEP = Builder.CreateInBoundsGEP( |
8970 | 168 | tempArray, {zeroIdx, hlslOP->GetU32Const(i)}); |
8971 | 168 | Builder.CreateStore(Elt, EltGEP); |
8972 | 168 | } |
8973 | | // Make sure this is the end of GEP. |
8974 | 48 | gep_type_iterator temp = GEPIt; |
8975 | 48 | temp++; |
8976 | 48 | DXASSERT(temp == E, "scalar type must be the last"); |
8977 | | |
8978 | | // Replace the GEP with array GEP. |
8979 | 48 | Value *ArrayGEP = Builder.CreateInBoundsGEP(tempArray, {zeroIdx, idx}); |
8980 | 48 | GEP->replaceAllUsesWith(ArrayGEP); |
8981 | 48 | return; |
8982 | 48 | } |
8983 | 470 | } else { |
8984 | 0 | gep_type_iterator temp = GEPIt; |
8985 | 0 | temp++; |
8986 | 0 | DXASSERT(temp == E, "scalar type must be the last"); |
8987 | 0 | } |
8988 | 44.2k | } |
8989 | | |
8990 | 43.0k | for (auto U = GEP->user_begin(); 18.0k U != GEP->user_end();) { |
8991 | 24.9k | Instruction *user = cast<Instruction>(*(U++)); |
8992 | | |
8993 | 24.9k | TranslateCBAddressUserLegacy(user, handle, legacyIndex, channel, hlslOP, |
8994 | 24.9k | fieldAnnotation, dxilTypeSys, DL, pObjHelper); |
8995 | 24.9k | } |
8996 | 18.0k | } |
8997 | | |
8998 | | void TranslateCBOperationsLegacy(Value *handle, Value *ptr, OP *hlslOP, |
8999 | | DxilTypeSystem &dxilTypeSys, |
9000 | | const DataLayout &DL, |
9001 | 8.73k | HLObjectOperationLowerHelper *pObjHelper) { |
9002 | 8.73k | auto User = ptr->user_begin(); |
9003 | 8.73k | auto UserE = ptr->user_end(); |
9004 | 8.73k | Value *zeroIdx = hlslOP->GetU32Const(0); |
9005 | 26.8k | for (; User != UserE;) { |
9006 | | // Must be Instruction. |
9007 | 18.0k | Instruction *I = cast<Instruction>(*(User++)); |
9008 | 18.0k | TranslateCBAddressUserLegacy( |
9009 | 18.0k | I, handle, zeroIdx, /*channelOffset*/ 0, hlslOP, |
9010 | 18.0k | /*prevFieldAnnotation*/ nullptr, dxilTypeSys, DL, pObjHelper); |
9011 | 18.0k | } |
9012 | 8.73k | } |
9013 | | |
9014 | | } // namespace |
9015 | | |
9016 | | // Structured buffer. |
9017 | | namespace { |
9018 | | |
9019 | | Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset, |
9020 | | Value *status, Type *EltTy, |
9021 | | MutableArrayRef<Value *> resultElts, hlsl::OP *OP, |
9022 | | IRBuilder<> &Builder, unsigned NumComponents, |
9023 | 28 | Constant *alignment) { |
9024 | 28 | OP::OpCode opcode = OP::OpCode::RawBufferLoad; |
9025 | | |
9026 | 28 | DXASSERT(resultElts.size() <= 4, |
9027 | 28 | "buffer load cannot load more than 4 values"); |
9028 | | |
9029 | 28 | if (bufIdx == nullptr) { |
9030 | | // This is actually a byte address buffer load with a struct template type. |
9031 | | // The call takes only one coordinates for the offset. |
9032 | 0 | bufIdx = offset; |
9033 | 0 | offset = UndefValue::get(offset->getType()); |
9034 | 0 | } |
9035 | | |
9036 | 28 | Function *dxilF = OP->GetOpFunc(opcode, EltTy); |
9037 | 28 | Constant *mask = GetRawBufferMaskForETy(EltTy, NumComponents, OP); |
9038 | 28 | Value *Args[] = {OP->GetU32Const((unsigned)opcode), |
9039 | 28 | handle, |
9040 | 28 | bufIdx, |
9041 | 28 | offset, |
9042 | 28 | mask, |
9043 | 28 | alignment}; |
9044 | 28 | Value *Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode)); |
9045 | | |
9046 | 56 | for (unsigned i = 0; i < resultElts.size(); i++28 ) { |
9047 | 28 | resultElts[i] = Builder.CreateExtractValue(Ld, i); |
9048 | 28 | } |
9049 | | |
9050 | | // status |
9051 | 28 | UpdateStatus(Ld, status, Builder, OP); |
9052 | 28 | return Ld; |
9053 | 28 | } |
9054 | | |
9055 | | void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset, |
9056 | | Type *EltTy, hlsl::OP *OP, IRBuilder<> &Builder, |
9057 | | ArrayRef<Value *> vals, uint8_t mask, |
9058 | 60 | Constant *alignment) { |
9059 | 60 | OP::OpCode opcode = OP::OpCode::RawBufferStore; |
9060 | 60 | DXASSERT(vals.size() == 4, "buffer store need 4 values"); |
9061 | | |
9062 | 60 | Value *Args[] = {OP->GetU32Const((unsigned)opcode), |
9063 | 60 | handle, |
9064 | 60 | bufIdx, |
9065 | 60 | offset, |
9066 | 60 | vals[0], |
9067 | 60 | vals[1], |
9068 | 60 | vals[2], |
9069 | 60 | vals[3], |
9070 | 60 | OP->GetU8Const(mask), |
9071 | 60 | alignment}; |
9072 | 60 | Function *dxilF = OP->GetOpFunc(opcode, EltTy); |
9073 | 60 | Builder.CreateCall(dxilF, Args); |
9074 | 60 | } |
9075 | | |
9076 | | Value *TranslateStructBufMatLd(CallInst *CI, IRBuilder<> &Builder, |
9077 | | Value *handle, HLResource::Kind RK, hlsl::OP *OP, |
9078 | | Value *status, Value *bufIdx, Value *baseOffset, |
9079 | 814 | const DataLayout &DL) { |
9080 | | |
9081 | 814 | ResLoadHelper helper(CI, RK, handle, bufIdx, baseOffset, status); |
9082 | | #ifndef NDEBUG |
9083 | | Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx); |
9084 | | Type *matType = ptr->getType()->getPointerElementType(); |
9085 | | HLMatrixType MatTy = HLMatrixType::cast(matType); |
9086 | | DXASSERT(MatTy.getLoweredVectorType(false /*MemRepr*/) == |
9087 | | helper.retVal->getType(), |
9088 | | "helper type should match vectorized matrix"); |
9089 | | #endif |
9090 | 814 | return TranslateBufLoad(helper, RK, Builder, OP, DL); |
9091 | 814 | } |
9092 | | |
9093 | | void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle, |
9094 | | hlsl::OP *OP, Value *bufIdx, Value *baseOffset, |
9095 | 1.18k | Value *val, const DataLayout &DL) { |
9096 | 1.18k | [[maybe_unused]] HLMatrixType MatTy = HLMatrixType::cast(matType); |
9097 | 1.18k | DXASSERT(MatTy.getLoweredVectorType(false /*MemRepr*/) == val->getType(), |
9098 | 1.18k | "helper type should match vectorized matrix"); |
9099 | 1.18k | TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx, |
9100 | 1.18k | baseOffset, Builder, OP); |
9101 | 1.18k | } |
9102 | | |
9103 | | void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, HLResource::Kind RK, |
9104 | | hlsl::OP *OP, Value *status, Value *bufIdx, |
9105 | 2.00k | Value *baseOffset, const DataLayout &DL) { |
9106 | 2.00k | IRBuilder<> Builder(CI); |
9107 | 2.00k | HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()); |
9108 | 2.00k | unsigned opcode = GetHLOpcode(CI); |
9109 | 2.00k | DXASSERT_LOCALVAR(group, group == HLOpcodeGroup::HLMatLoadStore, |
9110 | 2.00k | "only translate matrix loadStore here."); |
9111 | 2.00k | HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode); |
9112 | | // Due to the current way the initial codegen generates matrix |
9113 | | // orientation casts, the in-register vector matrix has already been |
9114 | | // reordered based on the destination's row or column-major packing |
9115 | | // orientation. |
9116 | 2.00k | switch (matOp) { |
9117 | 242 | case HLMatLoadStoreOpcode::RowMatLoad: |
9118 | 814 | case HLMatLoadStoreOpcode::ColMatLoad: |
9119 | 814 | TranslateStructBufMatLd(CI, Builder, handle, RK, OP, status, bufIdx, |
9120 | 814 | baseOffset, DL); |
9121 | 814 | break; |
9122 | 194 | case HLMatLoadStoreOpcode::RowMatStore: |
9123 | 1.18k | case HLMatLoadStoreOpcode::ColMatStore: { |
9124 | 1.18k | Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx); |
9125 | 1.18k | Value *val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx); |
9126 | 1.18k | TranslateStructBufMatSt(ptr->getType()->getPointerElementType(), Builder, |
9127 | 1.18k | handle, OP, bufIdx, baseOffset, val, DL); |
9128 | 1.18k | } break; |
9129 | 2.00k | } |
9130 | | |
9131 | 2.00k | CI->eraseFromParent(); |
9132 | 2.00k | } |
9133 | | |
9134 | | void TranslateStructBufSubscriptUser(Instruction *user, Value *handle, |
9135 | | HLResource::Kind ResKind, Value *bufIdx, |
9136 | | Value *baseOffset, Value *status, |
9137 | | hlsl::OP *OP, const DataLayout &DL); |
9138 | | |
9139 | | // For case like mat[i][j]. |
9140 | | // IdxList is [i][0], [i][1], [i][2],[i][3]. |
9141 | | // Idx is j. |
9142 | | // return [i][j] not mat[i][j] because resource ptr and temp ptr need different |
9143 | | // code gen. |
9144 | | static Value *LowerGEPOnMatIndexListToIndex(llvm::GetElementPtrInst *GEP, |
9145 | 24 | ArrayRef<Value *> IdxList) { |
9146 | 24 | IRBuilder<> Builder(GEP); |
9147 | 24 | Value *zero = Builder.getInt32(0); |
9148 | 24 | DXASSERT(GEP->getNumIndices() == 2, "must have 2 level"); |
9149 | 24 | Value *baseIdx = (GEP->idx_begin())->get(); |
9150 | 24 | DXASSERT_LOCALVAR(baseIdx, baseIdx == zero, "base index must be 0"); |
9151 | 24 | Value *Idx = (GEP->idx_begin() + 1)->get(); |
9152 | | |
9153 | 24 | if (ConstantInt *immIdx = dyn_cast<ConstantInt>(Idx)) { |
9154 | 16 | return IdxList[immIdx->getSExtValue()]; |
9155 | 16 | } |
9156 | | |
9157 | 8 | IRBuilder<> AllocaBuilder( |
9158 | 8 | GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt()); |
9159 | 8 | unsigned size = IdxList.size(); |
9160 | | // Store idxList to temp array. |
9161 | 8 | ArrayType *AT = ArrayType::get(IdxList[0]->getType(), size); |
9162 | 8 | Value *tempArray = AllocaBuilder.CreateAlloca(AT); |
9163 | | |
9164 | 40 | for (unsigned i = 0; i < size; i++32 ) { |
9165 | 32 | Value *EltPtr = Builder.CreateGEP(tempArray, {zero, Builder.getInt32(i)}); |
9166 | 32 | Builder.CreateStore(IdxList[i], EltPtr); |
9167 | 32 | } |
9168 | | // Load the idx. |
9169 | 8 | Value *GEPOffset = Builder.CreateGEP(tempArray, {zero, Idx}); |
9170 | 8 | return Builder.CreateLoad(GEPOffset); |
9171 | 24 | } |
9172 | | |
9173 | | // subscript operator for matrix of struct element. |
9174 | | void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, |
9175 | | HLResource::Kind ResKind, Value *bufIdx, |
9176 | | Value *baseOffset, Value *status, |
9177 | 146 | hlsl::OP *hlslOP, const DataLayout &DL) { |
9178 | 146 | unsigned opcode = GetHLOpcode(CI); |
9179 | 146 | IRBuilder<> subBuilder(CI); |
9180 | 146 | HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode); |
9181 | 146 | Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx); |
9182 | 146 | HLMatrixType MatTy = |
9183 | 146 | HLMatrixType::cast(basePtr->getType()->getPointerElementType()); |
9184 | 146 | Type *EltTy = MatTy.getElementTypeForReg(); |
9185 | 146 | Constant *alignment = hlslOP->GetI32Const(DL.getTypeAllocSize(EltTy)); |
9186 | | |
9187 | 146 | Value *EltByteSize = ConstantInt::get( |
9188 | 146 | baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL)); |
9189 | | |
9190 | 146 | Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx); |
9191 | | |
9192 | 146 | Type *resultType = CI->getType()->getPointerElementType(); |
9193 | 146 | unsigned resultSize = 1; |
9194 | 146 | if (resultType->isVectorTy()) |
9195 | 90 | resultSize = resultType->getVectorNumElements(); |
9196 | 146 | DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix"); |
9197 | 146 | assert(resultSize <= 16); |
9198 | 146 | std::vector<Value *> idxList(resultSize); |
9199 | | |
9200 | 146 | switch (subOp) { |
9201 | 90 | case HLSubscriptOpcode::ColMatSubscript: |
9202 | 90 | case HLSubscriptOpcode::RowMatSubscript: { |
9203 | 274 | for (unsigned i = 0; i < resultSize; i++184 ) { |
9204 | 184 | Value *offset = |
9205 | 184 | CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i); |
9206 | 184 | offset = subBuilder.CreateMul(offset, EltByteSize); |
9207 | 184 | idxList[i] = subBuilder.CreateAdd(baseOffset, offset); |
9208 | 184 | } |
9209 | 90 | } break; |
9210 | 0 | case HLSubscriptOpcode::RowMatElement: |
9211 | 56 | case HLSubscriptOpcode::ColMatElement: { |
9212 | 56 | Constant *EltIdxs = cast<Constant>(idx); |
9213 | 112 | for (unsigned i = 0; i < resultSize; i++56 ) { |
9214 | 56 | Value *offset = |
9215 | 56 | subBuilder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize); |
9216 | 56 | idxList[i] = subBuilder.CreateAdd(baseOffset, offset); |
9217 | 56 | } |
9218 | 56 | } break; |
9219 | 0 | default: |
9220 | 0 | DXASSERT(0, "invalid operation on const buffer"); |
9221 | 0 | break; |
9222 | 146 | } |
9223 | | |
9224 | 146 | Value *undefElt = UndefValue::get(EltTy); |
9225 | | |
9226 | 292 | for (auto U = CI->user_begin(); U != CI->user_end();) { |
9227 | 146 | Value *subsUser = *(U++); |
9228 | 146 | if (resultSize == 1) { |
9229 | 88 | TranslateStructBufSubscriptUser(cast<Instruction>(subsUser), handle, |
9230 | 88 | ResKind, bufIdx, idxList[0], status, |
9231 | 88 | hlslOP, DL); |
9232 | 88 | continue; |
9233 | 88 | } |
9234 | 58 | if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) { |
9235 | 24 | Value *GEPOffset = LowerGEPOnMatIndexListToIndex(GEP, idxList); |
9236 | | |
9237 | 48 | for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) { |
9238 | 24 | Instruction *gepUserInst = cast<Instruction>(*(gepU++)); |
9239 | 24 | TranslateStructBufSubscriptUser(gepUserInst, handle, ResKind, bufIdx, |
9240 | 24 | GEPOffset, status, hlslOP, DL); |
9241 | 24 | } |
9242 | | |
9243 | 24 | GEP->eraseFromParent(); |
9244 | 34 | } else if (StoreInst *stUser = dyn_cast<StoreInst>(subsUser)) { |
9245 | | // Store elements of matrix in a struct. Needs to be done one scalar at a |
9246 | | // time even for vectors in the case that matrix orientation spreads the |
9247 | | // indexed scalars throughout the matrix vector. |
9248 | 22 | IRBuilder<> stBuilder(stUser); |
9249 | 22 | Value *Val = stUser->getValueOperand(); |
9250 | 22 | if (Val->getType()->isVectorTy()) { |
9251 | 82 | for (unsigned i = 0; i < resultSize; i++60 ) { |
9252 | 60 | Value *EltVal = stBuilder.CreateExtractElement(Val, i); |
9253 | 60 | uint8_t mask = DXIL::kCompMask_X; |
9254 | 60 | GenerateStructBufSt(handle, bufIdx, idxList[i], EltTy, hlslOP, |
9255 | 60 | stBuilder, {EltVal, undefElt, undefElt, undefElt}, |
9256 | 60 | mask, alignment); |
9257 | 60 | } |
9258 | 22 | } else { |
9259 | 0 | uint8_t mask = DXIL::kCompMask_X; |
9260 | 0 | GenerateStructBufSt(handle, bufIdx, idxList[0], EltTy, hlslOP, |
9261 | 0 | stBuilder, {Val, undefElt, undefElt, undefElt}, |
9262 | 0 | mask, alignment); |
9263 | 0 | } |
9264 | | |
9265 | 22 | stUser->eraseFromParent(); |
9266 | 22 | } else { |
9267 | | // Must be load here. |
9268 | 12 | LoadInst *ldUser = cast<LoadInst>(subsUser); |
9269 | 12 | IRBuilder<> ldBuilder(ldUser); |
9270 | 12 | Value *ldData = UndefValue::get(resultType); |
9271 | | // Load elements of matrix in a struct. Needs to be done one scalar at a |
9272 | | // time even for vectors in the case that matrix orientation spreads the |
9273 | | // indexed scalars throughout the matrix vector. |
9274 | 12 | if (resultType->isVectorTy()) { |
9275 | 40 | for (unsigned i = 0; i < resultSize; i++28 ) { |
9276 | 28 | Value *ResultElt; |
9277 | | // TODO: This can be inefficient for row major matrix load |
9278 | 28 | GenerateRawBufLd(handle, bufIdx, idxList[i], |
9279 | 28 | /*status*/ nullptr, EltTy, ResultElt, hlslOP, |
9280 | 28 | ldBuilder, 1, alignment); |
9281 | 28 | ldData = ldBuilder.CreateInsertElement(ldData, ResultElt, i); |
9282 | 28 | } |
9283 | 12 | } else { |
9284 | 0 | GenerateRawBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr, EltTy, |
9285 | 0 | ldData, hlslOP, ldBuilder, 4, alignment); |
9286 | 0 | } |
9287 | 12 | ldUser->replaceAllUsesWith(ldData); |
9288 | 12 | ldUser->eraseFromParent(); |
9289 | 12 | } |
9290 | 58 | } |
9291 | | |
9292 | 146 | CI->eraseFromParent(); |
9293 | 146 | } |
9294 | | |
9295 | | void TranslateStructBufSubscriptUser(Instruction *user, Value *handle, |
9296 | | HLResource::Kind ResKind, Value *bufIdx, |
9297 | | Value *baseOffset, Value *status, |
9298 | 37.3k | hlsl::OP *OP, const DataLayout &DL) { |
9299 | 37.3k | IRBuilder<> Builder(user); |
9300 | 37.3k | if (CallInst *userCall = dyn_cast<CallInst>(user)) { |
9301 | 3.68k | HLOpcodeGroup group = // user call? |
9302 | 3.68k | hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction()); |
9303 | 3.68k | unsigned opcode = GetHLOpcode(userCall); |
9304 | | // For case element type of structure buffer is not structure type. |
9305 | 3.68k | if (baseOffset == nullptr) |
9306 | 0 | baseOffset = OP->GetU32Const(0); |
9307 | 3.68k | if (group == HLOpcodeGroup::HLIntrinsic) { |
9308 | 1.53k | IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode); |
9309 | 1.53k | switch (IOP) { |
9310 | 0 | case IntrinsicOp::MOP_Load: { |
9311 | 0 | if (userCall->getType()->isPointerTy()) { |
9312 | | // Struct will return pointers which like [] |
9313 | |
|
9314 | 0 | } else { |
9315 | | // Use builtin types on structuredBuffer. |
9316 | 0 | } |
9317 | 0 | DXASSERT(0, "not implement yet"); |
9318 | 0 | } break; |
9319 | 364 | case IntrinsicOp::IOP_InterlockedAdd: { |
9320 | 364 | AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx, |
9321 | 364 | baseOffset); |
9322 | 364 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Add, |
9323 | 364 | Builder, OP); |
9324 | 364 | } break; |
9325 | 72 | case IntrinsicOp::IOP_InterlockedAnd: { |
9326 | 72 | AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx, |
9327 | 72 | baseOffset); |
9328 | 72 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::And, |
9329 | 72 | Builder, OP); |
9330 | 72 | } break; |
9331 | 224 | case IntrinsicOp::IOP_InterlockedExchange: { |
9332 | 224 | Type *opType = nullptr; |
9333 | 224 | PointerType *ptrType = dyn_cast<PointerType>( |
9334 | 224 | userCall->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex) |
9335 | 224 | ->getType()); |
9336 | 224 | if (ptrType && ptrType->getElementType()->isFloatTy()) |
9337 | 12 | opType = Type::getInt32Ty(userCall->getContext()); |
9338 | 224 | AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx, |
9339 | 224 | baseOffset, opType); |
9340 | 224 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange, |
9341 | 224 | Builder, OP); |
9342 | 224 | } break; |
9343 | 40 | case IntrinsicOp::IOP_InterlockedMax: { |
9344 | 40 | AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx, |
9345 | 40 | baseOffset); |
9346 | 40 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMax, |
9347 | 40 | Builder, OP); |
9348 | 40 | } break; |
9349 | 40 | case IntrinsicOp::IOP_InterlockedMin: { |
9350 | 40 | AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx, |
9351 | 40 | baseOffset); |
9352 | 40 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMin, |
9353 | 40 | Builder, OP); |
9354 | 40 | } break; |
9355 | 52 | case IntrinsicOp::IOP_InterlockedUMax: { |
9356 | 52 | AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx, |
9357 | 52 | baseOffset); |
9358 | 52 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMax, |
9359 | 52 | Builder, OP); |
9360 | 52 | } break; |
9361 | 40 | case IntrinsicOp::IOP_InterlockedUMin: { |
9362 | 40 | AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx, |
9363 | 40 | baseOffset); |
9364 | 40 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMin, |
9365 | 40 | Builder, OP); |
9366 | 40 | } break; |
9367 | 96 | case IntrinsicOp::IOP_InterlockedOr: { |
9368 | 96 | AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx, |
9369 | 96 | baseOffset); |
9370 | 96 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Or, |
9371 | 96 | Builder, OP); |
9372 | 96 | } break; |
9373 | 72 | case IntrinsicOp::IOP_InterlockedXor: { |
9374 | 72 | AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx, |
9375 | 72 | baseOffset); |
9376 | 72 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Xor, |
9377 | 72 | Builder, OP); |
9378 | 72 | } break; |
9379 | 262 | case IntrinsicOp::IOP_InterlockedCompareStore: |
9380 | 508 | case IntrinsicOp::IOP_InterlockedCompareExchange: { |
9381 | 508 | AtomicHelper helper(userCall, DXIL::OpCode::AtomicCompareExchange, |
9382 | 508 | handle, bufIdx, baseOffset); |
9383 | 508 | TranslateAtomicCmpXChg(helper, Builder, OP); |
9384 | 508 | } break; |
9385 | 14 | case IntrinsicOp::IOP_InterlockedCompareStoreFloatBitwise: |
9386 | 28 | case IntrinsicOp::IOP_InterlockedCompareExchangeFloatBitwise: { |
9387 | 28 | Type *i32Ty = Type::getInt32Ty(userCall->getContext()); |
9388 | 28 | AtomicHelper helper(userCall, DXIL::OpCode::AtomicCompareExchange, |
9389 | 28 | handle, bufIdx, baseOffset, i32Ty); |
9390 | 28 | TranslateAtomicCmpXChg(helper, Builder, OP); |
9391 | 28 | } break; |
9392 | 0 | default: |
9393 | 0 | DXASSERT(0, "invalid opcode"); |
9394 | 0 | break; |
9395 | 1.53k | } |
9396 | 1.53k | userCall->eraseFromParent(); |
9397 | 2.14k | } else if (group == HLOpcodeGroup::HLMatLoadStore) |
9398 | | // Load/Store matrix within a struct |
9399 | 2.00k | TranslateStructBufMatLdSt(userCall, handle, ResKind, OP, status, bufIdx, |
9400 | 2.00k | baseOffset, DL); |
9401 | 146 | else if (group == HLOpcodeGroup::HLSubscript) { |
9402 | | // Subscript of matrix within a struct |
9403 | 146 | TranslateStructBufMatSubscript(userCall, handle, ResKind, bufIdx, |
9404 | 146 | baseOffset, status, OP, DL); |
9405 | 146 | } |
9406 | 33.7k | } else if (LoadInst *LdInst = dyn_cast<LoadInst>(user)) { |
9407 | | // Load of scalar/vector within a struct or structured raw load. |
9408 | 9.18k | ResLoadHelper helper(LdInst, ResKind, handle, bufIdx, baseOffset, status); |
9409 | 9.18k | TranslateBufLoad(helper, ResKind, Builder, OP, DL); |
9410 | | |
9411 | 9.18k | LdInst->eraseFromParent(); |
9412 | 24.5k | } else if (StoreInst *StInst = dyn_cast<StoreInst>(user)) { |
9413 | | // Store of scalar/vector within a struct or structured raw store. |
9414 | 9.21k | Value *val = StInst->getValueOperand(); |
9415 | 9.21k | TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx, |
9416 | 9.21k | baseOffset, Builder, OP); |
9417 | 9.21k | StInst->eraseFromParent(); |
9418 | 15.3k | } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(user)) { |
9419 | | // Recurse users |
9420 | 76 | for (auto U = BCI->user_begin(); U != BCI->user_end();) { |
9421 | 46 | Value *BCIUser = *(U++); |
9422 | 46 | TranslateStructBufSubscriptUser(cast<Instruction>(BCIUser), handle, |
9423 | 46 | ResKind, bufIdx, baseOffset, status, OP, |
9424 | 46 | DL); |
9425 | 46 | } |
9426 | 30 | BCI->eraseFromParent(); |
9427 | 15.2k | } else if (PHINode *Phi = dyn_cast<PHINode>(user)) { |
9428 | 4 | if (Phi->getNumIncomingValues() != 1) { |
9429 | 0 | dxilutil::EmitErrorOnInstruction( |
9430 | 0 | Phi, "Phi not supported for buffer subscript"); |
9431 | 0 | return; |
9432 | 0 | } |
9433 | | // Since the phi only has a single value we can safely process its |
9434 | | // users to translate the subscript. These single-value phis are |
9435 | | // inserted by the lcssa pass. |
9436 | 8 | for (auto U = Phi->user_begin(); 4 U != Phi->user_end();) { |
9437 | 4 | Value *PhiUser = *(U++); |
9438 | 4 | TranslateStructBufSubscriptUser(cast<Instruction>(PhiUser), handle, |
9439 | 4 | ResKind, bufIdx, baseOffset, status, OP, |
9440 | 4 | DL); |
9441 | 4 | } |
9442 | 4 | Phi->eraseFromParent(); |
9443 | 15.2k | } else { |
9444 | | // should only used by GEP |
9445 | 15.2k | GetElementPtrInst *GEP = cast<GetElementPtrInst>(user); |
9446 | 15.2k | Type *Ty = GEP->getType()->getPointerElementType(); |
9447 | | |
9448 | 15.2k | Value *offset = dxilutil::GEPIdxToOffset(GEP, Builder, OP, DL); |
9449 | 15.2k | DXASSERT_LOCALVAR(Ty, |
9450 | 15.2k | offset->getType() == Type::getInt32Ty(Ty->getContext()), |
9451 | 15.2k | "else bitness is wrong"); |
9452 | | // No offset into element for Raw buffers; byte offset is in bufIdx. |
9453 | 15.2k | if (DXIL::IsRawBuffer(ResKind)) |
9454 | 574 | bufIdx = Builder.CreateAdd(offset, bufIdx); |
9455 | 14.7k | else |
9456 | 14.7k | baseOffset = Builder.CreateAdd(offset, baseOffset); |
9457 | | |
9458 | 37.3k | for (auto U = GEP->user_begin(); U != GEP->user_end();) { |
9459 | 22.1k | Value *GEPUser = *(U++); |
9460 | | |
9461 | 22.1k | TranslateStructBufSubscriptUser(cast<Instruction>(GEPUser), handle, |
9462 | 22.1k | ResKind, bufIdx, baseOffset, status, OP, |
9463 | 22.1k | DL); |
9464 | 22.1k | } |
9465 | | // delete the inst |
9466 | 15.2k | GEP->eraseFromParent(); |
9467 | 15.2k | } |
9468 | 37.3k | } |
9469 | | |
9470 | | void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status, |
9471 | | hlsl::OP *OP, HLResource::Kind ResKind, |
9472 | 13.0k | const DataLayout &DL) { |
9473 | 13.0k | Value *subscriptIndex = |
9474 | 13.0k | CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx); |
9475 | 13.0k | Value *bufIdx = nullptr; |
9476 | 13.0k | Value *offset = nullptr; |
9477 | 13.0k | bufIdx = subscriptIndex; |
9478 | 13.0k | if (ResKind == HLResource::Kind::RawBuffer) |
9479 | 284 | offset = UndefValue::get(Type::getInt32Ty(CI->getContext())); |
9480 | 12.7k | else |
9481 | | // StructuredBuffer, TypedBuffer, etc. |
9482 | 12.7k | offset = OP->GetU32Const(0); |
9483 | | |
9484 | 28.1k | for (auto U = CI->user_begin(); U != CI->user_end();) { |
9485 | 15.1k | Value *user = *(U++); |
9486 | | |
9487 | 15.1k | TranslateStructBufSubscriptUser(cast<Instruction>(user), handle, ResKind, |
9488 | 15.1k | bufIdx, offset, status, OP, DL); |
9489 | 15.1k | } |
9490 | 13.0k | } |
9491 | | } // namespace |
9492 | | |
9493 | | // HLSubscript. |
9494 | | namespace { |
9495 | | |
9496 | | Value *TranslateTypedBufSubscript(CallInst *CI, DXIL::ResourceKind RK, |
9497 | | DXIL::ResourceClass RC, Value *handle, |
9498 | | LoadInst *ldInst, IRBuilder<> &Builder, |
9499 | 2.81k | hlsl::OP *hlslOP, const DataLayout &DL) { |
9500 | | // The arguments to the call instruction are used to determine the access, |
9501 | | // the return value and type come from the load instruction. |
9502 | 2.81k | ResLoadHelper ldHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, ldInst); |
9503 | 2.81k | TranslateBufLoad(ldHelper, RK, Builder, hlslOP, DL); |
9504 | | // delete the ld |
9505 | 2.81k | ldInst->eraseFromParent(); |
9506 | 2.81k | return ldHelper.retVal; |
9507 | 2.81k | } |
9508 | | |
9509 | | Value *UpdateVectorElt(Value *VecVal, Value *EltVal, Value *EltIdx, |
9510 | 16 | unsigned vectorSize, Instruction *InsertPt) { |
9511 | 16 | IRBuilder<> Builder(InsertPt); |
9512 | 16 | if (ConstantInt *CEltIdx = dyn_cast<ConstantInt>(EltIdx)) { |
9513 | 8 | VecVal = |
9514 | 8 | Builder.CreateInsertElement(VecVal, EltVal, CEltIdx->getLimitedValue()); |
9515 | 8 | } else { |
9516 | 8 | BasicBlock *BB = InsertPt->getParent(); |
9517 | 8 | BasicBlock *EndBB = BB->splitBasicBlock(InsertPt); |
9518 | | |
9519 | 8 | TerminatorInst *TI = BB->getTerminator(); |
9520 | 8 | IRBuilder<> SwitchBuilder(TI); |
9521 | 8 | LLVMContext &Ctx = InsertPt->getContext(); |
9522 | | |
9523 | 8 | SwitchInst *Switch = SwitchBuilder.CreateSwitch(EltIdx, EndBB, vectorSize); |
9524 | 8 | TI->eraseFromParent(); |
9525 | | |
9526 | 8 | Function *F = EndBB->getParent(); |
9527 | 8 | IRBuilder<> endSwitchBuilder(EndBB->begin()); |
9528 | 8 | Type *Ty = VecVal->getType(); |
9529 | 8 | PHINode *VecPhi = endSwitchBuilder.CreatePHI(Ty, vectorSize + 1); |
9530 | | |
9531 | 40 | for (unsigned i = 0; i < vectorSize; i++32 ) { |
9532 | 32 | BasicBlock *CaseBB = BasicBlock::Create(Ctx, "case", F, EndBB); |
9533 | 32 | Switch->addCase(SwitchBuilder.getInt32(i), CaseBB); |
9534 | 32 | IRBuilder<> CaseBuilder(CaseBB); |
9535 | | |
9536 | 32 | Value *CaseVal = CaseBuilder.CreateInsertElement(VecVal, EltVal, i); |
9537 | 32 | VecPhi->addIncoming(CaseVal, CaseBB); |
9538 | 32 | CaseBuilder.CreateBr(EndBB); |
9539 | 32 | } |
9540 | 8 | VecPhi->addIncoming(VecVal, BB); |
9541 | 8 | VecVal = VecPhi; |
9542 | 8 | } |
9543 | 16 | return VecVal; |
9544 | 16 | } |
9545 | | |
9546 | | void TranslateTypedBufferSubscript(CallInst *CI, HLOperationLowerHelper &helper, |
9547 | | HLObjectOperationLowerHelper *pObjHelper, |
9548 | 8.46k | bool &Translated) { |
9549 | 8.46k | Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx); |
9550 | | |
9551 | 8.46k | hlsl::OP *hlslOP = &helper.hlslOP; |
9552 | | // Resource ptr. |
9553 | 8.46k | Value *handle = ptr; |
9554 | 8.46k | DXIL::ResourceClass RC = pObjHelper->GetRC(handle); |
9555 | 8.46k | DXIL::ResourceKind RK = pObjHelper->GetRK(handle); |
9556 | | |
9557 | 8.46k | Type *Ty = CI->getType()->getPointerElementType(); |
9558 | | |
9559 | 17.2k | for (auto It = CI->user_begin(); It != CI->user_end();) { |
9560 | 8.75k | User *user = *(It++); |
9561 | 8.75k | Instruction *I = cast<Instruction>(user); |
9562 | 8.75k | IRBuilder<> Builder(I); |
9563 | 8.75k | Value *UndefI = UndefValue::get(Builder.getInt32Ty()); |
9564 | 8.75k | if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) { |
9565 | 2.77k | TranslateTypedBufSubscript(CI, RK, RC, handle, ldInst, Builder, hlslOP, |
9566 | 2.77k | helper.dataLayout); |
9567 | 5.97k | } else if (StoreInst *stInst = dyn_cast<StoreInst>(user)) { |
9568 | 3.27k | Value *val = stInst->getValueOperand(); |
9569 | 3.27k | TranslateStore(RK, handle, val, |
9570 | 3.27k | CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), |
9571 | 3.27k | UndefI, Builder, hlslOP); |
9572 | | // delete the st |
9573 | 3.27k | stInst->eraseFromParent(); |
9574 | 3.27k | } else if (GetElementPtrInst *2.70k GEP2.70k = dyn_cast<GetElementPtrInst>(user)) { |
9575 | | // Must be vector type here. |
9576 | 56 | unsigned vectorSize = Ty->getVectorNumElements(); |
9577 | 56 | DXASSERT_NOMSG(GEP->getNumIndices() == 2); |
9578 | 56 | Use *GEPIdx = GEP->idx_begin(); |
9579 | 56 | GEPIdx++; |
9580 | 56 | Value *EltIdx = *GEPIdx; |
9581 | 96 | for (auto GEPIt = GEP->user_begin(); GEPIt != GEP->user_end();) { |
9582 | 56 | User *GEPUser = *(GEPIt++); |
9583 | 56 | if (StoreInst *SI = dyn_cast<StoreInst>(GEPUser)) { |
9584 | 16 | IRBuilder<> StBuilder(SI); |
9585 | | // Generate Ld. |
9586 | 16 | LoadInst *tmpLd = StBuilder.CreateLoad(CI); |
9587 | | |
9588 | 16 | Value *ldVal = TranslateTypedBufSubscript( |
9589 | 16 | CI, RK, RC, handle, tmpLd, StBuilder, hlslOP, helper.dataLayout); |
9590 | | // Update vector. |
9591 | 16 | ldVal = UpdateVectorElt(ldVal, SI->getValueOperand(), EltIdx, |
9592 | 16 | vectorSize, SI); |
9593 | | // Generate St. |
9594 | | // Reset insert point, UpdateVectorElt may move SI to different block. |
9595 | 16 | StBuilder.SetInsertPoint(SI); |
9596 | 16 | TranslateStore( |
9597 | 16 | RK, handle, ldVal, |
9598 | 16 | CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), UndefI, |
9599 | 16 | StBuilder, hlslOP); |
9600 | 16 | SI->eraseFromParent(); |
9601 | 16 | continue; |
9602 | 16 | } |
9603 | 40 | if (LoadInst *LI = dyn_cast<LoadInst>(GEPUser)) { |
9604 | 24 | IRBuilder<> LdBuilder(LI); |
9605 | | |
9606 | | // Generate tmp vector load with vector type & translate it |
9607 | 24 | LoadInst *tmpLd = LdBuilder.CreateLoad(CI); |
9608 | | |
9609 | 24 | Value *ldVal = TranslateTypedBufSubscript( |
9610 | 24 | CI, RK, RC, handle, tmpLd, LdBuilder, hlslOP, helper.dataLayout); |
9611 | | |
9612 | | // get the single element |
9613 | 24 | ldVal = GenerateVecEltFromGEP(ldVal, GEP, LdBuilder, |
9614 | 24 | /*bInsertLdNextToGEP*/ false); |
9615 | | |
9616 | 24 | LI->replaceAllUsesWith(ldVal); |
9617 | 24 | LI->eraseFromParent(); |
9618 | 24 | continue; |
9619 | 24 | } |
9620 | | // Invalid operations. |
9621 | 16 | Translated = false; |
9622 | 16 | dxilutil::EmitErrorOnInstruction(GEP, |
9623 | 16 | "Invalid operation on typed buffer."); |
9624 | 16 | return; |
9625 | 40 | } |
9626 | 40 | GEP->eraseFromParent(); |
9627 | 2.64k | } else { |
9628 | 2.64k | CallInst *userCall = cast<CallInst>(user); |
9629 | 2.64k | HLOpcodeGroup group = |
9630 | 2.64k | hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction()); |
9631 | 2.64k | unsigned opcode = hlsl::GetHLOpcode(userCall); |
9632 | 2.64k | if (group == HLOpcodeGroup::HLIntrinsic) { |
9633 | 2.64k | IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode); |
9634 | 2.64k | if (RC == DXIL::ResourceClass::SRV) { |
9635 | | // Invalid operations. |
9636 | 0 | Translated = false; |
9637 | 0 | dxilutil::EmitErrorOnInstruction(userCall, |
9638 | 0 | "Invalid operation on SRV."); |
9639 | 0 | return; |
9640 | 0 | } |
9641 | 2.64k | switch (IOP) { |
9642 | 370 | case IntrinsicOp::IOP_InterlockedAdd: { |
9643 | 370 | ResLoadHelper helper(CI, RK, RC, handle, |
9644 | 370 | IntrinsicOp::IOP_InterlockedAdd); |
9645 | 370 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, |
9646 | 370 | helper.addr, /*offset*/ nullptr); |
9647 | 370 | TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Add, |
9648 | 370 | Builder, hlslOP); |
9649 | 370 | } break; |
9650 | 192 | case IntrinsicOp::IOP_InterlockedAnd: { |
9651 | 192 | ResLoadHelper helper(CI, RK, RC, handle, |
9652 | 192 | IntrinsicOp::IOP_InterlockedAnd); |
9653 | 192 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, |
9654 | 192 | helper.addr, /*offset*/ nullptr); |
9655 | 192 | TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::And, |
9656 | 192 | Builder, hlslOP); |
9657 | 192 | } break; |
9658 | 356 | case IntrinsicOp::IOP_InterlockedExchange: { |
9659 | 356 | ResLoadHelper helper(CI, RK, RC, handle, |
9660 | 356 | IntrinsicOp::IOP_InterlockedExchange); |
9661 | 356 | Type *opType = nullptr; |
9662 | 356 | PointerType *ptrType = dyn_cast<PointerType>( |
9663 | 356 | userCall->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex) |
9664 | 356 | ->getType()); |
9665 | 356 | if (ptrType && ptrType->getElementType()->isFloatTy()) |
9666 | 12 | opType = Type::getInt32Ty(userCall->getContext()); |
9667 | 356 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, |
9668 | 356 | helper.addr, /*offset*/ nullptr, opType); |
9669 | 356 | TranslateAtomicBinaryOperation( |
9670 | 356 | atomHelper, DXIL::AtomicBinOpCode::Exchange, Builder, hlslOP); |
9671 | 356 | } break; |
9672 | 108 | case IntrinsicOp::IOP_InterlockedMax: { |
9673 | 108 | ResLoadHelper helper(CI, RK, RC, handle, |
9674 | 108 | IntrinsicOp::IOP_InterlockedMax); |
9675 | 108 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, |
9676 | 108 | helper.addr, /*offset*/ nullptr); |
9677 | 108 | TranslateAtomicBinaryOperation( |
9678 | 108 | atomHelper, DXIL::AtomicBinOpCode::IMax, Builder, hlslOP); |
9679 | 108 | } break; |
9680 | 108 | case IntrinsicOp::IOP_InterlockedMin: { |
9681 | 108 | ResLoadHelper helper(CI, RK, RC, handle, |
9682 | 108 | IntrinsicOp::IOP_InterlockedMin); |
9683 | 108 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, |
9684 | 108 | helper.addr, /*offset*/ nullptr); |
9685 | 108 | TranslateAtomicBinaryOperation( |
9686 | 108 | atomHelper, DXIL::AtomicBinOpCode::IMin, Builder, hlslOP); |
9687 | 108 | } break; |
9688 | 116 | case IntrinsicOp::IOP_InterlockedUMax: { |
9689 | 116 | ResLoadHelper helper(CI, RK, RC, handle, |
9690 | 116 | IntrinsicOp::IOP_InterlockedUMax); |
9691 | 116 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, |
9692 | 116 | helper.addr, /*offset*/ nullptr); |
9693 | 116 | TranslateAtomicBinaryOperation( |
9694 | 116 | atomHelper, DXIL::AtomicBinOpCode::UMax, Builder, hlslOP); |
9695 | 116 | } break; |
9696 | 116 | case IntrinsicOp::IOP_InterlockedUMin: { |
9697 | 116 | ResLoadHelper helper(CI, RK, RC, handle, |
9698 | 116 | IntrinsicOp::IOP_InterlockedUMin); |
9699 | 116 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, |
9700 | 116 | helper.addr, /*offset*/ nullptr); |
9701 | 116 | TranslateAtomicBinaryOperation( |
9702 | 116 | atomHelper, DXIL::AtomicBinOpCode::UMin, Builder, hlslOP); |
9703 | 116 | } break; |
9704 | 200 | case IntrinsicOp::IOP_InterlockedOr: { |
9705 | 200 | ResLoadHelper helper(CI, RK, RC, handle, |
9706 | 200 | IntrinsicOp::IOP_InterlockedOr); |
9707 | 200 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, |
9708 | 200 | helper.addr, /*offset*/ nullptr); |
9709 | 200 | TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Or, |
9710 | 200 | Builder, hlslOP); |
9711 | 200 | } break; |
9712 | 192 | case IntrinsicOp::IOP_InterlockedXor: { |
9713 | 192 | ResLoadHelper helper(CI, RK, RC, handle, |
9714 | 192 | IntrinsicOp::IOP_InterlockedXor); |
9715 | 192 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, |
9716 | 192 | helper.addr, /*offset*/ nullptr); |
9717 | 192 | TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Xor, |
9718 | 192 | Builder, hlslOP); |
9719 | 192 | } break; |
9720 | 442 | case IntrinsicOp::IOP_InterlockedCompareStore: |
9721 | 860 | case IntrinsicOp::IOP_InterlockedCompareExchange: { |
9722 | 860 | ResLoadHelper helper(CI, RK, RC, handle, |
9723 | 860 | IntrinsicOp::IOP_InterlockedCompareExchange); |
9724 | 860 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicCompareExchange, |
9725 | 860 | handle, helper.addr, /*offset*/ nullptr); |
9726 | 860 | TranslateAtomicCmpXChg(atomHelper, Builder, hlslOP); |
9727 | 860 | } break; |
9728 | 14 | case IntrinsicOp::IOP_InterlockedCompareStoreFloatBitwise: |
9729 | 28 | case IntrinsicOp::IOP_InterlockedCompareExchangeFloatBitwise: { |
9730 | 28 | Type *i32Ty = Type::getInt32Ty(userCall->getContext()); |
9731 | 28 | ResLoadHelper helper(CI, RK, RC, handle, |
9732 | 28 | IntrinsicOp::IOP_InterlockedCompareExchange); |
9733 | 28 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicCompareExchange, |
9734 | 28 | handle, helper.addr, /*offset*/ nullptr, |
9735 | 28 | i32Ty); |
9736 | 28 | TranslateAtomicCmpXChg(atomHelper, Builder, hlslOP); |
9737 | 28 | } break; |
9738 | 0 | default: |
9739 | 0 | DXASSERT(0, "invalid opcode"); |
9740 | 0 | break; |
9741 | 2.64k | } |
9742 | 2.64k | } else { |
9743 | 0 | DXASSERT(0, "invalid group"); |
9744 | 0 | } |
9745 | 2.64k | userCall->eraseFromParent(); |
9746 | 2.64k | } |
9747 | 8.75k | } |
9748 | 8.46k | } |
9749 | | } // namespace |
9750 | | |
9751 | | void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, |
9752 | | HLOperationLowerHelper &helper, |
9753 | | HLObjectOperationLowerHelper *pObjHelper, |
9754 | 29.8k | bool &Translated) { |
9755 | 29.8k | if (CI->user_empty()) { |
9756 | 0 | Translated = true; |
9757 | 0 | return; |
9758 | 0 | } |
9759 | 29.8k | hlsl::OP *hlslOP = &helper.hlslOP; |
9760 | | |
9761 | 29.8k | Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx); |
9762 | 29.8k | if (opcode == HLSubscriptOpcode::CBufferSubscript) { |
9763 | 8.73k | dxilutil::MergeGepUse(CI); |
9764 | | // Resource ptr. |
9765 | 8.73k | Value *handle = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx); |
9766 | 8.73k | TranslateCBOperationsLegacy(handle, CI, hlslOP, helper.dxilTypeSys, |
9767 | 8.73k | helper.dataLayout, pObjHelper); |
9768 | 8.73k | Translated = true; |
9769 | 8.73k | return; |
9770 | 8.73k | } |
9771 | | |
9772 | 21.1k | if (opcode == HLSubscriptOpcode::DoubleSubscript) { |
9773 | | // Resource ptr. |
9774 | 180 | Value *handle = ptr; |
9775 | 180 | DXIL::ResourceKind RK = pObjHelper->GetRK(handle); |
9776 | 180 | Value *coord = CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx); |
9777 | 180 | Value *mipLevel = |
9778 | 180 | CI->getArgOperand(HLOperandIndex::kDoubleSubscriptMipLevelOpIdx); |
9779 | | |
9780 | 180 | auto U = CI->user_begin(); |
9781 | 180 | DXASSERT(CI->hasOneUse(), "subscript should only have one use"); |
9782 | 180 | IRBuilder<> Builder(CI); |
9783 | 180 | if (LoadInst *ldInst = dyn_cast<LoadInst>(*U)) { |
9784 | 140 | Value *Offset = UndefValue::get(Builder.getInt32Ty()); |
9785 | 140 | ResLoadHelper ldHelper(ldInst, RK, handle, coord, Offset, |
9786 | 140 | /*status*/ nullptr, mipLevel); |
9787 | 140 | TranslateBufLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout); |
9788 | 140 | ldInst->eraseFromParent(); |
9789 | 140 | } else { |
9790 | 40 | StoreInst *stInst = cast<StoreInst>(*U); |
9791 | 40 | Value *val = stInst->getValueOperand(); |
9792 | 40 | Value *UndefI = UndefValue::get(Builder.getInt32Ty()); |
9793 | 40 | TranslateStore(RK, handle, val, |
9794 | 40 | CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), |
9795 | 40 | UndefI, Builder, hlslOP, mipLevel); |
9796 | 40 | stInst->eraseFromParent(); |
9797 | 40 | } |
9798 | 180 | Translated = true; |
9799 | 180 | return; |
9800 | 180 | } |
9801 | | |
9802 | 20.9k | Type *HandleTy = hlslOP->GetHandleType(); |
9803 | 20.9k | if (ptr->getType() == hlslOP->GetNodeRecordHandleType()) { |
9804 | 0 | DXASSERT(false, "Shouldn't get here, NodeRecord subscripts should have " |
9805 | 0 | "been lowered in LowerRecordAccessToGetNodeRecordPtr"); |
9806 | 0 | return; |
9807 | 0 | } |
9808 | | |
9809 | 20.9k | if (ptr->getType() == HandleTy) { |
9810 | | // Resource ptr. |
9811 | 20.4k | Value *handle = ptr; |
9812 | 20.4k | DXIL::ResourceKind RK = DxilResource::Kind::Invalid; |
9813 | 20.4k | Type *ObjTy = nullptr; |
9814 | 20.4k | Type *RetTy = nullptr; |
9815 | 20.4k | RK = pObjHelper->GetRK(handle); |
9816 | 20.4k | if (RK == DxilResource::Kind::Invalid) { |
9817 | 0 | Translated = false; |
9818 | 0 | return; |
9819 | 0 | } |
9820 | 20.4k | ObjTy = pObjHelper->GetResourceType(handle); |
9821 | 20.4k | RetTy = ObjTy->getStructElementType(0); |
9822 | 20.4k | Translated = true; |
9823 | | |
9824 | 20.4k | if (DXIL::IsStructuredBuffer(RK)) |
9825 | 11.9k | TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP, RK, |
9826 | 11.9k | helper.dataLayout); |
9827 | 8.46k | else |
9828 | 8.46k | TranslateTypedBufferSubscript(CI, helper, pObjHelper, Translated); |
9829 | | |
9830 | 20.4k | return; |
9831 | 20.4k | } |
9832 | | |
9833 | 498 | Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx); |
9834 | 498 | if (IsLocalVariablePtr(basePtr) || IsSharedMemPtr(basePtr)) { |
9835 | | // Translate matrix into vector of array for share memory or local |
9836 | | // variable should be done in HLMatrixLowerPass |
9837 | 0 | DXASSERT_NOMSG(0); |
9838 | 0 | Translated = true; |
9839 | 0 | return; |
9840 | 0 | } |
9841 | | |
9842 | | // Other case should be take care in TranslateStructBufSubscript or |
9843 | | // TranslateCBOperations. |
9844 | 498 | Translated = false; |
9845 | 498 | } |
9846 | | |
9847 | | void TranslateSubscriptOperation(Function *F, HLOperationLowerHelper &helper, |
9848 | 12.7k | HLObjectOperationLowerHelper *pObjHelper) { |
9849 | 42.6k | for (auto U = F->user_begin(); U != F->user_end();) { |
9850 | 29.8k | Value *user = *(U++); |
9851 | 29.8k | if (!isa<Instruction>(user)) |
9852 | 0 | continue; |
9853 | | // must be call inst |
9854 | 29.8k | CallInst *CI = cast<CallInst>(user); |
9855 | 29.8k | unsigned opcode = GetHLOpcode(CI); |
9856 | 29.8k | bool Translated = true; |
9857 | 29.8k | TranslateHLSubscript(CI, static_cast<HLSubscriptOpcode>(opcode), helper, |
9858 | 29.8k | pObjHelper, Translated); |
9859 | 29.8k | if (Translated) { |
9860 | | // delete the call |
9861 | 29.3k | DXASSERT(CI->use_empty(), |
9862 | 29.3k | "else TranslateHLSubscript didn't replace/erase uses"); |
9863 | 29.3k | CI->eraseFromParent(); |
9864 | 29.3k | } |
9865 | 29.8k | } |
9866 | 12.7k | } |
9867 | | |
9868 | | // Create BitCast if ptr, otherwise, create alloca of new type, write to bitcast |
9869 | | // of alloca, and return load from alloca If bOrigAllocaTy is true: create |
9870 | | // alloca of old type instead, write to alloca, and return load from bitcast of |
9871 | | // alloca |
9872 | | static Instruction *BitCastValueOrPtr(Value *V, Instruction *Insert, Type *Ty, |
9873 | | bool bOrigAllocaTy = false, |
9874 | 164 | const Twine &Name = "") { |
9875 | 164 | IRBuilder<> Builder(Insert); |
9876 | 164 | if (Ty->isPointerTy()) { |
9877 | | // If pointer, we can bitcast directly |
9878 | 0 | return cast<Instruction>(Builder.CreateBitCast(V, Ty, Name)); |
9879 | 0 | } |
9880 | | |
9881 | | // If value, we have to alloca, store to bitcast ptr, and load |
9882 | 164 | IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Insert)); |
9883 | 164 | Type *allocaTy = bOrigAllocaTy ? V->getType()0 : Ty; |
9884 | 164 | Type *otherTy = bOrigAllocaTy ? Ty0 : V->getType(); |
9885 | 164 | Instruction *allocaInst = AllocaBuilder.CreateAlloca(allocaTy); |
9886 | 164 | Instruction *bitCast = cast<Instruction>( |
9887 | 164 | Builder.CreateBitCast(allocaInst, otherTy->getPointerTo())); |
9888 | 164 | Builder.CreateStore(V, bOrigAllocaTy ? allocaInst0 : bitCast); |
9889 | 164 | return Builder.CreateLoad(bOrigAllocaTy ? bitCast0 : allocaInst, Name); |
9890 | 164 | } |
9891 | | |
9892 | | static Instruction *CreateTransposeShuffle(IRBuilder<> &Builder, Value *vecVal, |
9893 | 0 | unsigned toRows, unsigned toCols) { |
9894 | 0 | SmallVector<int, 16> castMask(toCols * toRows); |
9895 | 0 | unsigned idx = 0; |
9896 | 0 | for (unsigned r = 0; r < toRows; r++) |
9897 | 0 | for (unsigned c = 0; c < toCols; c++) |
9898 | 0 | castMask[idx++] = c * toRows + r; |
9899 | 0 | return cast<Instruction>( |
9900 | 0 | Builder.CreateShuffleVector(vecVal, vecVal, castMask)); |
9901 | 0 | } |
9902 | | |
9903 | | void TranslateHLBuiltinOperation(Function *F, HLOperationLowerHelper &helper, |
9904 | | hlsl::HLOpcodeGroup group, |
9905 | 86.5k | HLObjectOperationLowerHelper *pObjHelper) { |
9906 | 86.5k | if (group == HLOpcodeGroup::HLIntrinsic) { |
9907 | | // map to dxil operations |
9908 | 89.4k | for (auto U = F->user_begin(); U != F->user_end();) { |
9909 | 65.8k | Value *User = *(U++); |
9910 | 65.8k | if (!isa<Instruction>(User)) |
9911 | 0 | continue; |
9912 | | // must be call inst |
9913 | 65.8k | CallInst *CI = cast<CallInst>(User); |
9914 | | |
9915 | | // Keep the instruction to lower by other function. |
9916 | 65.8k | bool Translated = true; |
9917 | | |
9918 | 65.8k | TranslateBuiltinIntrinsic(CI, helper, pObjHelper, Translated); |
9919 | | |
9920 | 65.8k | if (Translated) { |
9921 | | // delete the call |
9922 | 64.8k | DXASSERT(CI->use_empty(), |
9923 | 64.8k | "else TranslateBuiltinIntrinsic didn't replace/erase uses"); |
9924 | 64.8k | CI->eraseFromParent(); |
9925 | 64.8k | } |
9926 | 65.8k | } |
9927 | 63.0k | } else { |
9928 | 63.0k | if (group == HLOpcodeGroup::HLMatLoadStore) { |
9929 | | // Both ld/st use arg1 for the pointer. |
9930 | 0 | Type *PtrTy = |
9931 | 0 | F->getFunctionType()->getParamType(HLOperandIndex::kMatLoadPtrOpIdx); |
9932 | |
|
9933 | 0 | if (PtrTy->getPointerAddressSpace() == DXIL::kTGSMAddrSpace) { |
9934 | | // Translate matrix into vector of array for shared memory |
9935 | | // variable should be done in HLMatrixLowerPass. |
9936 | 0 | if (!F->user_empty()) |
9937 | 0 | F->getContext().emitError("Fail to lower matrix load/store."); |
9938 | 0 | } else if (PtrTy->getPointerAddressSpace() == DXIL::kDefaultAddrSpace) { |
9939 | | // Default address space may be function argument in lib target |
9940 | 0 | if (!F->user_empty()) { |
9941 | 0 | for (auto U = F->user_begin(); U != F->user_end();) { |
9942 | 0 | Value *User = *(U++); |
9943 | 0 | if (!isa<Instruction>(User)) |
9944 | 0 | continue; |
9945 | | // must be call inst |
9946 | 0 | CallInst *CI = cast<CallInst>(User); |
9947 | 0 | IRBuilder<> Builder(CI); |
9948 | 0 | HLMatLoadStoreOpcode opcode = |
9949 | 0 | static_cast<HLMatLoadStoreOpcode>(hlsl::GetHLOpcode(CI)); |
9950 | 0 | switch (opcode) { |
9951 | 0 | case HLMatLoadStoreOpcode::ColMatStore: |
9952 | 0 | case HLMatLoadStoreOpcode::RowMatStore: { |
9953 | 0 | Value *vecVal = |
9954 | 0 | CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx); |
9955 | 0 | Value *matPtr = |
9956 | 0 | CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx); |
9957 | 0 | matPtr = SkipAddrSpaceCast(matPtr); |
9958 | 0 | unsigned addrSpace = |
9959 | 0 | cast<PointerType>(matPtr->getType())->getAddressSpace(); |
9960 | |
|
9961 | 0 | Value *castPtr = Builder.CreateBitCast( |
9962 | 0 | matPtr, vecVal->getType()->getPointerTo(addrSpace)); |
9963 | 0 | Builder.CreateStore(vecVal, castPtr); |
9964 | 0 | CI->eraseFromParent(); |
9965 | 0 | } break; |
9966 | 0 | case HLMatLoadStoreOpcode::ColMatLoad: |
9967 | 0 | case HLMatLoadStoreOpcode::RowMatLoad: { |
9968 | 0 | Value *matPtr = |
9969 | 0 | CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx); |
9970 | 0 | matPtr = SkipAddrSpaceCast(matPtr); |
9971 | 0 | unsigned addrSpace = |
9972 | 0 | cast<PointerType>(matPtr->getType())->getAddressSpace(); |
9973 | 0 | Value *castPtr = Builder.CreateBitCast( |
9974 | 0 | matPtr, CI->getType()->getPointerTo(addrSpace)); |
9975 | 0 | Value *vecVal = Builder.CreateLoad(castPtr); |
9976 | 0 | CI->replaceAllUsesWith(vecVal); |
9977 | 0 | CI->eraseFromParent(); |
9978 | 0 | } break; |
9979 | 0 | } |
9980 | 0 | } |
9981 | 0 | } |
9982 | 0 | } |
9983 | 63.0k | } else if (group == HLOpcodeGroup::HLCast) { |
9984 | | // HLCast may be used on matrix value function argument in lib target |
9985 | 2.03k | if (!F->user_empty()) { |
9986 | 5.46k | for (auto U = F->user_begin(); U != F->user_end();) { |
9987 | 3.42k | Value *User = *(U++); |
9988 | 3.42k | if (!isa<Instruction>(User)) |
9989 | 0 | continue; |
9990 | | // must be call inst |
9991 | 3.42k | CallInst *CI = cast<CallInst>(User); |
9992 | 3.42k | IRBuilder<> Builder(CI); |
9993 | 3.42k | HLCastOpcode opcode = |
9994 | 3.42k | static_cast<HLCastOpcode>(hlsl::GetHLOpcode(CI)); |
9995 | 3.42k | bool bTranspose = false; |
9996 | 3.42k | bool bColDest = false; |
9997 | 3.42k | switch (opcode) { |
9998 | 0 | case HLCastOpcode::RowMatrixToColMatrix: |
9999 | 0 | bColDest = true; |
10000 | 0 | LLVM_FALLTHROUGH; |
10001 | 0 | case HLCastOpcode::ColMatrixToRowMatrix: |
10002 | 0 | bTranspose = true; |
10003 | 0 | LLVM_FALLTHROUGH; |
10004 | 78 | case HLCastOpcode::ColMatrixToVecCast: |
10005 | 164 | case HLCastOpcode::RowMatrixToVecCast: { |
10006 | 164 | Value *matVal = |
10007 | 164 | CI->getArgOperand(HLOperandIndex::kInitFirstArgOpIdx); |
10008 | 164 | Value *vecVal = |
10009 | 164 | BitCastValueOrPtr(matVal, CI, CI->getType(), |
10010 | 164 | /*bOrigAllocaTy*/ false, matVal->getName()); |
10011 | 164 | if (bTranspose) { |
10012 | 0 | HLMatrixType MatTy = HLMatrixType::cast(matVal->getType()); |
10013 | 0 | unsigned row = MatTy.getNumRows(); |
10014 | 0 | unsigned col = MatTy.getNumColumns(); |
10015 | 0 | if (bColDest) |
10016 | 0 | std::swap(row, col); |
10017 | 0 | vecVal = CreateTransposeShuffle(Builder, vecVal, row, col); |
10018 | 0 | } |
10019 | 164 | CI->replaceAllUsesWith(vecVal); |
10020 | 164 | CI->eraseFromParent(); |
10021 | 164 | } break; |
10022 | 3.42k | } |
10023 | 3.42k | } |
10024 | 2.03k | } |
10025 | 60.9k | } else if (group == HLOpcodeGroup::HLSubscript) { |
10026 | 12.7k | TranslateSubscriptOperation(F, helper, pObjHelper); |
10027 | 12.7k | } |
10028 | | // map to math function or llvm ir |
10029 | 63.0k | } |
10030 | 86.5k | } |
10031 | | |
10032 | | typedef std::unordered_map<llvm::Instruction *, llvm::Value *> HandleMap; |
10033 | | static void TranslateHLExtension(Function *F, |
10034 | | HLSLExtensionsCodegenHelper *helper, |
10035 | | OP &hlslOp, |
10036 | 68 | HLObjectOperationLowerHelper &objHelper) { |
10037 | | // Find all calls to the function F. |
10038 | | // Store the calls in a vector for now to be replaced the loop below. |
10039 | | // We use a two step "find then replace" to avoid removing uses while |
10040 | | // iterating. |
10041 | 68 | SmallVector<CallInst *, 8> CallsToReplace; |
10042 | 72 | for (User *U : F->users()) { |
10043 | 72 | if (CallInst *CI = dyn_cast<CallInst>(U)) { |
10044 | 72 | CallsToReplace.push_back(CI); |
10045 | 72 | } |
10046 | 72 | } |
10047 | | |
10048 | | // Get the lowering strategy to use for this intrinsic. |
10049 | 68 | llvm::StringRef LowerStrategy = GetHLLowerStrategy(F); |
10050 | 68 | HLObjectExtensionLowerHelper extObjHelper(objHelper); |
10051 | 68 | ExtensionLowering lower(LowerStrategy, helper, hlslOp, extObjHelper); |
10052 | | |
10053 | | // Replace all calls that were successfully translated. |
10054 | 72 | for (CallInst *CI : CallsToReplace) { |
10055 | 72 | Value *Result = lower.Translate(CI); |
10056 | 72 | if (Result && Result != CI) { |
10057 | 72 | CI->replaceAllUsesWith(Result); |
10058 | 72 | CI->eraseFromParent(); |
10059 | 72 | } |
10060 | 72 | } |
10061 | 68 | } |
10062 | | |
10063 | | namespace hlsl { |
10064 | | |
10065 | | void TranslateBuiltinOperations( |
10066 | | HLModule &HLM, HLSLExtensionsCodegenHelper *extCodegenHelper, |
10067 | 20.4k | std::unordered_set<Instruction *> &UpdateCounterSet) { |
10068 | 20.4k | HLOperationLowerHelper helper(HLM); |
10069 | | |
10070 | 20.4k | HLObjectOperationLowerHelper objHelper = {HLM, UpdateCounterSet}; |
10071 | | |
10072 | 20.4k | Module *M = HLM.GetModule(); |
10073 | | |
10074 | 20.4k | SmallVector<Function *, 4> NonUniformResourceIndexIntrinsics; |
10075 | | |
10076 | | // generate dxil operation |
10077 | 197k | for (iplist<Function>::iterator F : M->getFunctionList()) { |
10078 | 197k | if (F->user_empty()) |
10079 | 34.8k | continue; |
10080 | 162k | if (!F->isDeclaration()) { |
10081 | 172 | continue; |
10082 | 172 | } |
10083 | 162k | hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F); |
10084 | 162k | if (group == HLOpcodeGroup::NotHL) { |
10085 | | // Nothing to do. |
10086 | 75.8k | continue; |
10087 | 75.8k | } |
10088 | 86.6k | if (group == HLOpcodeGroup::HLExtIntrinsic) { |
10089 | 68 | TranslateHLExtension(F, extCodegenHelper, helper.hlslOP, objHelper); |
10090 | 68 | continue; |
10091 | 68 | } |
10092 | 86.5k | if (group == HLOpcodeGroup::HLIntrinsic) { |
10093 | 23.5k | CallInst *CI = cast<CallInst>(*F->user_begin()); // must be call inst |
10094 | 23.5k | unsigned opcode = hlsl::GetHLOpcode(CI); |
10095 | 23.5k | if (opcode == (unsigned)IntrinsicOp::IOP_NonUniformResourceIndex) { |
10096 | 116 | NonUniformResourceIndexIntrinsics.push_back(F); |
10097 | 116 | continue; |
10098 | 116 | } |
10099 | 23.5k | } |
10100 | 86.4k | TranslateHLBuiltinOperation(F, helper, group, &objHelper); |
10101 | 86.4k | } |
10102 | | |
10103 | | // Translate last so value placed in NonUniformSet is still valid. |
10104 | 20.4k | if (!NonUniformResourceIndexIntrinsics.empty()) { |
10105 | 116 | for (auto F : NonUniformResourceIndexIntrinsics) { |
10106 | 116 | TranslateHLBuiltinOperation(F, helper, HLOpcodeGroup::HLIntrinsic, |
10107 | 116 | &objHelper); |
10108 | 116 | } |
10109 | 90 | } |
10110 | 20.4k | } |
10111 | | |
10112 | | void EmitGetNodeRecordPtrAndUpdateUsers(HLOperationLowerHelper &helper, |
10113 | 656 | CallInst *CI, Value *ArrayIndex) { |
10114 | 656 | IRBuilder<> Builder(CI); |
10115 | 656 | Value *opArg = nullptr; |
10116 | 656 | Value *Handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
10117 | 656 | opArg = Builder.getInt32((unsigned)DXIL::OpCode::GetNodeRecordPtr); |
10118 | 656 | StructType *origRecordUDT = |
10119 | 656 | cast<StructType>(cast<PointerType>(CI->getType())->getElementType()); |
10120 | 656 | Type *getNodeRecordPtrRT = origRecordUDT; |
10121 | | // Translate node record type here |
10122 | 656 | auto findIt = helper.loweredTypes.find(origRecordUDT); |
10123 | 656 | if (findIt != helper.loweredTypes.end()) { |
10124 | 244 | getNodeRecordPtrRT = findIt->second; |
10125 | 412 | } else { |
10126 | 412 | getNodeRecordPtrRT = GetLoweredUDT(origRecordUDT, &helper.dxilTypeSys); |
10127 | 412 | if (origRecordUDT != getNodeRecordPtrRT) |
10128 | 112 | helper.loweredTypes[origRecordUDT] = getNodeRecordPtrRT; |
10129 | 412 | } |
10130 | 656 | getNodeRecordPtrRT = |
10131 | 656 | getNodeRecordPtrRT->getPointerTo(DXIL::kNodeRecordAddrSpace); |
10132 | 656 | Function *getNodeRecordPtr = helper.hlslOP.GetOpFunc( |
10133 | 656 | DXIL::OpCode::GetNodeRecordPtr, getNodeRecordPtrRT); |
10134 | 656 | Value *args[] = {opArg, Handle, ArrayIndex}; |
10135 | 656 | Value *NodeRecordPtr = Builder.CreateCall(getNodeRecordPtr, args); |
10136 | 656 | ReplaceUsesForLoweredUDT(CI, NodeRecordPtr); |
10137 | 656 | } |
10138 | | |
10139 | 20.4k | void LowerRecordAccessToGetNodeRecordPtr(HLModule &HLM) { |
10140 | 20.4k | Module *M = HLM.GetModule(); |
10141 | 20.4k | HLOperationLowerHelper helper(HLM); |
10142 | 164k | for (iplist<Function>::iterator F : M->getFunctionList()) { |
10143 | 164k | if (F->user_empty()) |
10144 | 32.3k | continue; |
10145 | 132k | hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F); |
10146 | 132k | if (group == HLOpcodeGroup::HLSubscript) { |
10147 | 43.6k | for (auto U = F->user_begin(); U != F->user_end();) { |
10148 | 30.5k | Value *User = *(U++); |
10149 | 30.5k | if (!isa<Instruction>(User)) |
10150 | 0 | continue; |
10151 | | // must be call inst |
10152 | 30.5k | CallInst *CI = cast<CallInst>(User); |
10153 | 30.5k | HLSubscriptOpcode opcode = |
10154 | 30.5k | static_cast<HLSubscriptOpcode>(hlsl::GetHLOpcode(CI)); |
10155 | 30.5k | if (opcode != HLSubscriptOpcode::DefaultSubscript) |
10156 | 9.47k | continue; |
10157 | | |
10158 | 21.0k | hlsl::OP *OP = &helper.hlslOP; |
10159 | 21.0k | Value *Handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
10160 | 21.0k | if (Handle->getType() != OP->GetNodeRecordHandleType()) { |
10161 | 20.4k | continue; |
10162 | 20.4k | } |
10163 | | |
10164 | 656 | Value *Index = CI->getNumArgOperands() > 2 |
10165 | 656 | ? CI->getArgOperand(2)328 |
10166 | 656 | : ConstantInt::get(helper.i32Ty, 0)328 ; |
10167 | 656 | EmitGetNodeRecordPtrAndUpdateUsers(helper, CI, Index); |
10168 | 656 | CI->eraseFromParent(); |
10169 | 656 | } |
10170 | 13.0k | } |
10171 | 132k | } |
10172 | 20.4k | } |
10173 | | } // namespace hlsl |