/home/runner/work/DirectXShaderCompiler/DirectXShaderCompiler/lib/HLSL/HLOperationLower.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////////////// |
2 | | // // |
3 | | // HLOperationLower.cpp // |
4 | | // Copyright (C) Microsoft Corporation. All rights reserved. // |
5 | | // This file is distributed under the University of Illinois Open Source // |
6 | | // License. See LICENSE.TXT for details. // |
7 | | // // |
8 | | // Lower functions to lower HL operations to DXIL operations. // |
9 | | // // |
10 | | /////////////////////////////////////////////////////////////////////////////// |
11 | | |
12 | | #include "dxc/DXIL/DxilConstants.h" |
13 | | #define _USE_MATH_DEFINES |
14 | | #include <array> |
15 | | #include <cmath> |
16 | | #include <functional> |
17 | | #include <unordered_set> |
18 | | |
19 | | #include "dxc/DXIL/DxilConstants.h" |
20 | | #include "dxc/DXIL/DxilInstructions.h" |
21 | | #include "dxc/DXIL/DxilModule.h" |
22 | | #include "dxc/DXIL/DxilOperations.h" |
23 | | #include "dxc/DXIL/DxilResourceProperties.h" |
24 | | #include "dxc/DXIL/DxilUtil.h" |
25 | | #include "dxc/HLSL/DxilPoisonValues.h" |
26 | | #include "dxc/HLSL/HLLowerUDT.h" |
27 | | #include "dxc/HLSL/HLMatrixLowerHelper.h" |
28 | | #include "dxc/HLSL/HLMatrixType.h" |
29 | | #include "dxc/HLSL/HLModule.h" |
30 | | #include "dxc/HLSL/HLOperationLower.h" |
31 | | #include "dxc/HLSL/HLOperationLowerExtension.h" |
32 | | #include "dxc/HLSL/HLOperations.h" |
33 | | #include "dxc/HlslIntrinsicOp.h" |
34 | | |
35 | | #include "llvm/ADT/APSInt.h" |
36 | | #include "llvm/IR/GetElementPtrTypeIterator.h" |
37 | | #include "llvm/IR/IRBuilder.h" |
38 | | #include "llvm/IR/Instructions.h" |
39 | | #include "llvm/IR/IntrinsicInst.h" |
40 | | #include "llvm/IR/Module.h" |
41 | | |
42 | | using namespace llvm; |
43 | | using namespace hlsl; |
44 | | |
45 | | struct HLOperationLowerHelper { |
46 | | HLModule &M; |
47 | | OP &hlslOP; |
48 | | Type *voidTy; |
49 | | Type *f32Ty; |
50 | | Type *i32Ty; |
51 | | Type *i16Ty; |
52 | | llvm::Type *i1Ty; |
53 | | Type *i8Ty; |
54 | | DxilTypeSystem &dxilTypeSys; |
55 | | DxilFunctionProps *functionProps; |
56 | | DataLayout dataLayout; |
57 | | SmallDenseMap<Type *, Type *, 4> loweredTypes; |
58 | | HLOperationLowerHelper(HLModule &HLM); |
59 | | }; |
60 | | |
61 | | HLOperationLowerHelper::HLOperationLowerHelper(HLModule &HLM) |
62 | 40.6k | : M(HLM), hlslOP(*HLM.GetOP()), dxilTypeSys(HLM.GetTypeSystem()), |
63 | 40.6k | dataLayout(DataLayout(HLM.GetHLOptions().bUseMinPrecision |
64 | 40.6k | ? hlsl::DXIL::kLegacyLayoutString38.3k |
65 | 40.6k | : hlsl::DXIL::kNewLayoutString2.30k )) { |
66 | 40.6k | llvm::LLVMContext &Ctx = HLM.GetCtx(); |
67 | 40.6k | voidTy = Type::getVoidTy(Ctx); |
68 | 40.6k | f32Ty = Type::getFloatTy(Ctx); |
69 | 40.6k | i32Ty = Type::getInt32Ty(Ctx); |
70 | 40.6k | i16Ty = Type::getInt16Ty(Ctx); |
71 | 40.6k | i1Ty = Type::getInt1Ty(Ctx); |
72 | 40.6k | i8Ty = Type::getInt8Ty(Ctx); |
73 | 40.6k | Function *EntryFunc = HLM.GetEntryFunction(); |
74 | 40.6k | functionProps = nullptr; |
75 | 40.6k | if (HLM.HasDxilFunctionProps(EntryFunc)) |
76 | 34.9k | functionProps = &HLM.GetDxilFunctionProps(EntryFunc); |
77 | 40.6k | } |
78 | | |
79 | | struct HLObjectOperationLowerHelper { |
80 | | private: |
81 | | // For object intrinsics. |
82 | | HLModule &HLM; |
83 | | struct ResAttribute { |
84 | | DXIL::ResourceClass RC; |
85 | | DXIL::ResourceKind RK; |
86 | | Type *ResourceType; |
87 | | }; |
88 | | std::unordered_map<Value *, ResAttribute> HandleMetaMap; |
89 | | std::unordered_set<Instruction *> &UpdateCounterSet; |
90 | | // Map from pointer of cbuffer to pointer of resource. |
91 | | // For cbuffer like this: |
92 | | // cbuffer A { |
93 | | // Texture2D T; |
94 | | // }; |
95 | | // A global resource Texture2D T2 will be created for Texture2D T. |
96 | | // CBPtrToResourceMap[T] will return T2. |
97 | | std::unordered_map<Value *, Value *> CBPtrToResourceMap; |
98 | | |
99 | | public: |
100 | | HLObjectOperationLowerHelper(HLModule &HLM, |
101 | | std::unordered_set<Instruction *> &UpdateCounter) |
102 | 20.3k | : HLM(HLM), UpdateCounterSet(UpdateCounter) {} |
103 | 18.4k | DXIL::ResourceClass GetRC(Value *Handle) { |
104 | 18.4k | ResAttribute &Res = FindCreateHandleResourceBase(Handle); |
105 | 18.4k | return Res.RC; |
106 | 18.4k | } |
107 | 45.6k | DXIL::ResourceKind GetRK(Value *Handle) { |
108 | 45.6k | ResAttribute &Res = FindCreateHandleResourceBase(Handle); |
109 | 45.6k | return Res.RK; |
110 | 45.6k | } |
111 | 20.2k | Type *GetResourceType(Value *Handle) { |
112 | 20.2k | ResAttribute &Res = FindCreateHandleResourceBase(Handle); |
113 | 20.2k | return Res.ResourceType; |
114 | 20.2k | } |
115 | | |
116 | 2.94k | void MarkHasCounter(Value *handle, Type *i8Ty) { |
117 | 2.94k | CallInst *CIHandle = cast<CallInst>(handle); |
118 | 2.94k | DXASSERT(hlsl::GetHLOpcodeGroup(CIHandle->getCalledFunction()) == |
119 | 2.94k | HLOpcodeGroup::HLAnnotateHandle, |
120 | 2.94k | "else invalid handle"); |
121 | | // Mark has counter for the input handle. |
122 | 2.94k | Value *counterHandle = |
123 | 2.94k | CIHandle->getArgOperand(HLOperandIndex::kHandleOpIdx); |
124 | | // Change kind into StructurBufferWithCounter. |
125 | 2.94k | Constant *Props = cast<Constant>(CIHandle->getArgOperand( |
126 | 2.94k | HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx)); |
127 | 2.94k | DxilResourceProperties RP = resource_helper::loadPropsFromConstant(*Props); |
128 | 2.94k | RP.Basic.SamplerCmpOrHasCounter = true; |
129 | | |
130 | 2.94k | CIHandle->setArgOperand( |
131 | 2.94k | HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx, |
132 | 2.94k | resource_helper::getAsConstant(RP, |
133 | 2.94k | HLM.GetOP()->GetResourcePropertiesType(), |
134 | 2.94k | *HLM.GetShaderModel())); |
135 | | |
136 | 2.94k | DXIL::ResourceClass RC = GetRC(handle); |
137 | 2.94k | DXASSERT_LOCALVAR(RC, RC == DXIL::ResourceClass::UAV, |
138 | 2.94k | "must UAV for counter"); |
139 | 2.94k | std::unordered_set<Value *> resSet; |
140 | 2.94k | MarkHasCounterOnCreateHandle(counterHandle, resSet); |
141 | 2.94k | } |
142 | | |
143 | 28 | DxilResourceBase *FindCBufferResourceFromHandle(Value *handle) { |
144 | 28 | if (CallInst *CI = dyn_cast<CallInst>(handle)) { |
145 | 28 | hlsl::HLOpcodeGroup group = |
146 | 28 | hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()); |
147 | 28 | if (group == HLOpcodeGroup::HLAnnotateHandle) { |
148 | 28 | handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
149 | 28 | } |
150 | 28 | } |
151 | | |
152 | 28 | Constant *symbol = nullptr; |
153 | 28 | if (CallInst *CI = dyn_cast<CallInst>(handle)) { |
154 | 28 | hlsl::HLOpcodeGroup group = |
155 | 28 | hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()); |
156 | 28 | if (group == HLOpcodeGroup::HLCreateHandle) { |
157 | 28 | symbol = dyn_cast<Constant>( |
158 | 28 | CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx)); |
159 | 28 | } |
160 | 28 | } |
161 | | |
162 | 28 | if (!symbol) |
163 | 0 | return nullptr; |
164 | | |
165 | 28 | for (const std::unique_ptr<DxilCBuffer> &res : HLM.GetCBuffers()) { |
166 | 28 | if (res->GetGlobalSymbol() == symbol) |
167 | 28 | return res.get(); |
168 | 28 | } |
169 | 0 | return nullptr; |
170 | 28 | } |
171 | | |
172 | | Value *GetOrCreateResourceForCbPtr(GetElementPtrInst *CbPtr, |
173 | | GlobalVariable *CbGV, |
174 | 314 | DxilResourceProperties &RP) { |
175 | | // Change array idx to 0 to make sure all array ptr share same key. |
176 | 314 | Value *Key = UniformCbPtr(CbPtr, CbGV); |
177 | 314 | if (CBPtrToResourceMap.count(Key)) |
178 | 24 | return CBPtrToResourceMap[Key]; |
179 | 290 | Value *Resource = CreateResourceForCbPtr(CbPtr, CbGV, RP); |
180 | 290 | CBPtrToResourceMap[Key] = Resource; |
181 | 290 | return Resource; |
182 | 314 | } |
183 | | |
184 | 314 | Value *LowerCbResourcePtr(GetElementPtrInst *CbPtr, Value *ResPtr) { |
185 | | // Simple case. |
186 | 314 | if (ResPtr->getType() == CbPtr->getType()) |
187 | 314 | return ResPtr; |
188 | | |
189 | | // Array case. |
190 | 0 | DXASSERT_NOMSG(ResPtr->getType()->getPointerElementType()->isArrayTy()); |
191 | |
|
192 | 0 | IRBuilder<> Builder(CbPtr); |
193 | 0 | gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr); |
194 | |
|
195 | 0 | Value *arrayIdx = GEPIt.getOperand(); |
196 | | |
197 | | // Only calc array idx and size. |
198 | | // Ignore struct type part. |
199 | 0 | for (; GEPIt != E; ++GEPIt) { |
200 | 0 | if (GEPIt->isArrayTy()) { |
201 | 0 | arrayIdx = Builder.CreateMul( |
202 | 0 | arrayIdx, Builder.getInt32(GEPIt->getArrayNumElements())); |
203 | 0 | arrayIdx = Builder.CreateAdd(arrayIdx, GEPIt.getOperand()); |
204 | 0 | } |
205 | 0 | } |
206 | |
|
207 | 0 | return Builder.CreateGEP(ResPtr, {Builder.getInt32(0), arrayIdx}); |
208 | 314 | } |
209 | | |
210 | 314 | DxilResourceProperties GetResPropsFromAnnotateHandle(CallInst *Anno) { |
211 | 314 | Constant *Props = cast<Constant>(Anno->getArgOperand( |
212 | 314 | HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx)); |
213 | 314 | DxilResourceProperties RP = resource_helper::loadPropsFromConstant(*Props); |
214 | 314 | return RP; |
215 | 314 | } |
216 | | |
217 | | private: |
218 | 84.4k | ResAttribute &FindCreateHandleResourceBase(Value *Handle) { |
219 | 84.4k | if (HandleMetaMap.count(Handle)) |
220 | 47.7k | return HandleMetaMap[Handle]; |
221 | | |
222 | | // Add invalid first to avoid dead loop. |
223 | 36.6k | HandleMetaMap[Handle] = { |
224 | 36.6k | DXIL::ResourceClass::Invalid, DXIL::ResourceKind::Invalid, |
225 | 36.6k | StructType::get(Type::getVoidTy(HLM.GetCtx()), nullptr)}; |
226 | 36.6k | if (CallInst *CI = dyn_cast<CallInst>(Handle)) { |
227 | 36.6k | hlsl::HLOpcodeGroup group = |
228 | 36.6k | hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()); |
229 | 36.6k | if (group == HLOpcodeGroup::HLAnnotateHandle) { |
230 | 36.6k | Constant *Props = cast<Constant>(CI->getArgOperand( |
231 | 36.6k | HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx)); |
232 | 36.6k | DxilResourceProperties RP = |
233 | 36.6k | resource_helper::loadPropsFromConstant(*Props); |
234 | 36.6k | Type *ResTy = |
235 | 36.6k | CI->getArgOperand(HLOperandIndex::kAnnotateHandleResourceTypeOpIdx) |
236 | 36.6k | ->getType(); |
237 | | |
238 | 36.6k | ResAttribute Attrib = {RP.getResourceClass(), RP.getResourceKind(), |
239 | 36.6k | ResTy}; |
240 | | |
241 | 36.6k | HandleMetaMap[Handle] = Attrib; |
242 | 36.6k | return HandleMetaMap[Handle]; |
243 | 36.6k | } |
244 | 36.6k | } |
245 | 6 | dxilutil::EmitErrorOnContext(Handle->getContext(), |
246 | 6 | "cannot map resource to handle."); |
247 | | |
248 | 6 | return HandleMetaMap[Handle]; |
249 | 36.6k | } |
250 | | CallInst *FindCreateHandle(Value *handle, |
251 | 0 | std::unordered_set<Value *> &resSet) { |
252 | 0 | // Already checked. |
253 | 0 | if (resSet.count(handle)) |
254 | 0 | return nullptr; |
255 | 0 | resSet.insert(handle); |
256 | 0 |
|
257 | 0 | if (CallInst *CI = dyn_cast<CallInst>(handle)) |
258 | 0 | return CI; |
259 | 0 | if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) { |
260 | 0 | if (CallInst *CI = FindCreateHandle(Sel->getTrueValue(), resSet)) |
261 | 0 | return CI; |
262 | 0 | if (CallInst *CI = FindCreateHandle(Sel->getFalseValue(), resSet)) |
263 | 0 | return CI; |
264 | 0 | return nullptr; |
265 | 0 | } |
266 | 0 | if (PHINode *Phi = dyn_cast<PHINode>(handle)) { |
267 | 0 | for (unsigned i = 0; i < Phi->getNumOperands(); i++) { |
268 | 0 | if (CallInst *CI = FindCreateHandle(Phi->getOperand(i), resSet)) |
269 | 0 | return CI; |
270 | 0 | } |
271 | 0 | return nullptr; |
272 | 0 | } |
273 | 0 |
|
274 | 0 | return nullptr; |
275 | 0 | } |
276 | | void MarkHasCounterOnCreateHandle(Value *handle, |
277 | 2.94k | std::unordered_set<Value *> &resSet) { |
278 | | // Already checked. |
279 | 2.94k | if (resSet.count(handle)) |
280 | 0 | return; |
281 | 2.94k | resSet.insert(handle); |
282 | | |
283 | 2.94k | if (CallInst *CI = dyn_cast<CallInst>(handle)) { |
284 | 2.94k | Value *Res = |
285 | 2.94k | CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx); |
286 | 2.94k | LoadInst *LdRes = dyn_cast<LoadInst>(Res); |
287 | 2.94k | if (LdRes) { |
288 | 2.93k | UpdateCounterSet.insert(LdRes); |
289 | 2.93k | return; |
290 | 2.93k | } |
291 | 8 | if (CallInst *CallRes = dyn_cast<CallInst>(Res)) { |
292 | 8 | hlsl::HLOpcodeGroup group = |
293 | 8 | hlsl::GetHLOpcodeGroup(CallRes->getCalledFunction()); |
294 | 8 | if (group == HLOpcodeGroup::HLCast) { |
295 | 8 | HLCastOpcode opcode = |
296 | 8 | static_cast<HLCastOpcode>(hlsl::GetHLOpcode(CallRes)); |
297 | 8 | if (opcode == HLCastOpcode::HandleToResCast) { |
298 | 8 | if (Instruction *Hdl = dyn_cast<Instruction>( |
299 | 8 | CallRes->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx))) |
300 | 8 | UpdateCounterSet.insert(Hdl); |
301 | 8 | return; |
302 | 8 | } |
303 | 8 | } |
304 | 8 | } |
305 | 0 | dxilutil::EmitErrorOnInstruction(CI, "cannot map resource to handle."); |
306 | 0 | return; |
307 | 8 | } |
308 | 0 | if (SelectInst *Sel = dyn_cast<SelectInst>(handle)) { |
309 | 0 | MarkHasCounterOnCreateHandle(Sel->getTrueValue(), resSet); |
310 | 0 | MarkHasCounterOnCreateHandle(Sel->getFalseValue(), resSet); |
311 | 0 | } |
312 | 0 | if (PHINode *Phi = dyn_cast<PHINode>(handle)) { |
313 | 0 | for (unsigned i = 0; i < Phi->getNumOperands(); i++) { |
314 | 0 | MarkHasCounterOnCreateHandle(Phi->getOperand(i), resSet); |
315 | 0 | } |
316 | 0 | } |
317 | 0 | } |
318 | | |
319 | 314 | Value *UniformCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV) { |
320 | 314 | gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr); |
321 | 314 | std::vector<Value *> idxList(CbPtr->idx_begin(), CbPtr->idx_end()); |
322 | 314 | unsigned i = 0; |
323 | 314 | IRBuilder<> Builder(HLM.GetCtx()); |
324 | 314 | Value *zero = Builder.getInt32(0); |
325 | 1.29k | for (; GEPIt != E; ++GEPIt, ++i982 ) { |
326 | 982 | ConstantInt *ImmIdx = dyn_cast<ConstantInt>(GEPIt.getOperand()); |
327 | 982 | if (!ImmIdx) { |
328 | | // Remove dynamic indexing to avoid crash. |
329 | 8 | idxList[i] = zero; |
330 | 8 | } |
331 | 982 | } |
332 | | |
333 | 314 | Value *Key = Builder.CreateInBoundsGEP(CbGV, idxList); |
334 | 314 | return Key; |
335 | 314 | } |
336 | | |
337 | | Value *CreateResourceForCbPtr(GetElementPtrInst *CbPtr, GlobalVariable *CbGV, |
338 | 290 | DxilResourceProperties &RP) { |
339 | 290 | Type *CbTy = CbPtr->getPointerOperandType(); |
340 | 290 | DXASSERT_LOCALVAR(CbTy, CbTy == CbGV->getType(), |
341 | 290 | "else arg not point to var"); |
342 | | |
343 | 290 | gep_type_iterator GEPIt = gep_type_begin(CbPtr), E = gep_type_end(CbPtr); |
344 | 290 | unsigned i = 0; |
345 | 290 | IRBuilder<> Builder(HLM.GetCtx()); |
346 | 290 | unsigned arraySize = 1; |
347 | 290 | DxilTypeSystem &typeSys = HLM.GetTypeSystem(); |
348 | | |
349 | 290 | std::string Name; |
350 | 1.19k | for (; GEPIt != E; ++GEPIt, ++i902 ) { |
351 | 902 | if (GEPIt->isArrayTy()) { |
352 | 72 | arraySize *= GEPIt->getArrayNumElements(); |
353 | 72 | if (!Name.empty()) |
354 | 72 | Name += "."; |
355 | 72 | if (ConstantInt *ImmIdx = dyn_cast<ConstantInt>(GEPIt.getOperand())) { |
356 | 64 | unsigned idx = ImmIdx->getLimitedValue(); |
357 | 64 | Name += std::to_string(idx); |
358 | 64 | } |
359 | 830 | } else if (GEPIt->isStructTy()) { |
360 | 540 | DxilStructAnnotation *typeAnnot = |
361 | 540 | typeSys.GetStructAnnotation(cast<StructType>(*GEPIt)); |
362 | 540 | DXASSERT_NOMSG(typeAnnot); |
363 | 540 | unsigned idx = cast<ConstantInt>(GEPIt.getOperand())->getLimitedValue(); |
364 | 540 | DXASSERT_NOMSG(typeAnnot->GetNumFields() > idx); |
365 | 540 | DxilFieldAnnotation &fieldAnnot = typeAnnot->GetFieldAnnotation(idx); |
366 | 540 | if (!Name.empty()) |
367 | 250 | Name += "."; |
368 | 540 | Name += fieldAnnot.GetFieldName(); |
369 | 540 | } |
370 | 902 | } |
371 | | |
372 | 290 | Type *Ty = CbPtr->getResultElementType(); |
373 | | // Not support resource array in cbuffer. |
374 | 290 | unsigned ResBinding = |
375 | 290 | HLM.GetBindingForResourceInCB(CbPtr, CbGV, RP.getResourceClass()); |
376 | 290 | return CreateResourceGV(Ty, Name, RP, ResBinding); |
377 | 290 | } |
378 | | |
379 | | Value *CreateResourceGV(Type *Ty, StringRef Name, DxilResourceProperties &RP, |
380 | 290 | unsigned ResBinding) { |
381 | 290 | Module &M = *HLM.GetModule(); |
382 | 290 | Constant *GV = M.getOrInsertGlobal(Name, Ty); |
383 | | // Create resource and set GV as globalSym. |
384 | 290 | DxilResourceBase *Res = HLM.AddResourceWithGlobalVariableAndProps(GV, RP); |
385 | 290 | DXASSERT(Res, "fail to create resource for global variable in cbuffer"); |
386 | 290 | Res->SetLowerBound(ResBinding); |
387 | 290 | return GV; |
388 | 290 | } |
389 | | }; |
390 | | |
391 | | // Helper for lowering resource extension methods. |
392 | | struct HLObjectExtensionLowerHelper : public hlsl::HLResourceLookup { |
393 | | explicit HLObjectExtensionLowerHelper(HLObjectOperationLowerHelper &ObjHelper) |
394 | 68 | : m_ObjHelper(ObjHelper) {} |
395 | | |
396 | 6 | virtual bool GetResourceKindName(Value *HLHandle, const char **ppName) { |
397 | 6 | DXIL::ResourceKind K = m_ObjHelper.GetRK(HLHandle); |
398 | 6 | bool Success = K != DXIL::ResourceKind::Invalid; |
399 | 6 | if (Success) { |
400 | 6 | *ppName = hlsl::GetResourceKindName(K); |
401 | 6 | } |
402 | 6 | return Success; |
403 | 6 | } |
404 | | |
405 | | private: |
406 | | HLObjectOperationLowerHelper &m_ObjHelper; |
407 | | }; |
408 | | |
409 | | using IntrinsicLowerFuncTy = Value *(CallInst *CI, IntrinsicOp IOP, |
410 | | DXIL::OpCode opcode, |
411 | | HLOperationLowerHelper &helper, |
412 | | HLObjectOperationLowerHelper *pObjHelper, |
413 | | bool &Translated); |
414 | | |
415 | | struct IntrinsicLower { |
416 | | // Intrinsic opcode. |
417 | | IntrinsicOp IntriOpcode; |
418 | | // Lower function. |
419 | | IntrinsicLowerFuncTy &LowerFunc; |
420 | | // DXIL opcode if can direct map. |
421 | | DXIL::OpCode DxilOpcode; |
422 | | }; |
423 | | |
424 | | // IOP intrinsics. |
425 | | namespace { |
426 | | |
427 | | // Creates the necessary scalar calls to for a "trivial" operation where only |
428 | | // call instructions to a single function type are needed. |
429 | | // The overload type `Ty` determines what scalarization might be required. |
430 | | // Elements of any vectors in `refArgs` are extracted into scalars for each |
431 | | // call generated while the same scalar values are used unaltered in each call. |
432 | | // Utility objects `HlslOp` and `Builder` are used to generate calls to the |
433 | | // given `DxilFunc` for each set of scalar arguments. |
434 | | // The results are reconstructed into the given `RetTy` as needed. |
435 | | Value *TrivialDxilOperation(Function *dxilFunc, OP::OpCode opcode, |
436 | | ArrayRef<Value *> refArgs, Type *Ty, Type *RetTy, |
437 | 32.2k | OP *hlslOP, IRBuilder<> &Builder) { |
438 | 32.2k | unsigned argNum = refArgs.size(); |
439 | 32.2k | std::vector<Value *> args = refArgs; |
440 | | |
441 | 32.2k | if (Ty->isVectorTy()) { |
442 | 8.37k | Value *retVal = llvm::UndefValue::get(RetTy); |
443 | 8.37k | unsigned vecSize = Ty->getVectorNumElements(); |
444 | 35.2k | for (unsigned i = 0; i < vecSize; i++26.8k ) { |
445 | | // Update vector args, skip known opcode arg. |
446 | 68.4k | for (unsigned argIdx = HLOperandIndex::kUnaryOpSrc0Idx; argIdx < argNum; |
447 | 41.5k | argIdx++) { |
448 | 41.5k | if (refArgs[argIdx]->getType()->isVectorTy()) { |
449 | 37.1k | Value *arg = refArgs[argIdx]; |
450 | 37.1k | args[argIdx] = Builder.CreateExtractElement(arg, i); |
451 | 37.1k | } |
452 | 41.5k | } |
453 | 26.8k | Value *EltOP = |
454 | 26.8k | Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode)); |
455 | 26.8k | retVal = Builder.CreateInsertElement(retVal, EltOP, i); |
456 | 26.8k | } |
457 | 8.37k | return retVal; |
458 | 8.37k | } |
459 | | |
460 | | // Cannot add name to void. |
461 | 23.8k | if (RetTy->isVoidTy()) |
462 | 254 | return Builder.CreateCall(dxilFunc, args); |
463 | | |
464 | 23.5k | return Builder.CreateCall(dxilFunc, args, hlslOP->GetOpCodeName(opcode)); |
465 | 23.8k | } |
466 | | |
467 | | // Creates a native vector call to for a "trivial" operation where only a single |
468 | | // call instruction is needed. The overload and return types are the same vector |
469 | | // type `Ty`. |
470 | | // Utility objects `HlslOp` and `Builder` are used to create a call to the given |
471 | | // `DxilFunc` with `RefArgs` arguments. |
472 | | Value *TrivialDxilVectorOperation(Function *Func, OP::OpCode Opcode, |
473 | | ArrayRef<Value *> Args, Type *Ty, OP *OP, |
474 | 968 | IRBuilder<> &Builder) { |
475 | 968 | if (!Ty->isVoidTy()) |
476 | 968 | return Builder.CreateCall(Func, Args, OP->GetOpCodeName(Opcode)); |
477 | 0 | return Builder.CreateCall(Func, Args); // Cannot add name to void. |
478 | 968 | } |
479 | | |
480 | | // Generates a DXIL operation with the overloaded type based on `Ty` and return |
481 | | // type `RetTy`. When Ty is a vector, it will either generate per-element calls |
482 | | // for each vector element and reconstruct the vector type from those results or |
483 | | // operate on and return native vectors depending on vector size and the |
484 | | // legality of the vector overload. |
485 | | Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs, |
486 | | Type *Ty, Type *RetTy, OP *hlslOP, |
487 | 32.4k | IRBuilder<> &Builder) { |
488 | | |
489 | | // If supported and the overload type is a vector with more than 1 element, |
490 | | // create a native vector operation. |
491 | 32.4k | if (Ty->isVectorTy() && Ty->getVectorNumElements() > 19.25k && |
492 | 32.4k | hlslOP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus()8.43k && |
493 | 32.4k | OP::IsOverloadLegal(opcode, Ty)1.05k ) { |
494 | 968 | Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty); |
495 | 968 | return TrivialDxilVectorOperation(dxilFunc, opcode, refArgs, Ty, hlslOP, |
496 | 968 | Builder); |
497 | 968 | } |
498 | | |
499 | | // Set overload type to the scalar type of `Ty` and generate call(s). |
500 | 31.5k | Type *EltTy = Ty->getScalarType(); |
501 | 31.5k | Function *dxilFunc = hlslOP->GetOpFunc(opcode, EltTy); |
502 | | |
503 | 31.5k | return TrivialDxilOperation(dxilFunc, opcode, refArgs, Ty, RetTy, hlslOP, |
504 | 31.5k | Builder); |
505 | 32.4k | } |
506 | | |
507 | | Value *TrivialDxilOperation(OP::OpCode opcode, ArrayRef<Value *> refArgs, |
508 | 3.57k | Type *Ty, Instruction *Inst, OP *hlslOP) { |
509 | 3.57k | DXASSERT(refArgs.size() > 0, "else opcode isn't in signature"); |
510 | 3.57k | DXASSERT(refArgs[0] == nullptr, |
511 | 3.57k | "else caller has already filled the value in"); |
512 | 3.57k | IRBuilder<> B(Inst); |
513 | 3.57k | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
514 | 3.57k | const_cast<llvm::Value **>(refArgs.data())[0] = |
515 | 3.57k | opArg; // actually stack memory from caller |
516 | 3.57k | return TrivialDxilOperation(opcode, refArgs, Ty, Inst->getType(), hlslOP, B); |
517 | 3.57k | } |
518 | | |
519 | | // Translate call that converts to a dxil unary operation with a different |
520 | | // return type from the overload by passing the argument, explicit return type, |
521 | | // and helper objects to the scalarizing unary dxil operation creation. |
522 | | Value *TrivialUnaryOperationRet(CallInst *CI, IntrinsicOp IOP, |
523 | | OP::OpCode OpCode, |
524 | | HLOperationLowerHelper &Helper, |
525 | | HLObjectOperationLowerHelper *, |
526 | 98 | bool &Translated) { |
527 | 98 | Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
528 | 98 | Type *Ty = Src->getType(); |
529 | | |
530 | 98 | IRBuilder<> Builder(CI); |
531 | 98 | hlsl::OP *OP = &Helper.hlslOP; |
532 | 98 | Type *RetTy = CI->getType(); |
533 | 98 | Constant *OpArg = OP->GetU32Const((unsigned)OpCode); |
534 | 98 | Value *Args[] = {OpArg, Src}; |
535 | | |
536 | 98 | return TrivialDxilOperation(OpCode, Args, Ty, RetTy, OP, Builder); |
537 | 98 | } |
538 | | |
539 | | Value *TrivialDxilUnaryOperation(OP::OpCode OpCode, Value *Src, hlsl::OP *Op, |
540 | 7.56k | IRBuilder<> &Builder) { |
541 | 7.56k | Type *Ty = Src->getType(); |
542 | | |
543 | 7.56k | Constant *OpArg = Op->GetU32Const((unsigned)OpCode); |
544 | 7.56k | Value *Args[] = {OpArg, Src}; |
545 | | |
546 | 7.56k | return TrivialDxilOperation(OpCode, Args, Ty, Ty, Op, Builder); |
547 | 7.56k | } |
548 | | |
549 | | Value *TrivialDxilBinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, |
550 | 7.06k | hlsl::OP *hlslOP, IRBuilder<> &Builder) { |
551 | 7.06k | Type *Ty = src0->getType(); |
552 | | |
553 | 7.06k | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
554 | 7.06k | Value *args[] = {opArg, src0, src1}; |
555 | | |
556 | 7.06k | return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); |
557 | 7.06k | } |
558 | | |
559 | | Value *TrivialDxilTrinaryOperation(OP::OpCode opcode, Value *src0, Value *src1, |
560 | | Value *src2, hlsl::OP *hlslOP, |
561 | 12.9k | IRBuilder<> &Builder) { |
562 | 12.9k | Type *Ty = src0->getType(); |
563 | | |
564 | 12.9k | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
565 | 12.9k | Value *args[] = {opArg, src0, src1, src2}; |
566 | | |
567 | 12.9k | return TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); |
568 | 12.9k | } |
569 | | |
570 | | // Translate call that trivially converts to a dxil unary operation by passing |
571 | | // argument, return type, and helper objects to either scalarizing or native |
572 | | // vector dxil operation creation depending on version and vector size. |
573 | | Value *TrivialUnaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
574 | | HLOperationLowerHelper &helper, |
575 | | HLObjectOperationLowerHelper *pObjHelper, |
576 | 4.30k | bool &Translated) { |
577 | 4.30k | Value *src0 = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
578 | 4.30k | IRBuilder<> Builder(CI); |
579 | 4.30k | hlsl::OP *hlslOP = &helper.hlslOP; |
580 | | |
581 | 4.30k | return TrivialDxilUnaryOperation(opcode, src0, hlslOP, Builder); |
582 | 4.30k | } |
583 | | |
584 | | // Translate call that trivially converts to a dxil binary operation by passing |
585 | | // arguments, return type, and helper objects to either scalarizing or native |
586 | | // vector dxil operation creation depending on version and vector size. |
587 | | Value *TrivialBinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
588 | | HLOperationLowerHelper &helper, |
589 | | HLObjectOperationLowerHelper *pObjHelper, |
590 | 2.49k | bool &Translated) { |
591 | 2.49k | hlsl::OP *hlslOP = &helper.hlslOP; |
592 | 2.49k | Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
593 | 2.49k | Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
594 | 2.49k | IRBuilder<> Builder(CI); |
595 | | |
596 | 2.49k | Value *binOp = |
597 | 2.49k | TrivialDxilBinaryOperation(opcode, src0, src1, hlslOP, Builder); |
598 | 2.49k | return binOp; |
599 | 2.49k | } |
600 | | |
601 | | // Translate call that trivially converts to a dxil trinary (aka tertiary) |
602 | | // operation by passing arguments, return type, and helper objects to either |
603 | | // scalarizing or native vector dxil operation creation depending on version |
604 | | // and vector size. |
605 | | Value *TrivialTrinaryOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
606 | | HLOperationLowerHelper &helper, |
607 | | HLObjectOperationLowerHelper *pObjHelper, |
608 | 12.0k | bool &Translated) { |
609 | 12.0k | hlsl::OP *hlslOP = &helper.hlslOP; |
610 | 12.0k | Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); |
611 | 12.0k | Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); |
612 | 12.0k | Value *src2 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); |
613 | 12.0k | IRBuilder<> Builder(CI); |
614 | | |
615 | 12.0k | Value *triOp = |
616 | 12.0k | TrivialDxilTrinaryOperation(opcode, src0, src1, src2, hlslOP, Builder); |
617 | 12.0k | return triOp; |
618 | 12.0k | } |
619 | | |
620 | | Value *TrivialIsSpecialFloat(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
621 | | HLOperationLowerHelper &helper, |
622 | | HLObjectOperationLowerHelper *pObjHelper, |
623 | 102 | bool &Translated) { |
624 | 102 | hlsl::OP *hlslOP = &helper.hlslOP; |
625 | 102 | Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
626 | 102 | IRBuilder<> Builder(CI); |
627 | | |
628 | 102 | Type *Ty = src->getType(); |
629 | 102 | Type *RetTy = Type::getInt1Ty(CI->getContext()); |
630 | 102 | if (Ty->isVectorTy()) |
631 | 90 | RetTy = VectorType::get(RetTy, Ty->getVectorNumElements()); |
632 | | |
633 | 102 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
634 | 102 | Value *args[] = {opArg, src}; |
635 | | |
636 | 102 | return TrivialDxilOperation(opcode, args, Ty, RetTy, hlslOP, Builder); |
637 | 102 | } |
638 | | |
639 | 120 | bool IsResourceGEP(GetElementPtrInst *I) { |
640 | 120 | Type *Ty = I->getType()->getPointerElementType(); |
641 | 120 | Ty = dxilutil::GetArrayEltTy(Ty); |
642 | | // Only mark on GEP which point to resource. |
643 | 120 | return dxilutil::IsHLSLResourceType(Ty); |
644 | 120 | } |
645 | | |
646 | | Value *TranslateNonUniformResourceIndex( |
647 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
648 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
649 | 196 | bool &Translated) { |
650 | 196 | Value *V = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
651 | 196 | Type *hdlTy = helper.hlslOP.GetHandleType(); |
652 | 212 | for (User *U : CI->users()) { |
653 | 212 | if (GetElementPtrInst *I = dyn_cast<GetElementPtrInst>(U)) { |
654 | | // Only mark on GEP which point to resource. |
655 | 108 | if (IsResourceGEP(I)) |
656 | 100 | DxilMDHelper::MarkNonUniform(I); |
657 | 108 | } else if (CastInst *104 castI104 = dyn_cast<CastInst>(U)) { |
658 | 40 | for (User *castU : castI->users()) { |
659 | 40 | if (GetElementPtrInst *I = dyn_cast<GetElementPtrInst>(castU)) { |
660 | | // Only mark on GEP which point to resource. |
661 | 12 | if (IsResourceGEP(I)) |
662 | 12 | DxilMDHelper::MarkNonUniform(I); |
663 | 28 | } else if (CallInst *CI = dyn_cast<CallInst>(castU)) { |
664 | 28 | if (CI->getType() == hdlTy) |
665 | 28 | DxilMDHelper::MarkNonUniform(CI); |
666 | 28 | } |
667 | 40 | } |
668 | 64 | } else if (CallInst *CI = dyn_cast<CallInst>(U)) { |
669 | 64 | if (CI->getType() == hdlTy) |
670 | 44 | DxilMDHelper::MarkNonUniform(CI); |
671 | 64 | } |
672 | 212 | } |
673 | 196 | CI->replaceAllUsesWith(V); |
674 | 196 | return nullptr; |
675 | 196 | } |
676 | | |
677 | | Value *TrivialBarrier(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
678 | | HLOperationLowerHelper &helper, |
679 | | HLObjectOperationLowerHelper *pObjHelper, |
680 | 1.50k | bool &Translated) { |
681 | 1.50k | hlsl::OP *OP = &helper.hlslOP; |
682 | 1.50k | Function *dxilFunc = OP->GetOpFunc(OP::OpCode::Barrier, CI->getType()); |
683 | 1.50k | Constant *opArg = OP->GetU32Const((unsigned)OP::OpCode::Barrier); |
684 | | |
685 | 1.50k | unsigned uglobal = static_cast<unsigned>(DXIL::BarrierMode::UAVFenceGlobal); |
686 | 1.50k | unsigned g = static_cast<unsigned>(DXIL::BarrierMode::TGSMFence); |
687 | 1.50k | unsigned t = static_cast<unsigned>(DXIL::BarrierMode::SyncThreadGroup); |
688 | | // unsigned ut = |
689 | | // static_cast<unsigned>(DXIL::BarrierMode::UAVFenceThreadGroup); |
690 | | |
691 | 1.50k | unsigned barrierMode = 0; |
692 | 1.50k | switch (IOP) { |
693 | 8 | case IntrinsicOp::IOP_AllMemoryBarrier: |
694 | 8 | barrierMode = uglobal | g; |
695 | 8 | break; |
696 | 16 | case IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync: |
697 | 16 | barrierMode = uglobal | g | t; |
698 | 16 | break; |
699 | 32 | case IntrinsicOp::IOP_GroupMemoryBarrier: |
700 | 32 | barrierMode = g; |
701 | 32 | break; |
702 | 1.41k | case IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync: |
703 | 1.41k | barrierMode = g | t; |
704 | 1.41k | break; |
705 | 24 | case IntrinsicOp::IOP_DeviceMemoryBarrier: |
706 | 24 | barrierMode = uglobal; |
707 | 24 | break; |
708 | 8 | case IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync: |
709 | 8 | barrierMode = uglobal | t; |
710 | 8 | break; |
711 | 0 | default: |
712 | 0 | DXASSERT(0, "invalid opcode for barrier"); |
713 | 0 | break; |
714 | 1.50k | } |
715 | 1.50k | Value *src0 = OP->GetU32Const(static_cast<unsigned>(barrierMode)); |
716 | | |
717 | 1.50k | Value *args[] = {opArg, src0}; |
718 | | |
719 | 1.50k | IRBuilder<> Builder(CI); |
720 | 1.50k | Builder.CreateCall(dxilFunc, args); |
721 | 1.50k | return nullptr; |
722 | 1.50k | } |
723 | | |
724 | | Value *TranslateD3DColorToUByte4(CallInst *CI, IntrinsicOp IOP, |
725 | | OP::OpCode opcode, |
726 | | HLOperationLowerHelper &helper, |
727 | | HLObjectOperationLowerHelper *pObjHelper, |
728 | 32 | bool &Translated) { |
729 | 32 | IRBuilder<> Builder(CI); |
730 | 32 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
731 | 32 | Type *Ty = val->getType(); |
732 | | |
733 | | // Use the same scaling factor used by FXC (i.e., 255.001953) |
734 | | // Excerpt from stackoverflow discussion: |
735 | | // "Built-in rounding, necessary because of truncation. 0.001953 * 256 = 0.5" |
736 | 32 | Constant *toByteConst = ConstantFP::get(Ty->getScalarType(), 255.001953); |
737 | | |
738 | 32 | if (Ty->isVectorTy()) { |
739 | 32 | static constexpr int supportedVecElemCount = 4; |
740 | 32 | if (Ty->getVectorNumElements() != supportedVecElemCount) { |
741 | 0 | llvm_unreachable( |
742 | 0 | "Unsupported input type for intrinsic D3DColorToUByte4."); |
743 | 0 | return UndefValue::get(CI->getType()); |
744 | 0 | } |
745 | | |
746 | 32 | toByteConst = ConstantVector::getSplat(supportedVecElemCount, toByteConst); |
747 | | // Swizzle the input val -> val.zyxw |
748 | 32 | SmallVector<int, 4> mask{2, 1, 0, 3}; |
749 | 32 | val = Builder.CreateShuffleVector(val, val, mask); |
750 | 32 | } |
751 | | |
752 | 32 | Value *byte4 = Builder.CreateFMul(toByteConst, val); |
753 | 32 | return Builder.CreateCast(Instruction::CastOps::FPToSI, byte4, CI->getType()); |
754 | 32 | } |
755 | | |
756 | | // Returns true if pow can be implemented using Fxc's mul-only code gen pattern. |
757 | | // Fxc uses the below rules when choosing mul-only code gen pattern to implement |
758 | | // pow function. Rule 1: Applicable only to power values in the range |
759 | | // [INT32_MIN, INT32_MAX] Rule 2: The maximum number of mul ops needed shouldn't |
760 | | // exceed (2n+1) or (n+1) based on whether the power |
761 | | // is a positive or a negative value. Here "n" is the number of scalar |
762 | | // elements in power. |
763 | | // Rule 3: Power must be an exact value. |
764 | | // +----------+---------------------+------------------+ |
765 | | // | BaseType | IsExponentPositive | MaxMulOpsAllowed | |
766 | | // +----------+---------------------+------------------+ |
767 | | // | float4x4 | True | 33 | |
768 | | // | float4x4 | False | 17 | |
769 | | // | float4x2 | True | 17 | |
770 | | // | float4x2 | False | 9 | |
771 | | // | float2x4 | True | 17 | |
772 | | // | float2x4 | False | 9 | |
773 | | // | float4 | True | 9 | |
774 | | // | float4 | False | 5 | |
775 | | // | float2 | True | 5 | |
776 | | // | float2 | False | 3 | |
777 | | // | float | True | 3 | |
778 | | // | float | False | 2 | |
779 | | // +----------+---------------------+------------------+ |
780 | | |
781 | | bool CanUseFxcMulOnlyPatternForPow(IRBuilder<> &Builder, Value *x, Value *pow, |
782 | 1.45k | int32_t &powI) { |
783 | | // Applicable only when power is a literal. |
784 | 1.45k | if (!isa<ConstantDataVector>(pow) && !isa<ConstantFP>(pow)262 ) { |
785 | 74 | return false; |
786 | 74 | } |
787 | | |
788 | | // Only apply this code gen on splat values. |
789 | 1.38k | if (ConstantDataVector *cdv = dyn_cast<ConstantDataVector>(pow)) { |
790 | 1.19k | if (!hlsl::dxilutil::IsSplat(cdv)) { |
791 | 8 | return false; |
792 | 8 | } |
793 | 1.19k | } |
794 | | |
795 | | // Only apply on aggregates of 16 or fewer elements, |
796 | | // representing the max 4x4 matrix size. |
797 | 1.37k | Type *Ty = x->getType(); |
798 | 1.37k | if (Ty->isVectorTy() && Ty->getVectorNumElements() > 161.18k ) |
799 | 0 | return false; |
800 | | |
801 | 1.37k | APFloat powAPF = isa<ConstantDataVector>(pow) |
802 | 1.37k | ? cast<ConstantDataVector>(pow)->getElementAsAPFloat(0)1.18k |
803 | 1.37k | : // should be a splat value |
804 | 1.37k | cast<ConstantFP>(pow)->getValueAPF()188 ; |
805 | 1.37k | APSInt powAPS(32, false); |
806 | 1.37k | bool isExact = false; |
807 | | // Try converting float value of power to integer and also check if the float |
808 | | // value is exact. |
809 | 1.37k | APFloat::opStatus status = |
810 | 1.37k | powAPF.convertToInteger(powAPS, APFloat::rmTowardZero, &isExact); |
811 | 1.37k | if (status == APFloat::opStatus::opOK && isExact348 ) { |
812 | 340 | powI = powAPS.getExtValue(); |
813 | 340 | uint32_t powU = abs(powI); |
814 | 340 | int setBitCount = 0; |
815 | 340 | int maxBitSetPos = -1; |
816 | 11.2k | for (int i = 0; i < 32; i++10.8k ) { |
817 | 10.8k | if ((powU >> i) & 1) { |
818 | 548 | setBitCount++; |
819 | 548 | maxBitSetPos = i; |
820 | 548 | } |
821 | 10.8k | } |
822 | | |
823 | 340 | DXASSERT(maxBitSetPos <= 30, "msb should always be zero."); |
824 | 340 | unsigned numElem = |
825 | 340 | isa<ConstantDataVector>(pow) ? x->getType()->getVectorNumElements()152 : 1188 ; |
826 | 340 | int mulOpThreshold = powI < 0 ? numElem + 132 : 2 * numElem + 1308 ; |
827 | 340 | int mulOpNeeded = maxBitSetPos + setBitCount - 1; |
828 | 340 | return mulOpNeeded <= mulOpThreshold; |
829 | 340 | } |
830 | | |
831 | 1.03k | return false; |
832 | 1.37k | } |
833 | | |
834 | | Value *TranslatePowUsingFxcMulOnlyPattern(IRBuilder<> &Builder, Value *x, |
835 | 184 | const int32_t y) { |
836 | 184 | uint32_t absY = abs(y); |
837 | | // If y is zero then always return 1. |
838 | 184 | if (absY == 0) { |
839 | 8 | return ConstantFP::get(x->getType(), 1); |
840 | 8 | } |
841 | | |
842 | 176 | int lastSetPos = -1; |
843 | 176 | Value *result = nullptr; |
844 | 176 | Value *mul = nullptr; |
845 | 5.80k | for (int i = 0; i < 32; i++5.63k ) { |
846 | 5.63k | if ((absY >> i) & 1) { |
847 | 1.31k | for (int j = i; j > lastSetPos; j--1.00k ) { |
848 | 1.00k | if (!mul) { |
849 | 176 | mul = x; |
850 | 832 | } else { |
851 | 832 | mul = Builder.CreateFMul(mul, mul); |
852 | 832 | } |
853 | 1.00k | } |
854 | | |
855 | 304 | result = (result == nullptr) ? mul176 : Builder.CreateFMul(result, mul)128 ; |
856 | 304 | lastSetPos = i; |
857 | 304 | } |
858 | 5.63k | } |
859 | | |
860 | | // Compute reciprocal for negative power values. |
861 | 176 | if (y < 0) { |
862 | 32 | Value *constOne = ConstantFP::get(x->getType(), 1); |
863 | 32 | result = Builder.CreateFDiv(constOne, result); |
864 | 32 | } |
865 | | |
866 | 176 | return result; |
867 | 184 | } |
868 | | |
869 | | Value *TranslatePowImpl(hlsl::OP *hlslOP, IRBuilder<> &Builder, Value *x, |
870 | 1.45k | Value *y, bool isFXCCompatMode = false) { |
871 | | // As applicable implement pow using only mul ops as done by Fxc. |
872 | 1.45k | int32_t p = 0; |
873 | 1.45k | if (CanUseFxcMulOnlyPatternForPow(Builder, x, y, p)) { |
874 | 304 | if (isFXCCompatMode) |
875 | 184 | return TranslatePowUsingFxcMulOnlyPattern(Builder, x, p); |
876 | | // Only take care 2 for it will not affect register pressure. |
877 | 120 | if (p == 2) |
878 | 56 | return Builder.CreateFMul(x, x); |
879 | 120 | } |
880 | | |
881 | | // Default to log-mul-exp pattern if previous scenarios don't apply. |
882 | | // t = log(x); |
883 | 1.21k | Value *logX = |
884 | 1.21k | TrivialDxilUnaryOperation(DXIL::OpCode::Log, x, hlslOP, Builder); |
885 | | // t = y * t; |
886 | 1.21k | Value *mulY = Builder.CreateFMul(logX, y); |
887 | | // pow = exp(t); |
888 | 1.21k | return TrivialDxilUnaryOperation(DXIL::OpCode::Exp, mulY, hlslOP, Builder); |
889 | 1.45k | } |
890 | | |
891 | | Value *TranslateAddUint64(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
892 | | HLOperationLowerHelper &helper, |
893 | | HLObjectOperationLowerHelper *pObjHelper, |
894 | 32 | bool &Translated) { |
895 | 32 | hlsl::OP *hlslOP = &helper.hlslOP; |
896 | 32 | IRBuilder<> Builder(CI); |
897 | 32 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
898 | 32 | Type *Ty = val->getType(); |
899 | 32 | VectorType *VT = dyn_cast<VectorType>(Ty); |
900 | 32 | if (!VT) { |
901 | 0 | dxilutil::EmitErrorOnInstruction( |
902 | 0 | CI, "AddUint64 can only be applied to uint2 and uint4 operands."); |
903 | 0 | return UndefValue::get(Ty); |
904 | 0 | } |
905 | | |
906 | 32 | unsigned size = VT->getNumElements(); |
907 | 32 | if (size != 2 && size != 424 ) { |
908 | 16 | dxilutil::EmitErrorOnInstruction( |
909 | 16 | CI, "AddUint64 can only be applied to uint2 and uint4 operands."); |
910 | 16 | return UndefValue::get(Ty); |
911 | 16 | } |
912 | 16 | Value *op0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
913 | 16 | Value *op1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
914 | | |
915 | 16 | Value *RetVal = UndefValue::get(Ty); |
916 | | |
917 | 16 | Function *AddC = hlslOP->GetOpFunc(DXIL::OpCode::UAddc, helper.i32Ty); |
918 | 16 | Value *opArg = Builder.getInt32(static_cast<unsigned>(DXIL::OpCode::UAddc)); |
919 | 40 | for (unsigned i = 0; i < size; i += 224 ) { |
920 | 24 | Value *low0 = Builder.CreateExtractElement(op0, i); |
921 | 24 | Value *low1 = Builder.CreateExtractElement(op1, i); |
922 | 24 | Value *lowWithC = Builder.CreateCall(AddC, {opArg, low0, low1}); |
923 | 24 | Value *low = Builder.CreateExtractValue(lowWithC, 0); |
924 | 24 | RetVal = Builder.CreateInsertElement(RetVal, low, i); |
925 | | |
926 | 24 | Value *carry = Builder.CreateExtractValue(lowWithC, 1); |
927 | | // Ext i1 to i32 |
928 | 24 | carry = Builder.CreateZExt(carry, helper.i32Ty); |
929 | | |
930 | 24 | Value *hi0 = Builder.CreateExtractElement(op0, i + 1); |
931 | 24 | Value *hi1 = Builder.CreateExtractElement(op1, i + 1); |
932 | 24 | Value *hi = Builder.CreateAdd(hi0, hi1); |
933 | 24 | hi = Builder.CreateAdd(hi, carry); |
934 | 24 | RetVal = Builder.CreateInsertElement(RetVal, hi, i + 1); |
935 | 24 | } |
936 | 16 | return RetVal; |
937 | 32 | } |
938 | | |
939 | 936 | bool IsValidLoadInput(Value *V) { |
940 | | // Must be load input. |
941 | | // TODO: report this error on front-end |
942 | 936 | if (!V || !isa<CallInst>(V)) { |
943 | 12 | return false; |
944 | 12 | } |
945 | 924 | CallInst *CI = cast<CallInst>(V); |
946 | | // Must be immediate. |
947 | 924 | ConstantInt *opArg = |
948 | 924 | cast<ConstantInt>(CI->getArgOperand(DXIL::OperandIndex::kOpcodeIdx)); |
949 | 924 | DXIL::OpCode op = static_cast<DXIL::OpCode>(opArg->getLimitedValue()); |
950 | 924 | if (op != DXIL::OpCode::LoadInput) { |
951 | 0 | return false; |
952 | 0 | } |
953 | 924 | return true; |
954 | 924 | } |
955 | | |
956 | | // Tunnel through insert/extract element and shuffle to find original source |
957 | | // of scalar value, or specified element (vecIdx) of vector value. |
958 | 936 | Value *FindScalarSource(Value *src, unsigned vecIdx = 0) { |
959 | 936 | Type *srcTy = src->getType()->getScalarType(); |
960 | 6.16k | while (src && !isa<UndefValue>(src)) { |
961 | 6.16k | if (src->getType()->isVectorTy()) { |
962 | 5.10k | if (InsertElementInst *IE = dyn_cast<InsertElementInst>(src)) { |
963 | 4.18k | unsigned curIdx = (unsigned)cast<ConstantInt>(IE->getOperand(2)) |
964 | 4.18k | ->getUniqueInteger() |
965 | 4.18k | .getLimitedValue(); |
966 | 4.18k | src = IE->getOperand((curIdx == vecIdx) ? 1938 : 03.25k ); |
967 | 4.18k | } else if (ShuffleVectorInst *916 SV916 = dyn_cast<ShuffleVectorInst>(src)) { |
968 | 904 | int newIdx = SV->getMaskValue(vecIdx); |
969 | 904 | if (newIdx < 0) |
970 | 0 | return UndefValue::get(srcTy); |
971 | 904 | vecIdx = (unsigned)newIdx; |
972 | 904 | src = SV->getOperand(0); |
973 | 904 | unsigned numElt = src->getType()->getVectorNumElements(); |
974 | 904 | if (numElt <= vecIdx) { |
975 | 0 | vecIdx -= numElt; |
976 | 0 | src = SV->getOperand(1); |
977 | 0 | } |
978 | 904 | } else { |
979 | 12 | return UndefValue::get(srcTy); // Didn't find it. |
980 | 12 | } |
981 | 5.10k | } else { |
982 | 1.06k | if (ExtractElementInst *EE = dyn_cast<ExtractElementInst>(src)) { |
983 | 56 | vecIdx = (unsigned)cast<ConstantInt>(EE->getIndexOperand()) |
984 | 56 | ->getUniqueInteger() |
985 | 56 | .getLimitedValue(); |
986 | 56 | src = EE->getVectorOperand(); |
987 | 1.00k | } else if (hlsl::dxilutil::IsConvergentMarker(src)) { |
988 | 80 | src = hlsl::dxilutil::GetConvergentSource(src); |
989 | 924 | } else { |
990 | 924 | break; // Found it. |
991 | 924 | } |
992 | 1.06k | } |
993 | 6.16k | } |
994 | 924 | return src; |
995 | 936 | } |
996 | | |
997 | | // Finds corresponding inputs, calls translation for each, and returns |
998 | | // resulting vector or scalar. |
999 | | // Uses functor that takes (inputElemID, rowIdx, colIdx), and returns |
1000 | | // translation for one input scalar. |
1001 | | Value *TranslateEvalHelper( |
1002 | | CallInst *CI, Value *val, IRBuilder<> &Builder, |
1003 | 266 | std::function<Value *(Value *, Value *, Value *)> fnTranslateScalarInput) { |
1004 | 266 | Type *Ty = CI->getType(); |
1005 | 266 | Value *result = UndefValue::get(Ty); |
1006 | 266 | if (Ty->isVectorTy()) { |
1007 | 1.10k | for (unsigned i = 0; i < Ty->getVectorNumElements(); ++i882 ) { |
1008 | 894 | Value *InputEl = FindScalarSource(val, i); |
1009 | 894 | if (!IsValidLoadInput(InputEl)) { |
1010 | 12 | dxilutil::EmitErrorOnInstruction( |
1011 | 12 | CI, "attribute evaluation can only be done " |
1012 | 12 | "on values taken directly from inputs."); |
1013 | 12 | return result; |
1014 | 12 | } |
1015 | 882 | CallInst *loadInput = cast<CallInst>(InputEl); |
1016 | 882 | Value *inputElemID = |
1017 | 882 | loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); |
1018 | 882 | Value *rowIdx = |
1019 | 882 | loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); |
1020 | 882 | Value *colIdx = |
1021 | 882 | loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); |
1022 | 882 | Value *Elt = fnTranslateScalarInput(inputElemID, rowIdx, colIdx); |
1023 | 882 | result = Builder.CreateInsertElement(result, Elt, i); |
1024 | 882 | } |
1025 | 224 | } else { |
1026 | 42 | Value *InputEl = FindScalarSource(val); |
1027 | 42 | if (!IsValidLoadInput(InputEl)) { |
1028 | 0 | dxilutil::EmitErrorOnInstruction(CI, |
1029 | 0 | "attribute evaluation can only be done " |
1030 | 0 | "on values taken directly from inputs."); |
1031 | 0 | return result; |
1032 | 0 | } |
1033 | 42 | CallInst *loadInput = cast<CallInst>(InputEl); |
1034 | 42 | Value *inputElemID = |
1035 | 42 | loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputIDOpIdx); |
1036 | 42 | Value *rowIdx = |
1037 | 42 | loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputRowOpIdx); |
1038 | 42 | Value *colIdx = |
1039 | 42 | loadInput->getArgOperand(DXIL::OperandIndex::kLoadInputColOpIdx); |
1040 | 42 | result = fnTranslateScalarInput(inputElemID, rowIdx, colIdx); |
1041 | 42 | } |
1042 | 254 | return result; |
1043 | 266 | } |
1044 | | |
1045 | | Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1046 | | HLOperationLowerHelper &helper, |
1047 | | HLObjectOperationLowerHelper *pObjHelper, |
1048 | 80 | bool &Translated) { |
1049 | 80 | hlsl::OP *hlslOP = &helper.hlslOP; |
1050 | 80 | Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
1051 | 80 | Value *sampleIdx = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
1052 | 80 | IRBuilder<> Builder(CI); |
1053 | 80 | OP::OpCode opcode = OP::OpCode::EvalSampleIndex; |
1054 | 80 | Value *opArg = hlslOP->GetU32Const((unsigned)opcode); |
1055 | 80 | Function *evalFunc = |
1056 | 80 | hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType()); |
1057 | | |
1058 | 80 | return TranslateEvalHelper( |
1059 | 80 | CI, val, Builder, |
1060 | 160 | [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value * { |
1061 | 160 | return Builder.CreateCall( |
1062 | 160 | evalFunc, {opArg, inputElemID, rowIdx, colIdx, sampleIdx}); |
1063 | 160 | }); |
1064 | 80 | } |
1065 | | |
1066 | | Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1067 | | HLOperationLowerHelper &helper, |
1068 | | HLObjectOperationLowerHelper *pObjHelper, |
1069 | 16 | bool &Translated) { |
1070 | 16 | hlsl::OP *hlslOP = &helper.hlslOP; |
1071 | 16 | Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
1072 | 16 | Value *offset = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
1073 | 16 | IRBuilder<> Builder(CI); |
1074 | 16 | Value *offsetX = Builder.CreateExtractElement(offset, (uint64_t)0); |
1075 | 16 | Value *offsetY = Builder.CreateExtractElement(offset, 1); |
1076 | 16 | OP::OpCode opcode = OP::OpCode::EvalSnapped; |
1077 | 16 | Value *opArg = hlslOP->GetU32Const((unsigned)opcode); |
1078 | 16 | Function *evalFunc = |
1079 | 16 | hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType()); |
1080 | | |
1081 | 16 | return TranslateEvalHelper( |
1082 | 16 | CI, val, Builder, |
1083 | 64 | [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value * { |
1084 | 64 | return Builder.CreateCall( |
1085 | 64 | evalFunc, {opArg, inputElemID, rowIdx, colIdx, offsetX, offsetY}); |
1086 | 64 | }); |
1087 | 16 | } |
1088 | | |
1089 | | Value *TranslateEvalCentroid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1090 | | HLOperationLowerHelper &helper, |
1091 | | HLObjectOperationLowerHelper *pObjHelper, |
1092 | 88 | bool &Translated) { |
1093 | 88 | hlsl::OP *hlslOP = &helper.hlslOP; |
1094 | 88 | Value *val = CI->getArgOperand(DXIL::OperandIndex::kUnarySrc0OpIdx); |
1095 | 88 | IRBuilder<> Builder(CI); |
1096 | 88 | OP::OpCode opcode = OP::OpCode::EvalCentroid; |
1097 | 88 | Value *opArg = hlslOP->GetU32Const((unsigned)opcode); |
1098 | 88 | Function *evalFunc = |
1099 | 88 | hlslOP->GetOpFunc(opcode, CI->getType()->getScalarType()); |
1100 | | |
1101 | 88 | return TranslateEvalHelper( |
1102 | 88 | CI, val, Builder, |
1103 | 410 | [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value * { |
1104 | 410 | return Builder.CreateCall(evalFunc, |
1105 | 410 | {opArg, inputElemID, rowIdx, colIdx}); |
1106 | 410 | }); |
1107 | 88 | } |
1108 | | |
1109 | | /* |
1110 | | HLSL: bool RWDispatchNodeInputRecord<recordType>::FinishedCrossGroupSharing() |
1111 | | DXIL: i1 @dx.op.finishedCrossGroupSharing(i32 %Opcode, |
1112 | | %dx.types.NodeRecordHandle %NodeInputRecordHandle) |
1113 | | */ |
1114 | | Value *TranslateNodeFinishedCrossGroupSharing( |
1115 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1116 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
1117 | 8 | bool &Translated) { |
1118 | 8 | hlsl::OP *OP = &helper.hlslOP; |
1119 | | |
1120 | 8 | Function *dxilFunc = OP->GetOpFunc(op, Type::getVoidTy(CI->getContext())); |
1121 | 8 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
1122 | 8 | DXASSERT_NOMSG(handle->getType() == OP->GetNodeRecordHandleType()); |
1123 | 8 | Value *opArg = OP->GetU32Const((unsigned)op); |
1124 | | |
1125 | 8 | IRBuilder<> Builder(CI); |
1126 | 8 | return Builder.CreateCall(dxilFunc, {opArg, handle}); |
1127 | 8 | } |
1128 | | |
1129 | | /* |
1130 | | HLSL: |
1131 | | bool NodeOutput<recordType>::IsValid() |
1132 | | bool EmptyNodeOutput::IsValid() |
1133 | | DXIL: |
1134 | | i1 @dx.op.nodeOutputIsValid(i32 %Opcode, %dx.types.NodeHandle |
1135 | | %NodeOutputHandle) |
1136 | | */ |
1137 | | Value *TranslateNodeOutputIsValid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1138 | | HLOperationLowerHelper &helper, |
1139 | | HLObjectOperationLowerHelper *pObjHelper, |
1140 | 48 | bool &Translated) { |
1141 | 48 | hlsl::OP *OP = &helper.hlslOP; |
1142 | 48 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
1143 | 48 | Function *dxilFunc = OP->GetOpFunc(op, Type::getVoidTy(CI->getContext())); |
1144 | 48 | Value *opArg = OP->GetU32Const((unsigned)op); |
1145 | | |
1146 | 48 | IRBuilder<> Builder(CI); |
1147 | 48 | return Builder.CreateCall(dxilFunc, {opArg, handle}); |
1148 | 48 | } |
1149 | | |
1150 | | Value *TranslateGetAttributeAtVertex(CallInst *CI, IntrinsicOp IOP, |
1151 | | OP::OpCode op, |
1152 | | HLOperationLowerHelper &helper, |
1153 | | HLObjectOperationLowerHelper *pObjHelper, |
1154 | 82 | bool &Translated) { |
1155 | 82 | DXASSERT(op == OP::OpCode::AttributeAtVertex, "Wrong opcode to translate"); |
1156 | 82 | hlsl::OP *hlslOP = &helper.hlslOP; |
1157 | 82 | IRBuilder<> Builder(CI); |
1158 | 82 | Value *val = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc0OpIdx); |
1159 | 82 | Value *vertexIdx = CI->getArgOperand(DXIL::OperandIndex::kBinarySrc1OpIdx); |
1160 | 82 | Value *vertexI8Idx = |
1161 | 82 | Builder.CreateTrunc(vertexIdx, Type::getInt8Ty(CI->getContext())); |
1162 | 82 | Value *opArg = hlslOP->GetU32Const((unsigned)op); |
1163 | 82 | Function *evalFunc = hlslOP->GetOpFunc(op, val->getType()->getScalarType()); |
1164 | | |
1165 | 82 | return TranslateEvalHelper( |
1166 | 82 | CI, val, Builder, |
1167 | 290 | [&](Value *inputElemID, Value *rowIdx, Value *colIdx) -> Value * { |
1168 | 290 | return Builder.CreateCall( |
1169 | 290 | evalFunc, {opArg, inputElemID, rowIdx, colIdx, vertexI8Idx}); |
1170 | 290 | }); |
1171 | 82 | } |
1172 | | /* |
1173 | | |
1174 | | HLSL: |
1175 | | void Barrier(uint MemoryTypeFlags, uint SemanticFlags) |
1176 | | void Barrier(Object o, uint SemanticFlags) |
1177 | | |
1178 | | All UAVs and/or Node Records by types: |
1179 | | void @dx.op.barrierByMemoryType(i32 %Opcode, |
1180 | | i32 %MemoryTypeFlags, i32 %SemanticFlags) |
1181 | | |
1182 | | UAV by handle: |
1183 | | void @dx.op.barrierByMemoryHandle(i32 %Opcode, |
1184 | | %dx.types.Handle %Object, i32 %SemanticFlags) |
1185 | | |
1186 | | Node Record by handle: |
1187 | | void @dx.op.barrierByMemoryHandle(i32 %Opcode, |
1188 | | %dx.types.NodeRecordHandle %Object, i32 %SemanticFlags) |
1189 | | */ |
1190 | | |
1191 | | Value *TranslateBarrier(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1192 | | HLOperationLowerHelper &helper, |
1193 | | HLObjectOperationLowerHelper *pObjHelper, |
1194 | 242 | bool &Translated) { |
1195 | 242 | hlsl::OP *OP = &helper.hlslOP; |
1196 | 242 | Value *HandleOrMemoryFlags = |
1197 | 242 | CI->getArgOperand(HLOperandIndex::kBarrierMemoryTypeFlagsOpIdx); |
1198 | 242 | Value *SemanticFlags = |
1199 | 242 | CI->getArgOperand(HLOperandIndex::kBarrierSemanticFlagsOpIdx); |
1200 | 242 | IRBuilder<> Builder(CI); |
1201 | | |
1202 | 242 | if (HandleOrMemoryFlags->getType()->isIntegerTy()) { |
1203 | 86 | op = OP::OpCode::BarrierByMemoryType; |
1204 | 156 | } else if (HandleOrMemoryFlags->getType() == OP->GetHandleType()) { |
1205 | 80 | op = OP::OpCode::BarrierByMemoryHandle; |
1206 | 80 | } else if (76 HandleOrMemoryFlags->getType() == OP->GetNodeRecordHandleType()76 ) { |
1207 | 76 | op = OP::OpCode::BarrierByNodeRecordHandle; |
1208 | 76 | } else { |
1209 | 0 | DXASSERT(false, "Shouldn't get here"); |
1210 | 0 | } |
1211 | | |
1212 | 242 | Function *dxilFunc = OP->GetOpFunc(op, CI->getType()); |
1213 | 242 | Constant *opArg = OP->GetU32Const((unsigned)op); |
1214 | | |
1215 | 242 | Value *args[] = {opArg, HandleOrMemoryFlags, SemanticFlags}; |
1216 | | |
1217 | 242 | Builder.CreateCall(dxilFunc, args); |
1218 | 242 | return nullptr; |
1219 | 242 | } |
1220 | | |
1221 | | Value *TranslateGetGroupOrThreadNodeOutputRecords( |
1222 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1223 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
1224 | 272 | bool isPerThreadRecord, bool &Translated) { |
1225 | 272 | IRBuilder<> Builder(CI); |
1226 | 272 | hlsl::OP *OP = &helper.hlslOP; |
1227 | 272 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
1228 | 272 | Function *dxilFunc = OP->GetOpFunc(op, Builder.getVoidTy()); |
1229 | 272 | Value *opArg = OP->GetU32Const((unsigned)op); |
1230 | 272 | Value *count = |
1231 | 272 | CI->getArgOperand(HLOperandIndex::kAllocateRecordNumRecordsIdx); |
1232 | 272 | Value *perThread = OP->GetI1Const(isPerThreadRecord); |
1233 | | |
1234 | 272 | Value *args[] = {opArg, handle, count, perThread}; |
1235 | | |
1236 | 272 | return Builder.CreateCall(dxilFunc, args); |
1237 | 272 | } |
1238 | | |
1239 | | /* |
1240 | | HLSL: |
1241 | | GroupNodeOutputRecords<recordType> |
1242 | | NodeOutput<recordType>::GetGroupNodeOutputRecords(uint numRecords); DXIL: |
1243 | | %dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 %Opcode, |
1244 | | %dx.types.NodeHandle %NodeOutputHandle, i32 %NumRecords, i1 %PerThread) |
1245 | | */ |
1246 | | Value * |
1247 | | TranslateGetGroupNodeOutputRecords(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1248 | | HLOperationLowerHelper &helper, |
1249 | | HLObjectOperationLowerHelper *pObjHelper, |
1250 | 144 | bool &Translated) { |
1251 | 144 | return TranslateGetGroupOrThreadNodeOutputRecords( |
1252 | 144 | CI, IOP, op, helper, pObjHelper, /* isPerThreadRecord */ false, |
1253 | 144 | Translated); |
1254 | 144 | } |
1255 | | |
1256 | | /* |
1257 | | HLSL: |
1258 | | ThreadNodeOutputRecords<recordType> |
1259 | | NodeOutput<recordType>::GetThreadNodeOutputRecords(uint numRecords) DXIL: |
1260 | | %dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 %Opcode, |
1261 | | %dx.types.NodeHandle %NodeOutputHandle, i32 %NumRecords, i1 %PerThread) |
1262 | | */ |
1263 | | Value *TranslateGetThreadNodeOutputRecords( |
1264 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1265 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
1266 | 128 | bool &Translated) { |
1267 | 128 | return TranslateGetGroupOrThreadNodeOutputRecords( |
1268 | 128 | CI, IOP, op, helper, pObjHelper, /* isPerThreadRecord */ true, |
1269 | 128 | Translated); |
1270 | 128 | } |
1271 | | |
1272 | | /* |
1273 | | HLSL: |
1274 | | uint EmptyNodeInput::Count() |
1275 | | uint GroupNodeInputRecords<recordType>::Count() |
1276 | | uint RWGroupNodeInputRecords<recordType>::Count() |
1277 | | |
1278 | | DXIL: |
1279 | | i32 @dx.op.getInputRecordCount(i32 %Opcode, %dx.types.NodeRecordHandle |
1280 | | %NodeInputHandle) |
1281 | | */ |
1282 | | Value * |
1283 | | TranslateNodeGetInputRecordCount(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1284 | | HLOperationLowerHelper &helper, |
1285 | | HLObjectOperationLowerHelper *pObjHelper, |
1286 | 30 | bool &Translated) { |
1287 | 30 | hlsl::OP *OP = &helper.hlslOP; |
1288 | | |
1289 | 30 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
1290 | 30 | DXASSERT_NOMSG(handle->getType() == OP->GetNodeRecordHandleType()); |
1291 | 30 | Function *dxilFunc = OP->GetOpFunc(op, Type::getVoidTy(CI->getContext())); |
1292 | 30 | Value *opArg = OP->GetU32Const((unsigned)op); |
1293 | 30 | Value *args[] = {opArg, handle}; |
1294 | | |
1295 | 30 | IRBuilder<> Builder(CI); |
1296 | 30 | return Builder.CreateCall(dxilFunc, args); |
1297 | 30 | } |
1298 | | |
1299 | | Value *TrivialNoArgOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1300 | | HLOperationLowerHelper &helper, |
1301 | | HLObjectOperationLowerHelper *pObjHelper, |
1302 | 164 | bool &Translated) { |
1303 | 164 | hlsl::OP *hlslOP = &helper.hlslOP; |
1304 | 164 | Type *Ty = Type::getVoidTy(CI->getContext()); |
1305 | | |
1306 | 164 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
1307 | 164 | Value *args[] = {opArg}; |
1308 | 164 | IRBuilder<> Builder(CI); |
1309 | 164 | Value *dxilOp = TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); |
1310 | | |
1311 | 164 | return dxilOp; |
1312 | 164 | } |
1313 | | |
1314 | | Value *TrivialNoArgWithRetOperation(CallInst *CI, IntrinsicOp IOP, |
1315 | | OP::OpCode opcode, |
1316 | | HLOperationLowerHelper &helper, |
1317 | | HLObjectOperationLowerHelper *pObjHelper, |
1318 | 360 | bool &Translated) { |
1319 | 360 | hlsl::OP *hlslOP = &helper.hlslOP; |
1320 | 360 | Type *Ty = CI->getType(); |
1321 | | |
1322 | 360 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
1323 | 360 | Value *args[] = {opArg}; |
1324 | 360 | IRBuilder<> Builder(CI); |
1325 | 360 | Value *dxilOp = TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); |
1326 | | |
1327 | 360 | return dxilOp; |
1328 | 360 | } |
1329 | | |
1330 | | Value *TranslateGetRTSamplePos(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
1331 | | HLOperationLowerHelper &helper, |
1332 | | HLObjectOperationLowerHelper *pObjHelper, |
1333 | 16 | bool &Translated) { |
1334 | 16 | hlsl::OP *hlslOP = &helper.hlslOP; |
1335 | 16 | OP::OpCode opcode = OP::OpCode::RenderTargetGetSamplePosition; |
1336 | 16 | IRBuilder<> Builder(CI); |
1337 | | |
1338 | 16 | Type *Ty = Type::getVoidTy(CI->getContext()); |
1339 | 16 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
1340 | | |
1341 | 16 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
1342 | 16 | Value *args[] = {opArg, val}; |
1343 | | |
1344 | 16 | Value *samplePos = |
1345 | 16 | TrivialDxilOperation(opcode, args, Ty, Ty, hlslOP, Builder); |
1346 | | |
1347 | 16 | Value *result = UndefValue::get(CI->getType()); |
1348 | 16 | Value *samplePosX = Builder.CreateExtractValue(samplePos, 0); |
1349 | 16 | Value *samplePosY = Builder.CreateExtractValue(samplePos, 1); |
1350 | 16 | result = Builder.CreateInsertElement(result, samplePosX, (uint64_t)0); |
1351 | 16 | result = Builder.CreateInsertElement(result, samplePosY, 1); |
1352 | 16 | return result; |
1353 | 16 | } |
1354 | | |
1355 | | // val QuadReadLaneAt(val, uint); |
1356 | | Value *TranslateQuadReadLaneAt(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1357 | | HLOperationLowerHelper &helper, |
1358 | | HLObjectOperationLowerHelper *pObjHelper, |
1359 | 66 | bool &Translated) { |
1360 | 66 | hlsl::OP *hlslOP = &helper.hlslOP; |
1361 | 66 | Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)}; |
1362 | 66 | return TrivialDxilOperation(DXIL::OpCode::QuadReadLaneAt, refArgs, |
1363 | 66 | CI->getOperand(1)->getType(), CI, hlslOP); |
1364 | 66 | } |
1365 | | |
1366 | | // Quad intrinsics of the form fn(val,QuadOpKind)->val |
1367 | | Value *TranslateQuadAnyAll(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1368 | | HLOperationLowerHelper &helper, |
1369 | | HLObjectOperationLowerHelper *pObjHelper, |
1370 | 22 | bool &Translated) { |
1371 | 22 | hlsl::OP *hlslOP = &helper.hlslOP; |
1372 | 22 | DXIL::QuadVoteOpKind opKind; |
1373 | 22 | switch (IOP) { |
1374 | 10 | case IntrinsicOp::IOP_QuadAll: |
1375 | 10 | opKind = DXIL::QuadVoteOpKind::All; |
1376 | 10 | break; |
1377 | 12 | case IntrinsicOp::IOP_QuadAny: |
1378 | 12 | opKind = DXIL::QuadVoteOpKind::Any; |
1379 | 12 | break; |
1380 | 0 | default: |
1381 | 0 | llvm_unreachable( |
1382 | 22 | "QuadAny/QuadAll translation called with wrong isntruction"); |
1383 | 22 | } |
1384 | 22 | Constant *OpArg = hlslOP->GetI8Const((unsigned)opKind); |
1385 | 22 | Value *refArgs[] = {nullptr, CI->getOperand(1), OpArg}; |
1386 | 22 | return TrivialDxilOperation(DXIL::OpCode::QuadVote, refArgs, |
1387 | 22 | CI->getOperand(1)->getType(), CI, hlslOP); |
1388 | 22 | } |
1389 | | |
1390 | | // Wave intrinsics of the form fn(val,QuadOpKind)->val |
1391 | | Value *TranslateQuadReadAcross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1392 | | HLOperationLowerHelper &helper, |
1393 | | HLObjectOperationLowerHelper *pObjHelper, |
1394 | 102 | bool &Translated) { |
1395 | 102 | hlsl::OP *hlslOP = &helper.hlslOP; |
1396 | 102 | DXIL::QuadOpKind opKind; |
1397 | 102 | switch (IOP) { |
1398 | 34 | case IntrinsicOp::IOP_QuadReadAcrossX: |
1399 | 34 | opKind = DXIL::QuadOpKind::ReadAcrossX; |
1400 | 34 | break; |
1401 | 32 | case IntrinsicOp::IOP_QuadReadAcrossY: |
1402 | 32 | opKind = DXIL::QuadOpKind::ReadAcrossY; |
1403 | 32 | break; |
1404 | 0 | default: |
1405 | 0 | DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_QuadReadAcrossDiagonal); |
1406 | 0 | LLVM_FALLTHROUGH; |
1407 | 36 | case IntrinsicOp::IOP_QuadReadAcrossDiagonal: |
1408 | 36 | opKind = DXIL::QuadOpKind::ReadAcrossDiagonal; |
1409 | 36 | break; |
1410 | 102 | } |
1411 | 102 | Constant *OpArg = hlslOP->GetI8Const((unsigned)opKind); |
1412 | 102 | Value *refArgs[] = {nullptr, CI->getOperand(1), OpArg}; |
1413 | 102 | return TrivialDxilOperation(DXIL::OpCode::QuadOp, refArgs, |
1414 | 102 | CI->getOperand(1)->getType(), CI, hlslOP); |
1415 | 102 | } |
1416 | | |
1417 | | // WaveAllEqual(val<n>)->bool<n> |
1418 | | Value *TranslateWaveAllEqual(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1419 | | HLOperationLowerHelper &helper, |
1420 | | HLObjectOperationLowerHelper *pObjHelper, |
1421 | 80 | bool &Translated) { |
1422 | 80 | hlsl::OP *hlslOP = &helper.hlslOP; |
1423 | 80 | Value *src = CI->getArgOperand(HLOperandIndex::kWaveAllEqualValueOpIdx); |
1424 | 80 | IRBuilder<> Builder(CI); |
1425 | | |
1426 | 80 | Type *Ty = src->getType(); |
1427 | 80 | Type *RetTy = Type::getInt1Ty(CI->getContext()); |
1428 | 80 | if (Ty->isVectorTy()) |
1429 | 4 | RetTy = VectorType::get(RetTy, Ty->getVectorNumElements()); |
1430 | | |
1431 | 80 | Constant *opArg = |
1432 | 80 | hlslOP->GetU32Const((unsigned)DXIL::OpCode::WaveActiveAllEqual); |
1433 | 80 | Value *args[] = {opArg, src}; |
1434 | | |
1435 | 80 | return TrivialDxilOperation(DXIL::OpCode::WaveActiveAllEqual, args, Ty, RetTy, |
1436 | 80 | hlslOP, Builder); |
1437 | 80 | } |
1438 | | |
1439 | | // WaveMatch(val<n>)->uint4 |
1440 | | Value *TranslateWaveMatch(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opc, |
1441 | | HLOperationLowerHelper &Helper, |
1442 | | HLObjectOperationLowerHelper *ObjHelper, |
1443 | 46 | bool &Translated) { |
1444 | 46 | hlsl::OP *Op = &Helper.hlslOP; |
1445 | 46 | IRBuilder<> Builder(CI); |
1446 | | |
1447 | | // Generate a dx.op.waveMatch call for each scalar in the input, and perform |
1448 | | // a bitwise AND between each result to derive the final bitmask in the case |
1449 | | // of vector inputs. |
1450 | | |
1451 | | // (1) Collect the list of all scalar inputs (e.g. decompose vectors) |
1452 | 46 | SmallVector<Value *, 4> ScalarInputs; |
1453 | | |
1454 | 46 | Value *Val = CI->getArgOperand(1); |
1455 | 46 | Type *ValTy = Val->getType(); |
1456 | 46 | Type *EltTy = ValTy->getScalarType(); |
1457 | | |
1458 | 46 | if (ValTy->isVectorTy()) { |
1459 | 78 | for (uint64_t i = 0, e = ValTy->getVectorNumElements(); i != e; ++i64 ) { |
1460 | 64 | Value *Elt = Builder.CreateExtractElement(Val, i); |
1461 | 64 | ScalarInputs.push_back(Elt); |
1462 | 64 | } |
1463 | 32 | } else { |
1464 | 32 | ScalarInputs.push_back(Val); |
1465 | 32 | } |
1466 | | |
1467 | 46 | Value *Res = nullptr; |
1468 | 46 | Constant *OpcArg = Op->GetU32Const((unsigned)DXIL::OpCode::WaveMatch); |
1469 | 46 | Value *Fn = Op->GetOpFunc(OP::OpCode::WaveMatch, EltTy); |
1470 | | |
1471 | | // (2) For each scalar, emit a call to dx.op.waveMatch. If this is not the |
1472 | | // first scalar, then AND the result with the accumulator. |
1473 | 142 | for (unsigned i = 0, e = ScalarInputs.size(); i != e; ++i96 ) { |
1474 | 96 | Value *Args[] = {OpcArg, ScalarInputs[i]}; |
1475 | 96 | Value *Call = Builder.CreateCall(Fn, Args); |
1476 | | |
1477 | 96 | if (Res) { |
1478 | | // Generate bitwise AND of the components |
1479 | 250 | for (unsigned j = 0; j != 4; ++j200 ) { |
1480 | 200 | Value *ResVal = Builder.CreateExtractValue(Res, j); |
1481 | 200 | Value *CallVal = Builder.CreateExtractValue(Call, j); |
1482 | 200 | Value *And = Builder.CreateAnd(ResVal, CallVal); |
1483 | 200 | Res = Builder.CreateInsertValue(Res, And, j); |
1484 | 200 | } |
1485 | 50 | } else { |
1486 | 46 | Res = Call; |
1487 | 46 | } |
1488 | 96 | } |
1489 | | |
1490 | | // (3) Convert the final aggregate into a vector to make the types match |
1491 | 46 | Value *ResVec = UndefValue::get(CI->getType()); |
1492 | 230 | for (unsigned i = 0; i != 4; ++i184 ) { |
1493 | 184 | Value *Elt = Builder.CreateExtractValue(Res, i); |
1494 | 184 | ResVec = Builder.CreateInsertElement(ResVec, Elt, i); |
1495 | 184 | } |
1496 | | |
1497 | 46 | return ResVec; |
1498 | 46 | } |
1499 | | |
1500 | | // Wave intrinsics of the form fn(valA)->valB, where no overloading takes place |
1501 | | Value *TranslateWaveA2B(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1502 | | HLOperationLowerHelper &helper, |
1503 | | HLObjectOperationLowerHelper *pObjHelper, |
1504 | 162 | bool &Translated) { |
1505 | 162 | hlsl::OP *hlslOP = &helper.hlslOP; |
1506 | 162 | Value *refArgs[] = {nullptr, CI->getOperand(1)}; |
1507 | 162 | return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP); |
1508 | 162 | } |
1509 | | // Wave ballot intrinsic. |
1510 | | Value *TranslateWaveBallot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1511 | | HLOperationLowerHelper &helper, |
1512 | | HLObjectOperationLowerHelper *pObjHelper, |
1513 | 32 | bool &Translated) { |
1514 | | // The high-level operation is uint4 ballot(i1). |
1515 | | // The DXIL operation is struct.u4 ballot(i1). |
1516 | | // To avoid updating users with more than a simple replace, we translate into |
1517 | | // a call into struct.u4, then reassemble the vector. |
1518 | | // Scalarization and constant propagation take care of cleanup. |
1519 | 32 | IRBuilder<> B(CI); |
1520 | | |
1521 | | // Make the DXIL call itself. |
1522 | 32 | hlsl::OP *hlslOP = &helper.hlslOP; |
1523 | 32 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
1524 | 32 | Value *refArgs[] = {opArg, CI->getOperand(1)}; |
1525 | 32 | Function *dxilFunc = |
1526 | 32 | hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext())); |
1527 | 32 | Value *dxilVal = |
1528 | 32 | B.CreateCall(dxilFunc, refArgs, hlslOP->GetOpCodeName(opcode)); |
1529 | | |
1530 | | // Assign from the call results into a vector. |
1531 | 32 | Type *ResTy = CI->getType(); |
1532 | 32 | DXASSERT_NOMSG(ResTy->isVectorTy() && ResTy->getVectorNumElements() == 4); |
1533 | 32 | DXASSERT_NOMSG(dxilVal->getType()->isStructTy() && |
1534 | 32 | dxilVal->getType()->getNumContainedTypes() == 4); |
1535 | | |
1536 | | // 'x' component is the first vector element, highest bits. |
1537 | 32 | Value *ResVal = llvm::UndefValue::get(ResTy); |
1538 | 160 | for (unsigned Idx = 0; Idx < 4; ++Idx128 ) { |
1539 | 128 | ResVal = B.CreateInsertElement( |
1540 | 128 | ResVal, B.CreateExtractValue(dxilVal, ArrayRef<unsigned>(Idx)), Idx); |
1541 | 128 | } |
1542 | | |
1543 | 32 | return ResVal; |
1544 | 32 | } |
1545 | | |
1546 | 670 | static bool WaveIntrinsicNeedsSign(OP::OpCode opcode) { |
1547 | 670 | return opcode == OP::OpCode::WaveActiveOp || |
1548 | 670 | opcode == OP::OpCode::WavePrefixOp288 ; |
1549 | 670 | } |
1550 | | |
1551 | 946 | static unsigned WaveIntrinsicToSignedOpKind(IntrinsicOp IOP) { |
1552 | 946 | if (IOP == IntrinsicOp::IOP_WaveActiveUMax || |
1553 | 946 | IOP == IntrinsicOp::IOP_WaveActiveUMin908 || |
1554 | 946 | IOP == IntrinsicOp::IOP_WaveActiveUSum870 || |
1555 | 946 | IOP == IntrinsicOp::IOP_WaveActiveUProduct840 || |
1556 | 946 | IOP == IntrinsicOp::IOP_WaveMultiPrefixUProduct834 || |
1557 | 946 | IOP == IntrinsicOp::IOP_WaveMultiPrefixUSum820 || |
1558 | 946 | IOP == IntrinsicOp::IOP_WavePrefixUSum806 || |
1559 | 946 | IOP == IntrinsicOp::IOP_WavePrefixUProduct776 ) |
1560 | 176 | return (unsigned)DXIL::SignedOpKind::Unsigned; |
1561 | 770 | return (unsigned)DXIL::SignedOpKind::Signed; |
1562 | 946 | } |
1563 | | |
1564 | 946 | static unsigned WaveIntrinsicToOpKind(IntrinsicOp IOP) { |
1565 | 946 | switch (IOP) { |
1566 | | // Bit operations. |
1567 | 28 | case IntrinsicOp::IOP_WaveActiveBitOr: |
1568 | 28 | return (unsigned)DXIL::WaveBitOpKind::Or; |
1569 | 62 | case IntrinsicOp::IOP_WaveActiveBitAnd: |
1570 | 62 | return (unsigned)DXIL::WaveBitOpKind::And; |
1571 | 44 | case IntrinsicOp::IOP_WaveActiveBitXor: |
1572 | 44 | return (unsigned)DXIL::WaveBitOpKind::Xor; |
1573 | | // Prefix operations. |
1574 | 44 | case IntrinsicOp::IOP_WavePrefixSum: |
1575 | 74 | case IntrinsicOp::IOP_WavePrefixUSum: |
1576 | 74 | return (unsigned)DXIL::WaveOpKind::Sum; |
1577 | 74 | case IntrinsicOp::IOP_WavePrefixProduct: |
1578 | 80 | case IntrinsicOp::IOP_WavePrefixUProduct: |
1579 | 80 | return (unsigned)DXIL::WaveOpKind::Product; |
1580 | | // Numeric operations. |
1581 | 46 | case IntrinsicOp::IOP_WaveActiveMax: |
1582 | 84 | case IntrinsicOp::IOP_WaveActiveUMax: |
1583 | 84 | return (unsigned)DXIL::WaveOpKind::Max; |
1584 | 60 | case IntrinsicOp::IOP_WaveActiveMin: |
1585 | 98 | case IntrinsicOp::IOP_WaveActiveUMin: |
1586 | 98 | return (unsigned)DXIL::WaveOpKind::Min; |
1587 | 90 | case IntrinsicOp::IOP_WaveActiveSum: |
1588 | 120 | case IntrinsicOp::IOP_WaveActiveUSum: |
1589 | 120 | return (unsigned)DXIL::WaveOpKind::Sum; |
1590 | 74 | case IntrinsicOp::IOP_WaveActiveProduct: |
1591 | 80 | case IntrinsicOp::IOP_WaveActiveUProduct: |
1592 | | // MultiPrefix operations |
1593 | 124 | case IntrinsicOp::IOP_WaveMultiPrefixBitAnd: |
1594 | 124 | return (unsigned)DXIL::WaveMultiPrefixOpKind::And; |
1595 | 44 | case IntrinsicOp::IOP_WaveMultiPrefixBitOr: |
1596 | 44 | return (unsigned)DXIL::WaveMultiPrefixOpKind::Or; |
1597 | 44 | case IntrinsicOp::IOP_WaveMultiPrefixBitXor: |
1598 | 44 | return (unsigned)DXIL::WaveMultiPrefixOpKind::Xor; |
1599 | 58 | case IntrinsicOp::IOP_WaveMultiPrefixProduct: |
1600 | 72 | case IntrinsicOp::IOP_WaveMultiPrefixUProduct: |
1601 | 72 | return (unsigned)DXIL::WaveMultiPrefixOpKind::Product; |
1602 | 58 | case IntrinsicOp::IOP_WaveMultiPrefixSum: |
1603 | 72 | case IntrinsicOp::IOP_WaveMultiPrefixUSum: |
1604 | 72 | return (unsigned)DXIL::WaveMultiPrefixOpKind::Sum; |
1605 | 0 | default: |
1606 | 0 | DXASSERT(IOP == IntrinsicOp::IOP_WaveActiveProduct || |
1607 | 0 | IOP == IntrinsicOp::IOP_WaveActiveUProduct, |
1608 | 0 | "else caller passed incorrect value"); |
1609 | 0 | return (unsigned)DXIL::WaveOpKind::Product; |
1610 | 946 | } |
1611 | 946 | } |
1612 | | |
1613 | | // Wave intrinsics of the form fn(valA)->valA |
1614 | | Value *TranslateWaveA2A(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1615 | | HLOperationLowerHelper &helper, |
1616 | | HLObjectOperationLowerHelper *pObjHelper, |
1617 | 670 | bool &Translated) { |
1618 | 670 | hlsl::OP *hlslOP = &helper.hlslOP; |
1619 | | |
1620 | 670 | Constant *kindValInt = hlslOP->GetI8Const(WaveIntrinsicToOpKind(IOP)); |
1621 | 670 | Constant *signValInt = hlslOP->GetI8Const(WaveIntrinsicToSignedOpKind(IOP)); |
1622 | 670 | Value *refArgs[] = {nullptr, CI->getOperand(1), kindValInt, signValInt}; |
1623 | 670 | unsigned refArgCount = _countof(refArgs); |
1624 | 670 | if (!WaveIntrinsicNeedsSign(opcode)) |
1625 | 134 | refArgCount--; |
1626 | 670 | return TrivialDxilOperation(opcode, |
1627 | 670 | llvm::ArrayRef<Value *>(refArgs, refArgCount), |
1628 | 670 | CI->getOperand(1)->getType(), CI, hlslOP); |
1629 | 670 | } |
1630 | | |
1631 | | // WaveMultiPrefixOP(val<n>, mask) -> val<n> |
1632 | | Value *TranslateWaveMultiPrefix(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opc, |
1633 | | HLOperationLowerHelper &Helper, |
1634 | | HLObjectOperationLowerHelper *ObjHelper, |
1635 | 276 | bool &Translated) { |
1636 | 276 | hlsl::OP *Op = &Helper.hlslOP; |
1637 | | |
1638 | 276 | Constant *KindValInt = Op->GetI8Const(WaveIntrinsicToOpKind(IOP)); |
1639 | 276 | Constant *SignValInt = Op->GetI8Const(WaveIntrinsicToSignedOpKind(IOP)); |
1640 | | |
1641 | | // Decompose mask into scalars |
1642 | 276 | IRBuilder<> Builder(CI); |
1643 | 276 | Value *Mask = CI->getArgOperand(2); |
1644 | 276 | Value *Mask0 = Builder.CreateExtractElement(Mask, (uint64_t)0); |
1645 | 276 | Value *Mask1 = Builder.CreateExtractElement(Mask, (uint64_t)1); |
1646 | 276 | Value *Mask2 = Builder.CreateExtractElement(Mask, (uint64_t)2); |
1647 | 276 | Value *Mask3 = Builder.CreateExtractElement(Mask, (uint64_t)3); |
1648 | | |
1649 | 276 | Value *Args[] = {nullptr, CI->getOperand(1), Mask0, Mask1, Mask2, |
1650 | 276 | Mask3, KindValInt, SignValInt}; |
1651 | | |
1652 | 276 | return TrivialDxilOperation(Opc, Args, CI->getOperand(1)->getType(), CI, Op); |
1653 | 276 | } |
1654 | | |
1655 | | // WaveMultiPrefixBitCount(i1, mask) -> i32 |
1656 | | Value *TranslateWaveMultiPrefixBitCount(CallInst *CI, IntrinsicOp IOP, |
1657 | | OP::OpCode Opc, |
1658 | | HLOperationLowerHelper &Helper, |
1659 | | HLObjectOperationLowerHelper *ObjHelper, |
1660 | 40 | bool &Translated) { |
1661 | 40 | hlsl::OP *Op = &Helper.hlslOP; |
1662 | | |
1663 | | // Decompose mask into scalars |
1664 | 40 | IRBuilder<> Builder(CI); |
1665 | 40 | Value *Mask = CI->getArgOperand(2); |
1666 | 40 | Value *Mask0 = Builder.CreateExtractElement(Mask, (uint64_t)0); |
1667 | 40 | Value *Mask1 = Builder.CreateExtractElement(Mask, (uint64_t)1); |
1668 | 40 | Value *Mask2 = Builder.CreateExtractElement(Mask, (uint64_t)2); |
1669 | 40 | Value *Mask3 = Builder.CreateExtractElement(Mask, (uint64_t)3); |
1670 | | |
1671 | 40 | Value *Args[] = {nullptr, CI->getOperand(1), Mask0, Mask1, Mask2, Mask3}; |
1672 | | |
1673 | 40 | return TrivialDxilOperation(Opc, Args, Helper.voidTy, CI, Op); |
1674 | 40 | } |
1675 | | |
1676 | | // Wave intrinsics of the form fn()->val |
1677 | | Value *TranslateWaveToVal(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1678 | | HLOperationLowerHelper &helper, |
1679 | | HLObjectOperationLowerHelper *pObjHelper, |
1680 | 96 | bool &Translated) { |
1681 | 96 | hlsl::OP *hlslOP = &helper.hlslOP; |
1682 | 96 | Value *refArgs[] = {nullptr}; |
1683 | 96 | return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP); |
1684 | 96 | } |
1685 | | |
1686 | | // Wave intrinsics of the form fn(val,lane)->val |
1687 | | Value *TranslateWaveReadLaneAt(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1688 | | HLOperationLowerHelper &helper, |
1689 | | HLObjectOperationLowerHelper *pObjHelper, |
1690 | 98 | bool &Translated) { |
1691 | 98 | hlsl::OP *hlslOP = &helper.hlslOP; |
1692 | 98 | Value *refArgs[] = {nullptr, CI->getOperand(1), CI->getOperand(2)}; |
1693 | 98 | return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneAt, refArgs, |
1694 | 98 | CI->getOperand(1)->getType(), CI, hlslOP); |
1695 | 98 | } |
1696 | | |
1697 | | // Wave intrinsics of the form fn(val)->val |
1698 | | Value *TranslateWaveReadLaneFirst(CallInst *CI, IntrinsicOp IOP, |
1699 | | OP::OpCode opcode, |
1700 | | HLOperationLowerHelper &helper, |
1701 | | HLObjectOperationLowerHelper *pObjHelper, |
1702 | 274 | bool &Translated) { |
1703 | 274 | hlsl::OP *hlslOP = &helper.hlslOP; |
1704 | 274 | Value *refArgs[] = {nullptr, CI->getOperand(1)}; |
1705 | 274 | return TrivialDxilOperation(DXIL::OpCode::WaveReadLaneFirst, refArgs, |
1706 | 274 | CI->getOperand(1)->getType(), CI, hlslOP); |
1707 | 274 | } |
1708 | | |
1709 | | Value *TranslateAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1710 | | HLOperationLowerHelper &helper, |
1711 | | HLObjectOperationLowerHelper *pObjHelper, |
1712 | 950 | bool &Translated) { |
1713 | 950 | hlsl::OP *hlslOP = &helper.hlslOP; |
1714 | 950 | Type *pOverloadTy = CI->getType()->getScalarType(); |
1715 | 950 | if (pOverloadTy->isFloatingPointTy()) { |
1716 | 804 | Value *refArgs[] = {nullptr, CI->getOperand(1)}; |
1717 | 804 | return TrivialDxilOperation(DXIL::OpCode::FAbs, refArgs, CI->getType(), CI, |
1718 | 804 | hlslOP); |
1719 | 804 | } |
1720 | | |
1721 | 146 | Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
1722 | 146 | IRBuilder<> Builder(CI); |
1723 | 146 | Value *neg = Builder.CreateNeg(src); |
1724 | 146 | return TrivialDxilBinaryOperation(DXIL::OpCode::IMax, src, neg, hlslOP, |
1725 | 146 | Builder); |
1726 | 950 | } |
1727 | | |
1728 | | Value *TranslateUAbs(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1729 | | HLOperationLowerHelper &helper, |
1730 | | HLObjectOperationLowerHelper *pObjHelper, |
1731 | 24 | bool &Translated) { |
1732 | 24 | return CI->getOperand(HLOperandIndex::kUnaryOpSrc0Idx); // No-op |
1733 | 24 | } |
1734 | | |
1735 | 312 | Value *GenerateCmpNEZero(Value *val, IRBuilder<> Builder) { |
1736 | 312 | Type *Ty = val->getType(); |
1737 | 312 | Type *EltTy = Ty->getScalarType(); |
1738 | | |
1739 | 312 | Constant *zero = nullptr; |
1740 | 312 | if (EltTy->isFloatingPointTy()) |
1741 | 36 | zero = ConstantFP::get(EltTy, 0); |
1742 | 276 | else |
1743 | 276 | zero = ConstantInt::get(EltTy, 0); |
1744 | | |
1745 | 312 | if (Ty != EltTy) |
1746 | 270 | zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero); |
1747 | | |
1748 | 312 | if (EltTy->isFloatingPointTy()) |
1749 | 36 | return Builder.CreateFCmpUNE(val, zero); |
1750 | | |
1751 | 276 | return Builder.CreateICmpNE(val, zero); |
1752 | 312 | } |
1753 | | |
1754 | 144 | Value *TranslateAllForValue(Value *val, IRBuilder<> &Builder) { |
1755 | 144 | Value *cond = GenerateCmpNEZero(val, Builder); |
1756 | | |
1757 | 144 | Type *Ty = val->getType(); |
1758 | 144 | Type *EltTy = Ty->getScalarType(); |
1759 | | |
1760 | 144 | if (Ty == EltTy) |
1761 | 24 | return cond; |
1762 | | |
1763 | 120 | Value *Result = Builder.CreateExtractElement(cond, (uint64_t)0); |
1764 | 560 | for (unsigned i = 1; i < Ty->getVectorNumElements(); i++440 ) { |
1765 | 440 | Value *Elt = Builder.CreateExtractElement(cond, i); |
1766 | 440 | Result = Builder.CreateAnd(Result, Elt); |
1767 | 440 | } |
1768 | | |
1769 | 120 | return Result; |
1770 | 144 | } |
1771 | | |
1772 | | Value *TranslateAll(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1773 | | HLOperationLowerHelper &helper, |
1774 | | HLObjectOperationLowerHelper *pObjHelper, |
1775 | 144 | bool &Translated) { |
1776 | 144 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
1777 | 144 | IRBuilder<> Builder(CI); |
1778 | 144 | return TranslateAllForValue(val, Builder); |
1779 | 144 | } |
1780 | | |
1781 | | Value *TranslateAny(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1782 | | HLOperationLowerHelper &helper, |
1783 | | HLObjectOperationLowerHelper *pObjHelper, |
1784 | 168 | bool &Translated) { |
1785 | 168 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
1786 | | |
1787 | 168 | IRBuilder<> Builder(CI); |
1788 | | |
1789 | 168 | Value *cond = GenerateCmpNEZero(val, Builder); |
1790 | | |
1791 | 168 | Type *Ty = val->getType(); |
1792 | 168 | Type *EltTy = Ty->getScalarType(); |
1793 | | |
1794 | 168 | if (Ty == EltTy) |
1795 | 18 | return cond; |
1796 | | |
1797 | 150 | Value *Result = Builder.CreateExtractElement(cond, (uint64_t)0); |
1798 | 688 | for (unsigned i = 1; i < Ty->getVectorNumElements(); i++538 ) { |
1799 | 538 | Value *Elt = Builder.CreateExtractElement(cond, i); |
1800 | 538 | Result = Builder.CreateOr(Result, Elt); |
1801 | 538 | } |
1802 | 150 | return Result; |
1803 | 168 | } |
1804 | | |
1805 | | Value *TranslateBitcast(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1806 | | HLOperationLowerHelper &helper, |
1807 | | HLObjectOperationLowerHelper *pObjHelper, |
1808 | 1.83k | bool &Translated) { |
1809 | 1.83k | Type *Ty = CI->getType(); |
1810 | 1.83k | Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
1811 | 1.83k | IRBuilder<> Builder(CI); |
1812 | 1.83k | return Builder.CreateBitCast(op, Ty); |
1813 | 1.83k | } |
1814 | | |
1815 | | Value *TranslateDoubleAsUint(Value *x, Value *lo, Value *hi, |
1816 | 32 | IRBuilder<> &Builder, hlsl::OP *hlslOP) { |
1817 | 32 | Type *Ty = x->getType(); |
1818 | 32 | Type *outTy = lo->getType()->getPointerElementType(); |
1819 | 32 | DXIL::OpCode opcode = DXIL::OpCode::SplitDouble; |
1820 | | |
1821 | 32 | Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType()); |
1822 | 32 | Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode)); |
1823 | | |
1824 | 32 | if (Ty->isVectorTy()) { |
1825 | 8 | Value *retValLo = llvm::UndefValue::get(outTy); |
1826 | 8 | Value *retValHi = llvm::UndefValue::get(outTy); |
1827 | 8 | unsigned vecSize = Ty->getVectorNumElements(); |
1828 | | |
1829 | 24 | for (unsigned i = 0; i < vecSize; i++16 ) { |
1830 | 16 | Value *Elt = Builder.CreateExtractElement(x, i); |
1831 | 16 | Value *EltOP = Builder.CreateCall(dxilFunc, {opArg, Elt}, |
1832 | 16 | hlslOP->GetOpCodeName(opcode)); |
1833 | 16 | Value *EltLo = Builder.CreateExtractValue(EltOP, 0); |
1834 | 16 | retValLo = Builder.CreateInsertElement(retValLo, EltLo, i); |
1835 | 16 | Value *EltHi = Builder.CreateExtractValue(EltOP, 1); |
1836 | 16 | retValHi = Builder.CreateInsertElement(retValHi, EltHi, i); |
1837 | 16 | } |
1838 | 8 | Builder.CreateStore(retValLo, lo); |
1839 | 8 | Builder.CreateStore(retValHi, hi); |
1840 | 24 | } else { |
1841 | 24 | Value *retVal = |
1842 | 24 | Builder.CreateCall(dxilFunc, {opArg, x}, hlslOP->GetOpCodeName(opcode)); |
1843 | 24 | Value *retValLo = Builder.CreateExtractValue(retVal, 0); |
1844 | 24 | Value *retValHi = Builder.CreateExtractValue(retVal, 1); |
1845 | 24 | Builder.CreateStore(retValLo, lo); |
1846 | 24 | Builder.CreateStore(retValHi, hi); |
1847 | 24 | } |
1848 | | |
1849 | 32 | return nullptr; |
1850 | 32 | } |
1851 | | |
1852 | | Value *TranslateAsUint(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1853 | | HLOperationLowerHelper &helper, |
1854 | | HLObjectOperationLowerHelper *pObjHelper, |
1855 | 600 | bool &Translated) { |
1856 | 600 | if (CI->getNumArgOperands() == 2) |
1857 | 568 | return TranslateBitcast(CI, IOP, opcode, helper, pObjHelper, Translated); |
1858 | | |
1859 | 32 | DXASSERT_NOMSG(CI->getNumArgOperands() == 4); |
1860 | 32 | hlsl::OP *hlslOP = &helper.hlslOP; |
1861 | 32 | Value *x = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); |
1862 | 32 | DXASSERT_NOMSG(x->getType()->getScalarType()->isDoubleTy()); |
1863 | 32 | Value *lo = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); |
1864 | 32 | Value *hi = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); |
1865 | 32 | IRBuilder<> Builder(CI); |
1866 | 32 | return TranslateDoubleAsUint(x, lo, hi, Builder, hlslOP); |
1867 | 600 | } |
1868 | | |
1869 | | Value *TranslateAsDouble(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1870 | | HLOperationLowerHelper &helper, |
1871 | | HLObjectOperationLowerHelper *pObjHelper, |
1872 | 66 | bool &Translated) { |
1873 | 66 | hlsl::OP *hlslOP = &helper.hlslOP; |
1874 | 66 | Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
1875 | 66 | Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
1876 | | |
1877 | 66 | Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode)); |
1878 | 66 | IRBuilder<> Builder(CI); |
1879 | 66 | return TrivialDxilOperation(opcode, {opArg, x, y}, CI->getType(), |
1880 | 66 | CI->getType(), hlslOP, Builder); |
1881 | 66 | } |
1882 | | |
1883 | | Value *TranslateAtan2(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1884 | | HLOperationLowerHelper &helper, |
1885 | | HLObjectOperationLowerHelper *pObjHelper, |
1886 | 56 | bool &Translated) { |
1887 | 56 | hlsl::OP *hlslOP = &helper.hlslOP; |
1888 | 56 | Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
1889 | 56 | Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
1890 | | |
1891 | 56 | IRBuilder<> Builder(CI); |
1892 | 56 | Value *tan = Builder.CreateFDiv(y, x); |
1893 | | |
1894 | 56 | Value *atan = |
1895 | 56 | TrivialDxilUnaryOperation(OP::OpCode::Atan, tan, hlslOP, Builder); |
1896 | | // Modify atan result based on https://en.wikipedia.org/wiki/Atan2. |
1897 | 56 | Type *Ty = x->getType(); |
1898 | 56 | Constant *pi = ConstantFP::get(Ty->getScalarType(), M_PI); |
1899 | 56 | Constant *halfPi = ConstantFP::get(Ty->getScalarType(), M_PI / 2); |
1900 | 56 | Constant *negHalfPi = ConstantFP::get(Ty->getScalarType(), -M_PI / 2); |
1901 | 56 | Constant *zero = ConstantFP::get(Ty->getScalarType(), 0); |
1902 | 56 | if (Ty->isVectorTy()) { |
1903 | 22 | unsigned vecSize = Ty->getVectorNumElements(); |
1904 | 22 | pi = ConstantVector::getSplat(vecSize, pi); |
1905 | 22 | halfPi = ConstantVector::getSplat(vecSize, halfPi); |
1906 | 22 | negHalfPi = ConstantVector::getSplat(vecSize, negHalfPi); |
1907 | 22 | zero = ConstantVector::getSplat(vecSize, zero); |
1908 | 22 | } |
1909 | 56 | Value *atanAddPi = Builder.CreateFAdd(atan, pi); |
1910 | 56 | Value *atanSubPi = Builder.CreateFSub(atan, pi); |
1911 | | |
1912 | | // x > 0 -> atan. |
1913 | 56 | Value *result = atan; |
1914 | 56 | Value *xLt0 = Builder.CreateFCmpOLT(x, zero); |
1915 | 56 | Value *xEq0 = Builder.CreateFCmpOEQ(x, zero); |
1916 | | |
1917 | 56 | Value *yGe0 = Builder.CreateFCmpOGE(y, zero); |
1918 | 56 | Value *yLt0 = Builder.CreateFCmpOLT(y, zero); |
1919 | | // x < 0, y >= 0 -> atan + pi. |
1920 | 56 | Value *xLt0AndyGe0 = Builder.CreateAnd(xLt0, yGe0); |
1921 | 56 | result = Builder.CreateSelect(xLt0AndyGe0, atanAddPi, result); |
1922 | | |
1923 | | // x < 0, y < 0 -> atan - pi. |
1924 | 56 | Value *xLt0AndYLt0 = Builder.CreateAnd(xLt0, yLt0); |
1925 | 56 | result = Builder.CreateSelect(xLt0AndYLt0, atanSubPi, result); |
1926 | | |
1927 | | // x == 0, y < 0 -> -pi/2 |
1928 | 56 | Value *xEq0AndYLt0 = Builder.CreateAnd(xEq0, yLt0); |
1929 | 56 | result = Builder.CreateSelect(xEq0AndYLt0, negHalfPi, result); |
1930 | | // x == 0, y > 0 -> pi/2 |
1931 | 56 | Value *xEq0AndYGe0 = Builder.CreateAnd(xEq0, yGe0); |
1932 | 56 | result = Builder.CreateSelect(xEq0AndYGe0, halfPi, result); |
1933 | | |
1934 | 56 | return result; |
1935 | 56 | } |
1936 | | |
1937 | | Value *TranslateClamp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1938 | | HLOperationLowerHelper &helper, |
1939 | | HLObjectOperationLowerHelper *pObjHelper, |
1940 | 764 | bool &Translated) { |
1941 | 764 | hlsl::OP *hlslOP = &helper.hlslOP; |
1942 | 764 | Type *Ty = CI->getType(); |
1943 | 764 | Type *EltTy = Ty->getScalarType(); |
1944 | 764 | DXIL::OpCode maxOp = DXIL::OpCode::FMax; |
1945 | 764 | DXIL::OpCode minOp = DXIL::OpCode::FMin; |
1946 | 764 | if (IOP == IntrinsicOp::IOP_uclamp) { |
1947 | 56 | maxOp = DXIL::OpCode::UMax; |
1948 | 56 | minOp = DXIL::OpCode::UMin; |
1949 | 708 | } else if (EltTy->isIntegerTy()) { |
1950 | 48 | maxOp = DXIL::OpCode::IMax; |
1951 | 48 | minOp = DXIL::OpCode::IMin; |
1952 | 48 | } |
1953 | | |
1954 | 764 | Value *x = CI->getArgOperand(HLOperandIndex::kClampOpXIdx); |
1955 | 764 | Value *maxVal = CI->getArgOperand(HLOperandIndex::kClampOpMaxIdx); |
1956 | 764 | Value *minVal = CI->getArgOperand(HLOperandIndex::kClampOpMinIdx); |
1957 | | |
1958 | 764 | IRBuilder<> Builder(CI); |
1959 | | // min(max(x, minVal), maxVal). |
1960 | 764 | Value *maxXMinVal = |
1961 | 764 | TrivialDxilBinaryOperation(maxOp, x, minVal, hlslOP, Builder); |
1962 | 764 | return TrivialDxilBinaryOperation(minOp, maxXMinVal, maxVal, hlslOP, Builder); |
1963 | 764 | } |
1964 | | |
1965 | | Value *TranslateClip(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1966 | | HLOperationLowerHelper &helper, |
1967 | | HLObjectOperationLowerHelper *pObjHelper, |
1968 | 110 | bool &Translated) { |
1969 | 110 | hlsl::OP *hlslOP = &helper.hlslOP; |
1970 | 110 | Function *discard = |
1971 | 110 | hlslOP->GetOpFunc(OP::OpCode::Discard, Type::getVoidTy(CI->getContext())); |
1972 | 110 | IRBuilder<> Builder(CI); |
1973 | 110 | Value *cond = nullptr; |
1974 | 110 | Value *arg = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
1975 | 110 | if (VectorType *VT = dyn_cast<VectorType>(arg->getType())) { |
1976 | 14 | Value *elt = Builder.CreateExtractElement(arg, (uint64_t)0); |
1977 | 14 | cond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0)); |
1978 | 50 | for (unsigned i = 1; i < VT->getNumElements(); i++36 ) { |
1979 | 36 | Value *elt = Builder.CreateExtractElement(arg, i); |
1980 | 36 | Value *eltCond = Builder.CreateFCmpOLT(elt, hlslOP->GetFloatConst(0)); |
1981 | 36 | cond = Builder.CreateOr(cond, eltCond); |
1982 | 36 | } |
1983 | 14 | } else |
1984 | 96 | cond = Builder.CreateFCmpOLT(arg, hlslOP->GetFloatConst(0)); |
1985 | | |
1986 | | /*If discard condition evaluates to false at compile-time, then |
1987 | | don't emit the discard instruction.*/ |
1988 | 110 | if (ConstantInt *constCond = dyn_cast<ConstantInt>(cond)) |
1989 | 78 | if (!constCond->getLimitedValue()) |
1990 | 10 | return nullptr; |
1991 | | |
1992 | 100 | Constant *opArg = hlslOP->GetU32Const((unsigned)OP::OpCode::Discard); |
1993 | 100 | Builder.CreateCall(discard, {opArg, cond}); |
1994 | 100 | return nullptr; |
1995 | 110 | } |
1996 | | |
1997 | | Value *TranslateCross(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
1998 | | HLOperationLowerHelper &helper, |
1999 | | HLObjectOperationLowerHelper *pObjHelper, |
2000 | 104 | bool &Translated) { |
2001 | 104 | VectorType *VT = cast<VectorType>(CI->getType()); |
2002 | 104 | DXASSERT_NOMSG(VT->getNumElements() == 3); |
2003 | | |
2004 | 104 | Value *op0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2005 | 104 | Value *op1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2006 | | |
2007 | 104 | IRBuilder<> Builder(CI); |
2008 | 104 | Value *op0_x = Builder.CreateExtractElement(op0, (uint64_t)0); |
2009 | 104 | Value *op0_y = Builder.CreateExtractElement(op0, 1); |
2010 | 104 | Value *op0_z = Builder.CreateExtractElement(op0, 2); |
2011 | | |
2012 | 104 | Value *op1_x = Builder.CreateExtractElement(op1, (uint64_t)0); |
2013 | 104 | Value *op1_y = Builder.CreateExtractElement(op1, 1); |
2014 | 104 | Value *op1_z = Builder.CreateExtractElement(op1, 2); |
2015 | | |
2016 | 312 | auto MulSub = [&](Value *x0, Value *y0, Value *x1, Value *y1) -> Value * { |
2017 | 312 | Value *xy = Builder.CreateFMul(x0, y1); |
2018 | 312 | Value *yx = Builder.CreateFMul(y0, x1); |
2019 | 312 | return Builder.CreateFSub(xy, yx); |
2020 | 312 | }; |
2021 | | |
2022 | 104 | Value *yz_zy = MulSub(op0_y, op0_z, op1_y, op1_z); |
2023 | 104 | Value *zx_xz = MulSub(op0_z, op0_x, op1_z, op1_x); |
2024 | 104 | Value *xy_yx = MulSub(op0_x, op0_y, op1_x, op1_y); |
2025 | | |
2026 | 104 | Value *cross = UndefValue::get(VT); |
2027 | 104 | cross = Builder.CreateInsertElement(cross, yz_zy, (uint64_t)0); |
2028 | 104 | cross = Builder.CreateInsertElement(cross, zx_xz, 1); |
2029 | 104 | cross = Builder.CreateInsertElement(cross, xy_yx, 2); |
2030 | 104 | return cross; |
2031 | 104 | } |
2032 | | |
2033 | | Value *TranslateDegrees(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2034 | | HLOperationLowerHelper &helper, |
2035 | | HLObjectOperationLowerHelper *pObjHelper, |
2036 | 32 | bool &Translated) { |
2037 | 32 | IRBuilder<> Builder(CI); |
2038 | 32 | Type *Ty = CI->getType(); |
2039 | 32 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2040 | | // 180/pi. |
2041 | 32 | Constant *toDegreeConst = ConstantFP::get(Ty->getScalarType(), 180 / M_PI); |
2042 | 32 | if (Ty != Ty->getScalarType()) { |
2043 | 16 | toDegreeConst = |
2044 | 16 | ConstantVector::getSplat(Ty->getVectorNumElements(), toDegreeConst); |
2045 | 16 | } |
2046 | 32 | return Builder.CreateFMul(toDegreeConst, val); |
2047 | 32 | } |
2048 | | |
2049 | | Value *TranslateDst(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2050 | | HLOperationLowerHelper &helper, |
2051 | | HLObjectOperationLowerHelper *pObjHelper, |
2052 | 16 | bool &Translated) { |
2053 | 16 | Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2054 | 16 | Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2055 | 16 | Type *Ty = src1->getType(); |
2056 | 16 | IRBuilder<> Builder(CI); |
2057 | 16 | Value *Result = UndefValue::get(Ty); |
2058 | 16 | Constant *oneConst = ConstantFP::get(Ty->getScalarType(), 1); |
2059 | | // dest.x = 1; |
2060 | 16 | Result = Builder.CreateInsertElement(Result, oneConst, (uint64_t)0); |
2061 | | // dest.y = src0.y * src1.y; |
2062 | 16 | Value *src0_y = Builder.CreateExtractElement(src0, 1); |
2063 | 16 | Value *src1_y = Builder.CreateExtractElement(src1, 1); |
2064 | 16 | Value *yMuly = Builder.CreateFMul(src0_y, src1_y); |
2065 | 16 | Result = Builder.CreateInsertElement(Result, yMuly, 1); |
2066 | | // dest.z = src0.z; |
2067 | 16 | Value *src0_z = Builder.CreateExtractElement(src0, 2); |
2068 | 16 | Result = Builder.CreateInsertElement(Result, src0_z, 2); |
2069 | | // dest.w = src1.w; |
2070 | 16 | Value *src1_w = Builder.CreateExtractElement(src1, 3); |
2071 | 16 | Result = Builder.CreateInsertElement(Result, src1_w, 3); |
2072 | 16 | return Result; |
2073 | 16 | } |
2074 | | |
2075 | | Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2076 | | HLOperationLowerHelper &helper, |
2077 | | HLObjectOperationLowerHelper *pObjHelper, |
2078 | 204 | bool &Translated) { |
2079 | 204 | hlsl::OP *OP = &helper.hlslOP; |
2080 | 204 | IRBuilder<> Builder(CI); |
2081 | 204 | Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2082 | | |
2083 | 204 | Type *Ty = Src->getType(); |
2084 | 204 | Type *RetTy = Type::getInt32Ty(CI->getContext()); |
2085 | 204 | unsigned NumElements = 0; |
2086 | 204 | if (Ty->isVectorTy()) { |
2087 | 38 | NumElements = Ty->getVectorNumElements(); |
2088 | 38 | RetTy = VectorType::get(RetTy, NumElements); |
2089 | 38 | } |
2090 | | |
2091 | 204 | Constant *OpArg = OP->GetU32Const((unsigned)opcode); |
2092 | 204 | Value *Args[] = {OpArg, Src}; |
2093 | | |
2094 | 204 | Value *FirstbitHi = |
2095 | 204 | TrivialDxilOperation(opcode, Args, Ty, RetTy, OP, Builder); |
2096 | | |
2097 | 204 | IntegerType *EltTy = cast<IntegerType>(Ty->getScalarType()); |
2098 | 204 | Constant *Neg1 = Builder.getInt32(-1); |
2099 | 204 | Constant *BitWidth = Builder.getInt32(EltTy->getBitWidth() - 1); |
2100 | | |
2101 | 204 | if (NumElements > 0) { |
2102 | 38 | Neg1 = ConstantVector::getSplat(NumElements, Neg1); |
2103 | 38 | BitWidth = ConstantVector::getSplat(NumElements, BitWidth); |
2104 | 38 | } |
2105 | | |
2106 | 204 | Value *Sub = Builder.CreateSub(BitWidth, FirstbitHi); |
2107 | 204 | Value *Cond = Builder.CreateICmpEQ(Neg1, FirstbitHi); |
2108 | 204 | return Builder.CreateSelect(Cond, Neg1, Sub); |
2109 | 204 | } |
2110 | | |
2111 | | Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2112 | | HLOperationLowerHelper &helper, |
2113 | | HLObjectOperationLowerHelper *pObjHelper, |
2114 | 178 | bool &Translated) { |
2115 | 178 | hlsl::OP *OP = &helper.hlslOP; |
2116 | 178 | IRBuilder<> Builder(CI); |
2117 | 178 | Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2118 | | |
2119 | 178 | Type *Ty = Src->getType(); |
2120 | 178 | Type *RetTy = Type::getInt32Ty(CI->getContext()); |
2121 | 178 | if (Ty->isVectorTy()) |
2122 | 40 | RetTy = VectorType::get(RetTy, Ty->getVectorNumElements()); |
2123 | | |
2124 | 178 | Constant *OpArg = OP->GetU32Const((unsigned)opcode); |
2125 | 178 | Value *Args[] = {OpArg, Src}; |
2126 | | |
2127 | 178 | Value *FirstbitLo = |
2128 | 178 | TrivialDxilOperation(opcode, Args, Ty, RetTy, OP, Builder); |
2129 | | |
2130 | 178 | return FirstbitLo; |
2131 | 178 | } |
2132 | | |
2133 | | Value *TranslateLit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2134 | | HLOperationLowerHelper &helper, |
2135 | | HLObjectOperationLowerHelper *pObjHelper, |
2136 | 24 | bool &Translated) { |
2137 | 24 | Value *n_dot_l = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); |
2138 | 24 | Value *n_dot_h = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); |
2139 | 24 | Value *m = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); |
2140 | 24 | IRBuilder<> Builder(CI); |
2141 | | |
2142 | 24 | Type *Ty = m->getType(); |
2143 | 24 | Value *Result = UndefValue::get(VectorType::get(Ty, 4)); |
2144 | | // Result = (ambient, diffuse, specular, 1) |
2145 | | // ambient = 1. |
2146 | 24 | Constant *oneConst = ConstantFP::get(Ty, 1); |
2147 | 24 | Result = Builder.CreateInsertElement(Result, oneConst, (uint64_t)0); |
2148 | | // Result.w = 1. |
2149 | 24 | Result = Builder.CreateInsertElement(Result, oneConst, 3); |
2150 | | // diffuse = (n_dot_l < 0) ? 0 : n_dot_l. |
2151 | 24 | Constant *zeroConst = ConstantFP::get(Ty, 0); |
2152 | 24 | Value *nlCmp = Builder.CreateFCmpOLT(n_dot_l, zeroConst); |
2153 | 24 | Value *diffuse = Builder.CreateSelect(nlCmp, zeroConst, n_dot_l); |
2154 | 24 | Result = Builder.CreateInsertElement(Result, diffuse, 1); |
2155 | | // specular = ((n_dot_l < 0) || (n_dot_h < 0)) ? 0: (n_dot_h ^ m). |
2156 | 24 | Value *nhCmp = Builder.CreateFCmpOLT(n_dot_h, zeroConst); |
2157 | 24 | Value *specCond = Builder.CreateOr(nlCmp, nhCmp); |
2158 | 24 | bool isFXCCompatMode = |
2159 | 24 | CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode; |
2160 | 24 | Value *nhPowM = |
2161 | 24 | TranslatePowImpl(&helper.hlslOP, Builder, n_dot_h, m, isFXCCompatMode); |
2162 | 24 | Value *spec = Builder.CreateSelect(specCond, zeroConst, nhPowM); |
2163 | 24 | Result = Builder.CreateInsertElement(Result, spec, 2); |
2164 | 24 | return Result; |
2165 | 24 | } |
2166 | | |
2167 | | Value *TranslateRadians(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2168 | | HLOperationLowerHelper &helper, |
2169 | | HLObjectOperationLowerHelper *pObjHelper, |
2170 | 36 | bool &Translated) { |
2171 | 36 | IRBuilder<> Builder(CI); |
2172 | 36 | Type *Ty = CI->getType(); |
2173 | 36 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2174 | | // pi/180. |
2175 | 36 | Constant *toRadianConst = ConstantFP::get(Ty->getScalarType(), M_PI / 180); |
2176 | 36 | if (Ty != Ty->getScalarType()) { |
2177 | 20 | toRadianConst = |
2178 | 20 | ConstantVector::getSplat(Ty->getVectorNumElements(), toRadianConst); |
2179 | 20 | } |
2180 | 36 | return Builder.CreateFMul(toRadianConst, val); |
2181 | 36 | } |
2182 | | |
2183 | | Value *TranslateF16ToF32(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2184 | | HLOperationLowerHelper &helper, |
2185 | | HLObjectOperationLowerHelper *pObjHelper, |
2186 | 384 | bool &Translated) { |
2187 | 384 | IRBuilder<> Builder(CI); |
2188 | | |
2189 | 384 | Value *x = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2190 | 384 | Type *Ty = CI->getType(); |
2191 | | |
2192 | 384 | Function *f16tof32 = helper.hlslOP.GetOpFunc(opcode, helper.voidTy); |
2193 | 384 | return TrivialDxilOperation( |
2194 | 384 | f16tof32, opcode, {Builder.getInt32(static_cast<unsigned>(opcode)), x}, |
2195 | 384 | x->getType(), Ty, &helper.hlslOP, Builder); |
2196 | 384 | } |
2197 | | |
2198 | | Value *TranslateF32ToF16(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2199 | | HLOperationLowerHelper &helper, |
2200 | | HLObjectOperationLowerHelper *pObjHelper, |
2201 | 304 | bool &Translated) { |
2202 | 304 | IRBuilder<> Builder(CI); |
2203 | | |
2204 | 304 | Value *x = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2205 | 304 | Type *Ty = CI->getType(); |
2206 | | |
2207 | 304 | Function *f32tof16 = helper.hlslOP.GetOpFunc(opcode, helper.voidTy); |
2208 | 304 | return TrivialDxilOperation( |
2209 | 304 | f32tof16, opcode, {Builder.getInt32(static_cast<unsigned>(opcode)), x}, |
2210 | 304 | x->getType(), Ty, &helper.hlslOP, Builder); |
2211 | 304 | } |
2212 | | |
2213 | 282 | Value *TranslateLength(CallInst *CI, Value *val, hlsl::OP *hlslOP) { |
2214 | 282 | IRBuilder<> Builder(CI); |
2215 | 282 | if (VectorType *VT = dyn_cast<VectorType>(val->getType())) { |
2216 | 282 | Value *Elt = Builder.CreateExtractElement(val, (uint64_t)0); |
2217 | 282 | unsigned size = VT->getNumElements(); |
2218 | 282 | if (size > 1) { |
2219 | 282 | Value *Sum = Builder.CreateFMul(Elt, Elt); |
2220 | 710 | for (unsigned i = 1; i < size; i++428 ) { |
2221 | 428 | Elt = Builder.CreateExtractElement(val, i); |
2222 | 428 | Value *Mul = Builder.CreateFMul(Elt, Elt); |
2223 | 428 | Sum = Builder.CreateFAdd(Sum, Mul); |
2224 | 428 | } |
2225 | 282 | DXIL::OpCode sqrt = DXIL::OpCode::Sqrt; |
2226 | 282 | Function *dxilSqrt = hlslOP->GetOpFunc(sqrt, VT->getElementType()); |
2227 | 282 | Value *opArg = hlslOP->GetI32Const((unsigned)sqrt); |
2228 | 282 | return Builder.CreateCall(dxilSqrt, {opArg, Sum}, |
2229 | 282 | hlslOP->GetOpCodeName(sqrt)); |
2230 | 282 | } else { |
2231 | 0 | val = Elt; |
2232 | 0 | } |
2233 | 282 | } |
2234 | 0 | DXIL::OpCode fabs = DXIL::OpCode::FAbs; |
2235 | 0 | Function *dxilFAbs = hlslOP->GetOpFunc(fabs, val->getType()); |
2236 | 0 | Value *opArg = hlslOP->GetI32Const((unsigned)fabs); |
2237 | 0 | return Builder.CreateCall(dxilFAbs, {opArg, val}, |
2238 | 0 | hlslOP->GetOpCodeName(fabs)); |
2239 | 282 | } |
2240 | | |
2241 | | Value *TranslateLength(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2242 | | HLOperationLowerHelper &helper, |
2243 | | HLObjectOperationLowerHelper *pObjHelper, |
2244 | 226 | bool &Translated) { |
2245 | 226 | hlsl::OP *hlslOP = &helper.hlslOP; |
2246 | 226 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2247 | 226 | return TranslateLength(CI, val, hlslOP); |
2248 | 226 | } |
2249 | | |
2250 | | Value *TranslateModF(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2251 | | HLOperationLowerHelper &helper, |
2252 | | HLObjectOperationLowerHelper *pObjHelper, |
2253 | 64 | bool &Translated) { |
2254 | 64 | hlsl::OP *hlslOP = &helper.hlslOP; |
2255 | 64 | Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2256 | 64 | Value *outIntPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2257 | 64 | IRBuilder<> Builder(CI); |
2258 | 64 | Value *intP = |
2259 | 64 | TrivialDxilUnaryOperation(OP::OpCode::Round_z, val, hlslOP, Builder); |
2260 | 64 | Value *fracP = Builder.CreateFSub(val, intP); |
2261 | 64 | Builder.CreateStore(intP, outIntPtr); |
2262 | 64 | return fracP; |
2263 | 64 | } |
2264 | | |
2265 | | Value *TranslateDistance(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2266 | | HLOperationLowerHelper &helper, |
2267 | | HLObjectOperationLowerHelper *pObjHelper, |
2268 | 56 | bool &Translated) { |
2269 | 56 | hlsl::OP *hlslOP = &helper.hlslOP; |
2270 | 56 | Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2271 | 56 | Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2272 | 56 | IRBuilder<> Builder(CI); |
2273 | 56 | Value *sub = Builder.CreateFSub(src0, src1); |
2274 | 56 | return TranslateLength(CI, sub, hlslOP); |
2275 | 56 | } |
2276 | | |
2277 | | Value *TranslateExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2278 | | HLOperationLowerHelper &helper, |
2279 | | HLObjectOperationLowerHelper *pObjHelper, |
2280 | 44 | bool &Translated) { |
2281 | 44 | hlsl::OP *hlslOP = &helper.hlslOP; |
2282 | 44 | IRBuilder<> Builder(CI); |
2283 | 44 | Type *Ty = CI->getType(); |
2284 | 44 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2285 | 44 | Constant *log2eConst = ConstantFP::get(Ty->getScalarType(), M_LOG2E); |
2286 | 44 | if (Ty != Ty->getScalarType()) { |
2287 | 20 | log2eConst = |
2288 | 20 | ConstantVector::getSplat(Ty->getVectorNumElements(), log2eConst); |
2289 | 20 | } |
2290 | 44 | val = Builder.CreateFMul(log2eConst, val); |
2291 | 44 | Value *exp = TrivialDxilUnaryOperation(OP::OpCode::Exp, val, hlslOP, Builder); |
2292 | 44 | return exp; |
2293 | 44 | } |
2294 | | |
2295 | | Value *TranslateLog(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2296 | | HLOperationLowerHelper &helper, |
2297 | | HLObjectOperationLowerHelper *pObjHelper, |
2298 | 56 | bool &Translated) { |
2299 | 56 | hlsl::OP *hlslOP = &helper.hlslOP; |
2300 | 56 | IRBuilder<> Builder(CI); |
2301 | 56 | Type *Ty = CI->getType(); |
2302 | 56 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2303 | 56 | Constant *ln2Const = ConstantFP::get(Ty->getScalarType(), M_LN2); |
2304 | 56 | if (Ty != Ty->getScalarType()) { |
2305 | 20 | ln2Const = ConstantVector::getSplat(Ty->getVectorNumElements(), ln2Const); |
2306 | 20 | } |
2307 | 56 | Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder); |
2308 | | |
2309 | 56 | return Builder.CreateFMul(ln2Const, log); |
2310 | 56 | } |
2311 | | |
2312 | | Value *TranslateLog10(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2313 | | HLOperationLowerHelper &helper, |
2314 | | HLObjectOperationLowerHelper *pObjHelper, |
2315 | 24 | bool &Translated) { |
2316 | 24 | hlsl::OP *hlslOP = &helper.hlslOP; |
2317 | 24 | IRBuilder<> Builder(CI); |
2318 | 24 | Type *Ty = CI->getType(); |
2319 | 24 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2320 | 24 | Constant *log2_10Const = ConstantFP::get(Ty->getScalarType(), M_LN2 / M_LN10); |
2321 | 24 | if (Ty != Ty->getScalarType()) { |
2322 | 8 | log2_10Const = |
2323 | 8 | ConstantVector::getSplat(Ty->getVectorNumElements(), log2_10Const); |
2324 | 8 | } |
2325 | 24 | Value *log = TrivialDxilUnaryOperation(OP::OpCode::Log, val, hlslOP, Builder); |
2326 | | |
2327 | 24 | return Builder.CreateFMul(log2_10Const, log); |
2328 | 24 | } |
2329 | | |
2330 | | Value *TranslateFMod(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2331 | | HLOperationLowerHelper &helper, |
2332 | | HLObjectOperationLowerHelper *pObjHelper, |
2333 | 72 | bool &Translated) { |
2334 | 72 | hlsl::OP *hlslOP = &helper.hlslOP; |
2335 | 72 | Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2336 | 72 | Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2337 | 72 | IRBuilder<> Builder(CI); |
2338 | 72 | Value *div = Builder.CreateFDiv(src0, src1); |
2339 | 72 | Value *negDiv = Builder.CreateFNeg(div); |
2340 | 72 | Value *ge = Builder.CreateFCmpOGE(div, negDiv); |
2341 | 72 | Value *absDiv = |
2342 | 72 | TrivialDxilUnaryOperation(OP::OpCode::FAbs, div, hlslOP, Builder); |
2343 | 72 | Value *frc = |
2344 | 72 | TrivialDxilUnaryOperation(OP::OpCode::Frc, absDiv, hlslOP, Builder); |
2345 | 72 | Value *negFrc = Builder.CreateFNeg(frc); |
2346 | 72 | Value *realFrc = Builder.CreateSelect(ge, frc, negFrc); |
2347 | 72 | return Builder.CreateFMul(realFrc, src1); |
2348 | 72 | } |
2349 | | |
2350 | | Value *TranslateFUIBinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2351 | | HLOperationLowerHelper &helper, |
2352 | | HLObjectOperationLowerHelper *pObjHelper, |
2353 | 2.49k | bool &Translated) { |
2354 | 2.49k | bool isFloat = CI->getType()->getScalarType()->isFloatingPointTy(); |
2355 | 2.49k | if (isFloat) { |
2356 | 1.33k | switch (IOP) { |
2357 | 832 | case IntrinsicOp::IOP_max: |
2358 | 832 | opcode = OP::OpCode::FMax; |
2359 | 832 | break; |
2360 | 498 | case IntrinsicOp::IOP_min: |
2361 | 498 | default: |
2362 | 498 | DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_min); |
2363 | 498 | opcode = OP::OpCode::FMin; |
2364 | 498 | break; |
2365 | 1.33k | } |
2366 | 1.33k | } |
2367 | 2.49k | return TrivialBinaryOperation(CI, IOP, opcode, helper, pObjHelper, |
2368 | 2.49k | Translated); |
2369 | 2.49k | } |
2370 | | |
2371 | | Value *TranslateFUITrinary(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2372 | | HLOperationLowerHelper &helper, |
2373 | | HLObjectOperationLowerHelper *pObjHelper, |
2374 | 11.9k | bool &Translated) { |
2375 | 11.9k | bool isFloat = CI->getType()->getScalarType()->isFloatingPointTy(); |
2376 | 11.9k | if (isFloat) { |
2377 | 11.3k | switch (IOP) { |
2378 | 11.3k | case IntrinsicOp::IOP_mad: |
2379 | 11.3k | default: |
2380 | 11.3k | DXASSERT_NOMSG(IOP == IntrinsicOp::IOP_mad); |
2381 | 11.3k | opcode = OP::OpCode::FMad; |
2382 | 11.3k | break; |
2383 | 11.3k | } |
2384 | 11.3k | } |
2385 | 11.9k | return TrivialTrinaryOperation(CI, IOP, opcode, helper, pObjHelper, |
2386 | 11.9k | Translated); |
2387 | 11.9k | } |
2388 | | |
2389 | | Value *TranslateFrexp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2390 | | HLOperationLowerHelper &helper, |
2391 | | HLObjectOperationLowerHelper *pObjHelper, |
2392 | 60 | bool &Translated) { |
2393 | 60 | hlsl::OP *hlslOP = &helper.hlslOP; |
2394 | 60 | Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2395 | 60 | Value *expPtr = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2396 | 60 | IRBuilder<> Builder(CI); |
2397 | 60 | Type *i32Ty = Type::getInt32Ty(CI->getContext()); |
2398 | 60 | Constant *exponentMaskConst = ConstantInt::get(i32Ty, 0x7f800000); |
2399 | 60 | Constant *mantisaMaskConst = ConstantInt::get(i32Ty, 0x007fffff); |
2400 | 60 | Constant *exponentShiftConst = ConstantInt::get(i32Ty, 23); |
2401 | 60 | Constant *mantisaOrConst = ConstantInt::get(i32Ty, 0x3f000000); |
2402 | 60 | Constant *exponentBiasConst = ConstantInt::get(i32Ty, -(int)0x3f000000); |
2403 | 60 | Constant *zeroVal = hlslOP->GetFloatConst(0); |
2404 | | // int iVal = asint(val); |
2405 | 60 | Type *dstTy = i32Ty; |
2406 | 60 | Type *Ty = val->getType(); |
2407 | 60 | if (Ty->isVectorTy()) { |
2408 | 28 | unsigned vecSize = Ty->getVectorNumElements(); |
2409 | 28 | dstTy = VectorType::get(i32Ty, vecSize); |
2410 | 28 | exponentMaskConst = ConstantVector::getSplat(vecSize, exponentMaskConst); |
2411 | 28 | mantisaMaskConst = ConstantVector::getSplat(vecSize, mantisaMaskConst); |
2412 | 28 | exponentShiftConst = ConstantVector::getSplat(vecSize, exponentShiftConst); |
2413 | 28 | mantisaOrConst = ConstantVector::getSplat(vecSize, mantisaOrConst); |
2414 | 28 | exponentBiasConst = ConstantVector::getSplat(vecSize, exponentBiasConst); |
2415 | 28 | zeroVal = ConstantVector::getSplat(vecSize, zeroVal); |
2416 | 28 | } |
2417 | | |
2418 | | // bool ne = val != 0; |
2419 | 60 | Value *notZero = Builder.CreateFCmpUNE(val, zeroVal); |
2420 | 60 | notZero = Builder.CreateSExt(notZero, dstTy); |
2421 | | |
2422 | 60 | Value *intVal = Builder.CreateBitCast(val, dstTy); |
2423 | | // temp = intVal & exponentMask; |
2424 | 60 | Value *temp = Builder.CreateAnd(intVal, exponentMaskConst); |
2425 | | // temp = temp + exponentBias; |
2426 | 60 | temp = Builder.CreateAdd(temp, exponentBiasConst); |
2427 | | // temp = temp & ne; |
2428 | 60 | temp = Builder.CreateAnd(temp, notZero); |
2429 | | // temp = temp >> exponentShift; |
2430 | 60 | temp = Builder.CreateAShr(temp, exponentShiftConst); |
2431 | | // exp = float(temp); |
2432 | 60 | Value *exp = Builder.CreateSIToFP(temp, Ty); |
2433 | 60 | Builder.CreateStore(exp, expPtr); |
2434 | | // temp = iVal & mantisaMask; |
2435 | 60 | temp = Builder.CreateAnd(intVal, mantisaMaskConst); |
2436 | | // temp = temp | mantisaOr; |
2437 | 60 | temp = Builder.CreateOr(temp, mantisaOrConst); |
2438 | | // mantisa = temp & ne; |
2439 | 60 | Value *mantisa = Builder.CreateAnd(temp, notZero); |
2440 | 60 | return Builder.CreateBitCast(mantisa, Ty); |
2441 | 60 | } |
2442 | | |
2443 | | Value *TranslateLdExp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2444 | | HLOperationLowerHelper &helper, |
2445 | | HLObjectOperationLowerHelper *pObjHelper, |
2446 | 38 | bool &Translated) { |
2447 | 38 | hlsl::OP *hlslOP = &helper.hlslOP; |
2448 | 38 | Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2449 | 38 | Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2450 | 38 | IRBuilder<> Builder(CI); |
2451 | 38 | Value *exp = |
2452 | 38 | TrivialDxilUnaryOperation(OP::OpCode::Exp, src1, hlslOP, Builder); |
2453 | 38 | return Builder.CreateFMul(exp, src0); |
2454 | 38 | } |
2455 | | |
2456 | | Value *TranslateFWidth(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2457 | | HLOperationLowerHelper &helper, |
2458 | | HLObjectOperationLowerHelper *pObjHelper, |
2459 | 36 | bool &Translated) { |
2460 | 36 | hlsl::OP *hlslOP = &helper.hlslOP; |
2461 | 36 | Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2462 | 36 | IRBuilder<> Builder(CI); |
2463 | 36 | Value *ddx = |
2464 | 36 | TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseX, src, hlslOP, Builder); |
2465 | 36 | Value *absDdx = |
2466 | 36 | TrivialDxilUnaryOperation(OP::OpCode::FAbs, ddx, hlslOP, Builder); |
2467 | 36 | Value *ddy = |
2468 | 36 | TrivialDxilUnaryOperation(OP::OpCode::DerivCoarseY, src, hlslOP, Builder); |
2469 | 36 | Value *absDdy = |
2470 | 36 | TrivialDxilUnaryOperation(OP::OpCode::FAbs, ddy, hlslOP, Builder); |
2471 | 36 | return Builder.CreateFAdd(absDdx, absDdy); |
2472 | 36 | } |
2473 | | |
2474 | | Value *TranslateLerp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2475 | | HLOperationLowerHelper &helper, |
2476 | | HLObjectOperationLowerHelper *pObjHelper, |
2477 | 348 | bool &Translated) { |
2478 | | // x + s(y-x) |
2479 | 348 | Value *x = CI->getArgOperand(HLOperandIndex::kLerpOpXIdx); |
2480 | 348 | Value *y = CI->getArgOperand(HLOperandIndex::kLerpOpYIdx); |
2481 | 348 | IRBuilder<> Builder(CI); |
2482 | 348 | Value *ySubx = Builder.CreateFSub(y, x); |
2483 | 348 | Value *s = CI->getArgOperand(HLOperandIndex::kLerpOpSIdx); |
2484 | 348 | Value *sMulSub = Builder.CreateFMul(s, ySubx); |
2485 | 348 | return Builder.CreateFAdd(x, sMulSub); |
2486 | 348 | } |
2487 | | |
2488 | | Value *TrivialDotOperation(OP::OpCode opcode, Value *src0, Value *src1, |
2489 | 2.37k | hlsl::OP *hlslOP, IRBuilder<> &Builder) { |
2490 | 2.37k | Type *Ty = src0->getType()->getScalarType(); |
2491 | 2.37k | Function *dxilFunc = hlslOP->GetOpFunc(opcode, Ty); |
2492 | 2.37k | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
2493 | | |
2494 | 2.37k | SmallVector<Value *, 9> args; |
2495 | 2.37k | args.emplace_back(opArg); |
2496 | | |
2497 | 2.37k | unsigned vecSize = src0->getType()->getVectorNumElements(); |
2498 | 9.74k | for (unsigned i = 0; i < vecSize; i++7.36k ) |
2499 | 7.36k | args.emplace_back(Builder.CreateExtractElement(src0, i)); |
2500 | | |
2501 | 9.74k | for (unsigned i = 0; i < vecSize; i++7.36k ) |
2502 | 7.36k | args.emplace_back(Builder.CreateExtractElement(src1, i)); |
2503 | 2.37k | Value *dotOP = Builder.CreateCall(dxilFunc, args); |
2504 | | |
2505 | 2.37k | return dotOP; |
2506 | 2.37k | } |
2507 | | |
2508 | | // Instead of using a DXIL intrinsic, implement a dot product operation using |
2509 | | // multiply and add operations. Used for integer dots and long vectors. |
2510 | | Value *ExpandDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, |
2511 | | IRBuilder<> &Builder, |
2512 | 376 | DXIL::OpCode MadOpCode = DXIL::OpCode::IMad) { |
2513 | 376 | Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0); |
2514 | 376 | Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0); |
2515 | 376 | Value *Result; |
2516 | 376 | if (Elt0->getType()->isFloatingPointTy()) |
2517 | 6 | Result = Builder.CreateFMul(Elt0, Elt1); |
2518 | 370 | else |
2519 | 370 | Result = Builder.CreateMul(Elt0, Elt1); |
2520 | 1.31k | for (unsigned Elt = 1; Elt < vecSize; ++Elt938 ) { |
2521 | 938 | Elt0 = Builder.CreateExtractElement(arg0, Elt); |
2522 | 938 | Elt1 = Builder.CreateExtractElement(arg1, Elt); |
2523 | 938 | Result = TrivialDxilTrinaryOperation(MadOpCode, Elt0, Elt1, Result, hlslOP, |
2524 | 938 | Builder); |
2525 | 938 | } |
2526 | | |
2527 | 376 | return Result; |
2528 | 376 | } |
2529 | | |
2530 | | Value *TranslateFDot(Value *arg0, Value *arg1, unsigned vecSize, |
2531 | 2.40k | hlsl::OP *hlslOP, IRBuilder<> &Builder) { |
2532 | 2.40k | switch (vecSize) { |
2533 | 196 | case 2: |
2534 | 196 | return TrivialDotOperation(OP::OpCode::Dot2, arg0, arg1, hlslOP, Builder); |
2535 | 0 | break; |
2536 | 1.75k | case 3: |
2537 | 1.75k | return TrivialDotOperation(OP::OpCode::Dot3, arg0, arg1, hlslOP, Builder); |
2538 | 0 | break; |
2539 | 424 | case 4: |
2540 | 424 | return TrivialDotOperation(OP::OpCode::Dot4, arg0, arg1, hlslOP, Builder); |
2541 | 0 | break; |
2542 | 24 | default: |
2543 | 24 | DXASSERT(vecSize == 1, "wrong vector size"); |
2544 | 24 | { |
2545 | 24 | Value *vecMul = Builder.CreateFMul(arg0, arg1); |
2546 | 24 | return Builder.CreateExtractElement(vecMul, (uint64_t)0); |
2547 | 0 | } |
2548 | 0 | break; |
2549 | 2.40k | } |
2550 | 2.40k | } |
2551 | | |
2552 | | Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2553 | | HLOperationLowerHelper &helper, |
2554 | | HLObjectOperationLowerHelper *pObjHelper, |
2555 | 1.96k | bool &Translated) { |
2556 | 1.96k | hlsl::OP *hlslOP = &helper.hlslOP; |
2557 | 1.96k | Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2558 | 1.96k | Type *Ty = arg0->getType(); |
2559 | 1.96k | unsigned vecSize = Ty->getVectorNumElements(); |
2560 | 1.96k | Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2561 | 1.96k | IRBuilder<> Builder(CI); |
2562 | 1.96k | Type *EltTy = Ty->getScalarType(); |
2563 | 1.96k | if (EltTy->isFloatingPointTy() && Ty->getVectorNumElements() <= 41.64k ) |
2564 | 1.63k | return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); |
2565 | | |
2566 | 330 | DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; |
2567 | 330 | if (IOP == IntrinsicOp::IOP_udot) |
2568 | 228 | MadOpCode = DXIL::OpCode::UMad; |
2569 | 102 | else if (EltTy->isFloatingPointTy()) |
2570 | 6 | MadOpCode = DXIL::OpCode::FMad; |
2571 | 330 | return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode); |
2572 | 1.96k | } |
2573 | | |
2574 | | Value *TranslateNormalize(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2575 | | HLOperationLowerHelper &helper, |
2576 | | HLObjectOperationLowerHelper *pObjHelper, |
2577 | 648 | bool &Translated) { |
2578 | 648 | hlsl::OP *hlslOP = &helper.hlslOP; |
2579 | 648 | Type *Ty = CI->getType(); |
2580 | 648 | Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2581 | 648 | VectorType *VT = cast<VectorType>(Ty); |
2582 | 648 | unsigned vecSize = VT->getNumElements(); |
2583 | | |
2584 | 648 | IRBuilder<> Builder(CI); |
2585 | 648 | Value *dot = TranslateFDot(op, op, vecSize, hlslOP, Builder); |
2586 | 648 | DXIL::OpCode rsqrtOp = DXIL::OpCode::Rsqrt; |
2587 | 648 | Function *dxilRsqrt = hlslOP->GetOpFunc(rsqrtOp, VT->getElementType()); |
2588 | 648 | Value *rsqrt = Builder.CreateCall( |
2589 | 648 | dxilRsqrt, {hlslOP->GetI32Const((unsigned)rsqrtOp), dot}, |
2590 | 648 | hlslOP->GetOpCodeName(rsqrtOp)); |
2591 | 648 | Value *vecRsqrt = UndefValue::get(VT); |
2592 | 2.60k | for (unsigned i = 0; i < VT->getNumElements(); i++1.95k ) |
2593 | 1.95k | vecRsqrt = Builder.CreateInsertElement(vecRsqrt, rsqrt, i); |
2594 | | |
2595 | 648 | return Builder.CreateFMul(op, vecRsqrt); |
2596 | 648 | } |
2597 | | |
2598 | | Value *TranslateReflect(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
2599 | | HLOperationLowerHelper &helper, |
2600 | | HLObjectOperationLowerHelper *pObjHelper, |
2601 | 16 | bool &Translated) { |
2602 | 16 | hlsl::OP *hlslOP = &helper.hlslOP; |
2603 | | // v = i - 2 * n * dot(i, n). |
2604 | 16 | IRBuilder<> Builder(CI); |
2605 | 16 | Value *i = CI->getArgOperand(HLOperandIndex::kReflectOpIIdx); |
2606 | 16 | Value *n = CI->getArgOperand(HLOperandIndex::kReflectOpNIdx); |
2607 | | |
2608 | 16 | VectorType *VT = cast<VectorType>(i->getType()); |
2609 | 16 | unsigned vecSize = VT->getNumElements(); |
2610 | 16 | Value *dot = TranslateFDot(i, n, vecSize, hlslOP, Builder); |
2611 | | // 2 * dot (i, n). |
2612 | 16 | dot = Builder.CreateFMul(ConstantFP::get(dot->getType(), 2.0), dot); |
2613 | | // 2 * n * dot(i, n). |
2614 | 16 | Value *vecDot = Builder.CreateVectorSplat(vecSize, dot); |
2615 | 16 | Value *nMulDot = Builder.CreateFMul(vecDot, n); |
2616 | | // i - 2 * n * dot(i, n). |
2617 | 16 | return Builder.CreateFSub(i, nMulDot); |
2618 | 16 | } |
2619 | | |
2620 | | Value *TranslateRefract(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
2621 | | HLOperationLowerHelper &helper, |
2622 | | HLObjectOperationLowerHelper *pObjHelper, |
2623 | 46 | bool &Translated) { |
2624 | 46 | hlsl::OP *hlslOP = &helper.hlslOP; |
2625 | | // d = dot(i, n); |
2626 | | // t = 1 - eta * eta * ( 1 - d*d); |
2627 | | // cond = t >= 1; |
2628 | | // r = eta * i - (eta * d + sqrt(t)) * n; |
2629 | | // return cond ? r : 0; |
2630 | 46 | IRBuilder<> Builder(CI); |
2631 | 46 | Value *i = CI->getArgOperand(HLOperandIndex::kRefractOpIIdx); |
2632 | 46 | Value *n = CI->getArgOperand(HLOperandIndex::kRefractOpNIdx); |
2633 | 46 | Value *eta = CI->getArgOperand(HLOperandIndex::kRefractOpEtaIdx); |
2634 | | |
2635 | 46 | VectorType *VT = cast<VectorType>(i->getType()); |
2636 | 46 | unsigned vecSize = VT->getNumElements(); |
2637 | 46 | Value *dot = TranslateFDot(i, n, vecSize, hlslOP, Builder); |
2638 | | // eta * eta; |
2639 | 46 | Value *eta2 = Builder.CreateFMul(eta, eta); |
2640 | | // d*d; |
2641 | 46 | Value *dot2 = Builder.CreateFMul(dot, dot); |
2642 | 46 | Constant *one = ConstantFP::get(eta->getType(), 1); |
2643 | 46 | Constant *zero = ConstantFP::get(eta->getType(), 0); |
2644 | | // 1- d*d; |
2645 | 46 | dot2 = Builder.CreateFSub(one, dot2); |
2646 | | // eta * eta * (1-d*d); |
2647 | 46 | eta2 = Builder.CreateFMul(dot2, eta2); |
2648 | | // t = 1 - eta * eta * ( 1 - d*d); |
2649 | 46 | Value *t = Builder.CreateFSub(one, eta2); |
2650 | | // cond = t >= 0; |
2651 | 46 | Value *cond = Builder.CreateFCmpOGE(t, zero); |
2652 | | // eta * i; |
2653 | 46 | Value *vecEta = UndefValue::get(VT); |
2654 | 176 | for (unsigned i = 0; i < vecSize; i++130 ) |
2655 | 130 | vecEta = Builder.CreateInsertElement(vecEta, eta, i); |
2656 | 46 | Value *etaMulI = Builder.CreateFMul(i, vecEta); |
2657 | | // sqrt(t); |
2658 | 46 | Value *sqrt = TrivialDxilUnaryOperation(OP::OpCode::Sqrt, t, hlslOP, Builder); |
2659 | | // eta * d; |
2660 | 46 | Value *etaMulD = Builder.CreateFMul(eta, dot); |
2661 | | // eta * d + sqrt(t); |
2662 | 46 | Value *etaSqrt = Builder.CreateFAdd(etaMulD, sqrt); |
2663 | | // (eta * d + sqrt(t)) * n; |
2664 | 46 | Value *vecEtaSqrt = Builder.CreateVectorSplat(vecSize, etaSqrt); |
2665 | 46 | Value *r = Builder.CreateFMul(vecEtaSqrt, n); |
2666 | | // r = eta * i - (eta * d + sqrt(t)) * n; |
2667 | 46 | r = Builder.CreateFSub(etaMulI, r); |
2668 | 46 | Value *refract = |
2669 | 46 | Builder.CreateSelect(cond, r, ConstantVector::getSplat(vecSize, zero)); |
2670 | 46 | return refract; |
2671 | 46 | } |
2672 | | |
2673 | | Value *TranslateSmoothStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2674 | | HLOperationLowerHelper &helper, |
2675 | | HLObjectOperationLowerHelper *pObjHelper, |
2676 | 60 | bool &Translated) { |
2677 | 60 | hlsl::OP *hlslOP = &helper.hlslOP; |
2678 | | // s = saturate((x-min)/(max-min)). |
2679 | 60 | IRBuilder<> Builder(CI); |
2680 | 60 | Value *minVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMinIdx); |
2681 | 60 | Value *maxVal = CI->getArgOperand(HLOperandIndex::kSmoothStepOpMaxIdx); |
2682 | 60 | Value *maxSubMin = Builder.CreateFSub(maxVal, minVal); |
2683 | 60 | Value *x = CI->getArgOperand(HLOperandIndex::kSmoothStepOpXIdx); |
2684 | 60 | Value *xSubMin = Builder.CreateFSub(x, minVal); |
2685 | 60 | Value *satVal = Builder.CreateFDiv(xSubMin, maxSubMin); |
2686 | | |
2687 | 60 | Value *s = TrivialDxilUnaryOperation(DXIL::OpCode::Saturate, satVal, hlslOP, |
2688 | 60 | Builder); |
2689 | | // return s * s *(3-2*s). |
2690 | 60 | Constant *c2 = ConstantFP::get(CI->getType(), 2); |
2691 | 60 | Constant *c3 = ConstantFP::get(CI->getType(), 3); |
2692 | | |
2693 | 60 | Value *sMul2 = Builder.CreateFMul(s, c2); |
2694 | 60 | Value *result = Builder.CreateFSub(c3, sMul2); |
2695 | 60 | result = Builder.CreateFMul(s, result); |
2696 | 60 | result = Builder.CreateFMul(s, result); |
2697 | 60 | return result; |
2698 | 60 | } |
2699 | | |
2700 | | Value *TranslateMSad4(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2701 | | HLOperationLowerHelper &helper, |
2702 | | HLObjectOperationLowerHelper *pObjHelper, |
2703 | 16 | bool &Translated) { |
2704 | 16 | hlsl::OP *hlslOP = &helper.hlslOP; |
2705 | 16 | Value *ref = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); |
2706 | 16 | Value *src = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); |
2707 | 16 | Value *accum = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); |
2708 | 16 | Type *Ty = CI->getType(); |
2709 | 16 | IRBuilder<> Builder(CI); |
2710 | 16 | Value *vecRef = UndefValue::get(Ty); |
2711 | 80 | for (unsigned i = 0; i < 4; i++64 ) |
2712 | 64 | vecRef = Builder.CreateInsertElement(vecRef, ref, i); |
2713 | | |
2714 | 16 | Value *srcX = Builder.CreateExtractElement(src, (uint64_t)0); |
2715 | 16 | Value *srcY = Builder.CreateExtractElement(src, 1); |
2716 | | |
2717 | 16 | Value *byteSrc = UndefValue::get(Ty); |
2718 | 16 | byteSrc = Builder.CreateInsertElement(byteSrc, srcX, (uint64_t)0); |
2719 | | |
2720 | | // ushr r0.yzw, srcX, l(0, 8, 16, 24) |
2721 | | // bfi r1.yzw, l(0, 8, 16, 24), l(0, 24, 16, 8), srcX, r0.yyzw |
2722 | 16 | Value *bfiOpArg = |
2723 | 16 | hlslOP->GetU32Const(static_cast<unsigned>(DXIL::OpCode::Bfi)); |
2724 | | |
2725 | 16 | Value *imm8 = hlslOP->GetU32Const(8); |
2726 | 16 | Value *imm16 = hlslOP->GetU32Const(16); |
2727 | 16 | Value *imm24 = hlslOP->GetU32Const(24); |
2728 | | |
2729 | 16 | Ty = ref->getType(); |
2730 | | // Get x[31:8]. |
2731 | 16 | Value *srcXShift = Builder.CreateLShr(srcX, imm8); |
2732 | | // y[0~7] x[31:8]. |
2733 | 16 | Value *byteSrcElt = TrivialDxilOperation( |
2734 | 16 | DXIL::OpCode::Bfi, {bfiOpArg, imm8, imm24, srcY, srcXShift}, Ty, Ty, |
2735 | 16 | hlslOP, Builder); |
2736 | 16 | byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 1); |
2737 | | // Get x[31:16]. |
2738 | 16 | srcXShift = Builder.CreateLShr(srcXShift, imm8); |
2739 | | // y[0~15] x[31:16]. |
2740 | 16 | byteSrcElt = TrivialDxilOperation(DXIL::OpCode::Bfi, |
2741 | 16 | {bfiOpArg, imm16, imm16, srcY, srcXShift}, |
2742 | 16 | Ty, Ty, hlslOP, Builder); |
2743 | 16 | byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 2); |
2744 | | // Get x[31:24]. |
2745 | 16 | srcXShift = Builder.CreateLShr(srcXShift, imm8); |
2746 | | // y[0~23] x[31:24]. |
2747 | 16 | byteSrcElt = TrivialDxilOperation(DXIL::OpCode::Bfi, |
2748 | 16 | {bfiOpArg, imm24, imm8, srcY, srcXShift}, |
2749 | 16 | Ty, Ty, hlslOP, Builder); |
2750 | 16 | byteSrc = Builder.CreateInsertElement(byteSrc, byteSrcElt, 3); |
2751 | | |
2752 | | // Msad on vecref and byteSrc. |
2753 | 16 | return TrivialDxilTrinaryOperation(DXIL::OpCode::Msad, vecRef, byteSrc, accum, |
2754 | 16 | hlslOP, Builder); |
2755 | 16 | } |
2756 | | |
2757 | | Value *TranslateRCP(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2758 | | HLOperationLowerHelper &helper, |
2759 | | HLObjectOperationLowerHelper *pObjHelper, |
2760 | 76 | bool &Translated) { |
2761 | 76 | Type *Ty = CI->getType(); |
2762 | 76 | Value *op = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2763 | 76 | IRBuilder<> Builder(CI); |
2764 | 76 | Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0); |
2765 | 76 | if (Ty != Ty->getScalarType()) { |
2766 | 56 | one = ConstantVector::getSplat(Ty->getVectorNumElements(), one); |
2767 | 56 | } |
2768 | 76 | return Builder.CreateFDiv(one, op); |
2769 | 76 | } |
2770 | | |
2771 | | Value *TranslateSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2772 | | HLOperationLowerHelper &helper, |
2773 | | HLObjectOperationLowerHelper *pObjHelper, |
2774 | 180 | bool &Translated) { |
2775 | 180 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2776 | 180 | Type *Ty = val->getType(); |
2777 | 180 | bool IsInt = Ty->getScalarType()->isIntegerTy(); |
2778 | | |
2779 | 180 | IRBuilder<> Builder(CI); |
2780 | 180 | Constant *zero = Constant::getNullValue(Ty); |
2781 | 180 | Value *zeroLtVal = IsInt ? Builder.CreateICmpSLT(zero, val)44 |
2782 | 180 | : Builder.CreateFCmpOLT(zero, val)136 ; |
2783 | 180 | Value *valLtZero = IsInt ? Builder.CreateICmpSLT(val, zero)44 |
2784 | 180 | : Builder.CreateFCmpOLT(val, zero)136 ; |
2785 | 180 | zeroLtVal = Builder.CreateZExt(zeroLtVal, CI->getType()); |
2786 | 180 | valLtZero = Builder.CreateZExt(valLtZero, CI->getType()); |
2787 | 180 | return Builder.CreateSub(zeroLtVal, valLtZero); |
2788 | 180 | } |
2789 | | |
2790 | | Value *TranslateUSign(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2791 | | HLOperationLowerHelper &helper, |
2792 | | HLObjectOperationLowerHelper *pObjHelper, |
2793 | 36 | bool &Translated) { |
2794 | 36 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
2795 | 36 | Type *Ty = val->getType(); |
2796 | | |
2797 | 36 | IRBuilder<> Builder(CI); |
2798 | 36 | Constant *zero = Constant::getNullValue(Ty); |
2799 | 36 | Value *nonZero = Builder.CreateICmpNE(val, zero); |
2800 | 36 | return Builder.CreateZExt(nonZero, CI->getType()); |
2801 | 36 | } |
2802 | | |
2803 | | Value *TranslateStep(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2804 | | HLOperationLowerHelper &helper, |
2805 | | HLObjectOperationLowerHelper *pObjHelper, |
2806 | 36 | bool &Translated) { |
2807 | 36 | Value *edge = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2808 | 36 | Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2809 | 36 | Type *Ty = CI->getType(); |
2810 | 36 | IRBuilder<> Builder(CI); |
2811 | | |
2812 | 36 | Constant *one = ConstantFP::get(Ty->getScalarType(), 1.0); |
2813 | 36 | Constant *zero = ConstantFP::get(Ty->getScalarType(), 0); |
2814 | 36 | Value *cond = Builder.CreateFCmpOLT(x, edge); |
2815 | | |
2816 | 36 | if (Ty != Ty->getScalarType()) { |
2817 | 20 | one = ConstantVector::getSplat(Ty->getVectorNumElements(), one); |
2818 | 20 | zero = ConstantVector::getSplat(Ty->getVectorNumElements(), zero); |
2819 | 20 | } |
2820 | | |
2821 | 36 | return Builder.CreateSelect(cond, zero, one); |
2822 | 36 | } |
2823 | | |
2824 | | Value *TranslatePow(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
2825 | | HLOperationLowerHelper &helper, |
2826 | | HLObjectOperationLowerHelper *pObjHelper, |
2827 | 1.43k | bool &Translated) { |
2828 | 1.43k | hlsl::OP *hlslOP = &helper.hlslOP; |
2829 | 1.43k | Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2830 | 1.43k | Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2831 | 1.43k | bool isFXCCompatMode = |
2832 | 1.43k | CI->getModule()->GetHLModule().GetHLOptions().bFXCCompatMode; |
2833 | 1.43k | IRBuilder<> Builder(CI); |
2834 | 1.43k | return TranslatePowImpl(hlslOP, Builder, x, y, isFXCCompatMode); |
2835 | 1.43k | } |
2836 | | |
2837 | | Value *TranslatePrintf(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, |
2838 | | HLOperationLowerHelper &helper, |
2839 | | HLObjectOperationLowerHelper *pObjHelper, |
2840 | 2 | bool &Translated) { |
2841 | 2 | Translated = false; |
2842 | 2 | dxilutil::EmitErrorOnInstruction(CI, |
2843 | 2 | "use of unsupported identifier 'printf'"); |
2844 | 2 | return nullptr; |
2845 | 2 | } |
2846 | | |
2847 | | Value *TranslateFaceforward(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
2848 | | HLOperationLowerHelper &helper, |
2849 | | HLObjectOperationLowerHelper *pObjHelper, |
2850 | 16 | bool &Translated) { |
2851 | 16 | hlsl::OP *hlslOP = &helper.hlslOP; |
2852 | 16 | Type *Ty = CI->getType(); |
2853 | | |
2854 | 16 | Value *n = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); |
2855 | 16 | Value *i = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); |
2856 | 16 | Value *ng = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); |
2857 | 16 | IRBuilder<> Builder(CI); |
2858 | | |
2859 | 16 | unsigned vecSize = Ty->getVectorNumElements(); |
2860 | | // -n x sign(dot(i, ng)). |
2861 | 16 | Value *dotOp = TranslateFDot(i, ng, vecSize, hlslOP, Builder); |
2862 | | |
2863 | 16 | Constant *zero = ConstantFP::get(Ty->getScalarType(), 0); |
2864 | 16 | Value *dotLtZero = Builder.CreateFCmpOLT(dotOp, zero); |
2865 | | |
2866 | 16 | Value *negN = Builder.CreateFNeg(n); |
2867 | 16 | Value *faceforward = Builder.CreateSelect(dotLtZero, n, negN); |
2868 | 16 | return faceforward; |
2869 | 16 | } |
2870 | | |
2871 | | Value *TrivialSetMeshOutputCounts(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
2872 | | HLOperationLowerHelper &helper, |
2873 | | HLObjectOperationLowerHelper *pObjHelper, |
2874 | 246 | bool &Translated) { |
2875 | 246 | hlsl::OP *hlslOP = &helper.hlslOP; |
2876 | 246 | Value *src0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
2877 | 246 | Value *src1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
2878 | 246 | IRBuilder<> Builder(CI); |
2879 | 246 | Constant *opArg = hlslOP->GetU32Const((unsigned)op); |
2880 | 246 | Value *args[] = {opArg, src0, src1}; |
2881 | 246 | Function *dxilFunc = hlslOP->GetOpFunc(op, Type::getVoidTy(CI->getContext())); |
2882 | | |
2883 | 246 | Builder.CreateCall(dxilFunc, args); |
2884 | 246 | return nullptr; |
2885 | 246 | } |
2886 | | |
2887 | | Value *TrivialDispatchMesh(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
2888 | | HLOperationLowerHelper &helper, |
2889 | | HLObjectOperationLowerHelper *pObjHelper, |
2890 | 246 | bool &Translated) { |
2891 | 246 | hlsl::OP *hlslOP = &helper.hlslOP; |
2892 | 246 | Value *src0 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadX); |
2893 | 246 | Value *src1 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadY); |
2894 | 246 | Value *src2 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpThreadZ); |
2895 | 246 | Value *src3 = CI->getArgOperand(HLOperandIndex::kDispatchMeshOpPayload); |
2896 | 246 | IRBuilder<> Builder(CI); |
2897 | 246 | Constant *opArg = hlslOP->GetU32Const((unsigned)op); |
2898 | 246 | Value *args[] = {opArg, src0, src1, src2, src3}; |
2899 | 246 | Function *dxilFunc = hlslOP->GetOpFunc(op, src3->getType()); |
2900 | | |
2901 | 246 | Builder.CreateCall(dxilFunc, args); |
2902 | 246 | return nullptr; |
2903 | 246 | } |
2904 | | } // namespace |
2905 | | |
2906 | | // MOP intrinsics |
2907 | | namespace { |
2908 | | |
2909 | | Value *TranslateGetSamplePosition(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
2910 | | HLOperationLowerHelper &helper, |
2911 | | HLObjectOperationLowerHelper *pObjHelper, |
2912 | 48 | bool &Translated) { |
2913 | 48 | hlsl::OP *hlslOP = &helper.hlslOP; |
2914 | 48 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
2915 | | |
2916 | 48 | IRBuilder<> Builder(CI); |
2917 | 48 | Value *sampleIdx = |
2918 | 48 | CI->getArgOperand(HLOperandIndex::kGetSamplePositionSampleIdxOpIndex); |
2919 | | |
2920 | 48 | OP::OpCode opcode = OP::OpCode::Texture2DMSGetSamplePosition; |
2921 | 48 | llvm::Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
2922 | 48 | Function *dxilFunc = |
2923 | 48 | hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext())); |
2924 | | |
2925 | 48 | Value *args[] = {opArg, handle, sampleIdx}; |
2926 | 48 | Value *samplePos = Builder.CreateCall(dxilFunc, args); |
2927 | | |
2928 | 48 | Value *result = UndefValue::get(CI->getType()); |
2929 | 48 | Value *samplePosX = Builder.CreateExtractValue(samplePos, 0); |
2930 | 48 | Value *samplePosY = Builder.CreateExtractValue(samplePos, 1); |
2931 | 48 | result = Builder.CreateInsertElement(result, samplePosX, (uint64_t)0); |
2932 | 48 | result = Builder.CreateInsertElement(result, samplePosY, 1); |
2933 | 48 | return result; |
2934 | 48 | } |
2935 | | |
2936 | | Value *TranslateGetDimensions(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
2937 | | HLOperationLowerHelper &helper, |
2938 | | HLObjectOperationLowerHelper *pObjHelper, |
2939 | 226 | bool &Translated) { |
2940 | 226 | hlsl::OP *hlslOP = &helper.hlslOP; |
2941 | | |
2942 | 226 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
2943 | 226 | DxilResource::Kind RK = pObjHelper->GetRK(handle); |
2944 | | |
2945 | 226 | IRBuilder<> Builder(CI); |
2946 | 226 | OP::OpCode opcode = OP::OpCode::GetDimensions; |
2947 | 226 | llvm::Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
2948 | 226 | Function *dxilFunc = |
2949 | 226 | hlslOP->GetOpFunc(opcode, Type::getVoidTy(CI->getContext())); |
2950 | | |
2951 | 226 | Type *i32Ty = Type::getInt32Ty(CI->getContext()); |
2952 | 226 | Value *mipLevel = UndefValue::get(i32Ty); |
2953 | 226 | unsigned widthOpIdx = HLOperandIndex::kGetDimensionsMipWidthOpIndex; |
2954 | 226 | switch (RK) { |
2955 | 0 | case DxilResource::Kind::Texture1D: |
2956 | 0 | case DxilResource::Kind::Texture1DArray: |
2957 | 56 | case DxilResource::Kind::Texture2D: |
2958 | 56 | case DxilResource::Kind::Texture2DArray: |
2959 | 66 | case DxilResource::Kind::TextureCube: |
2960 | 66 | case DxilResource::Kind::TextureCubeArray: |
2961 | 66 | case DxilResource::Kind::Texture3D: { |
2962 | 66 | Value *opMipLevel = |
2963 | 66 | CI->getArgOperand(HLOperandIndex::kGetDimensionsMipLevelOpIndex); |
2964 | | // mipLevel is in parameter, should not be pointer. |
2965 | 66 | if (!opMipLevel->getType()->isPointerTy()) |
2966 | 24 | mipLevel = opMipLevel; |
2967 | 42 | else { |
2968 | | // No mip level. |
2969 | 42 | widthOpIdx = HLOperandIndex::kGetDimensionsNoMipWidthOpIndex; |
2970 | 42 | mipLevel = ConstantInt::get(i32Ty, 0); |
2971 | 42 | } |
2972 | 66 | } break; |
2973 | 160 | default: |
2974 | 160 | widthOpIdx = HLOperandIndex::kGetDimensionsNoMipWidthOpIndex; |
2975 | 160 | break; |
2976 | 226 | } |
2977 | 226 | Value *args[] = {opArg, handle, mipLevel}; |
2978 | 226 | Value *dims = Builder.CreateCall(dxilFunc, args); |
2979 | | |
2980 | 226 | unsigned dimensionIdx = 0; |
2981 | | |
2982 | 226 | Value *width = Builder.CreateExtractValue(dims, dimensionIdx++); |
2983 | 226 | Value *widthPtr = CI->getArgOperand(widthOpIdx); |
2984 | 226 | if (widthPtr->getType()->getPointerElementType()->isFloatingPointTy()) |
2985 | 8 | width = Builder.CreateSIToFP(width, |
2986 | 8 | widthPtr->getType()->getPointerElementType()); |
2987 | | |
2988 | 226 | Builder.CreateStore(width, widthPtr); |
2989 | | |
2990 | 226 | if (DXIL::IsStructuredBuffer(RK)) { |
2991 | | // Set stride. |
2992 | 52 | Value *stridePtr = CI->getArgOperand(widthOpIdx + 1); |
2993 | 52 | const DataLayout &DL = helper.dataLayout; |
2994 | 52 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
2995 | 52 | Type *bufTy = pObjHelper->GetResourceType(handle); |
2996 | 52 | Type *bufRetTy = bufTy->getStructElementType(0); |
2997 | 52 | unsigned stride = DL.getTypeAllocSize(bufRetTy); |
2998 | 52 | Builder.CreateStore(hlslOP->GetU32Const(stride), stridePtr); |
2999 | 174 | } else { |
3000 | 174 | if (widthOpIdx == HLOperandIndex::kGetDimensionsMipWidthOpIndex || |
3001 | | // Samples is in w channel too. |
3002 | 174 | RK == DXIL::ResourceKind::Texture2DMS150 ) { |
3003 | | // Has mip. |
3004 | 68 | for (unsigned argIdx = widthOpIdx + 1; |
3005 | 136 | argIdx < CI->getNumArgOperands() - 1; argIdx++68 ) { |
3006 | 68 | Value *dim = Builder.CreateExtractValue(dims, dimensionIdx++); |
3007 | 68 | Value *ptr = CI->getArgOperand(argIdx); |
3008 | 68 | if (ptr->getType()->getPointerElementType()->isFloatingPointTy()) |
3009 | 0 | dim = Builder.CreateSIToFP(dim, |
3010 | 0 | ptr->getType()->getPointerElementType()); |
3011 | 68 | Builder.CreateStore(dim, ptr); |
3012 | 68 | } |
3013 | | // NumOfLevel is in w channel. |
3014 | 68 | dimensionIdx = 3; |
3015 | 68 | Value *dim = Builder.CreateExtractValue(dims, dimensionIdx); |
3016 | 68 | Value *ptr = CI->getArgOperand(CI->getNumArgOperands() - 1); |
3017 | 68 | if (ptr->getType()->getPointerElementType()->isFloatingPointTy()) |
3018 | 0 | dim = |
3019 | 0 | Builder.CreateSIToFP(dim, ptr->getType()->getPointerElementType()); |
3020 | 68 | Builder.CreateStore(dim, ptr); |
3021 | 106 | } else { |
3022 | 292 | for (unsigned argIdx = widthOpIdx + 1; argIdx < CI->getNumArgOperands(); |
3023 | 186 | argIdx++) { |
3024 | 186 | Value *dim = Builder.CreateExtractValue(dims, dimensionIdx++); |
3025 | 186 | Value *ptr = CI->getArgOperand(argIdx); |
3026 | 186 | if (ptr->getType()->getPointerElementType()->isFloatingPointTy()) |
3027 | 8 | dim = Builder.CreateSIToFP(dim, |
3028 | 8 | ptr->getType()->getPointerElementType()); |
3029 | 186 | Builder.CreateStore(dim, ptr); |
3030 | 186 | } |
3031 | 106 | } |
3032 | 174 | } |
3033 | 226 | return nullptr; |
3034 | 226 | } |
3035 | | |
3036 | | Value *GenerateUpdateCounter(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
3037 | | HLOperationLowerHelper &helper, |
3038 | | HLObjectOperationLowerHelper *pObjHelper, |
3039 | 2.94k | bool &Translated) { |
3040 | 2.94k | hlsl::OP *hlslOP = &helper.hlslOP; |
3041 | 2.94k | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
3042 | | |
3043 | 2.94k | pObjHelper->MarkHasCounter(handle, helper.i8Ty); |
3044 | | |
3045 | 2.94k | bool bInc = IOP == IntrinsicOp::MOP_IncrementCounter; |
3046 | 2.94k | IRBuilder<> Builder(CI); |
3047 | | |
3048 | 2.94k | OP::OpCode OpCode = OP::OpCode::BufferUpdateCounter; |
3049 | 2.94k | Value *OpCodeArg = hlslOP->GetU32Const((unsigned)OpCode); |
3050 | 2.94k | Value *IncVal = hlslOP->GetI8Const(bInc ? 12.65k : -1284 ); |
3051 | | // Create BufferUpdateCounter call. |
3052 | 2.94k | Value *Args[] = {OpCodeArg, handle, IncVal}; |
3053 | | |
3054 | 2.94k | Function *F = |
3055 | 2.94k | hlslOP->GetOpFunc(OpCode, Type::getVoidTy(handle->getContext())); |
3056 | 2.94k | return Builder.CreateCall(F, Args); |
3057 | 2.94k | } |
3058 | | |
3059 | | static Value *ScalarizeResRet(Type *RetTy, Value *ResRet, |
3060 | 5.73k | IRBuilder<> &Builder) { |
3061 | | // Extract value part. |
3062 | 5.73k | Value *retVal = llvm::UndefValue::get(RetTy); |
3063 | 5.73k | if (RetTy->isVectorTy()) { |
3064 | 24.0k | for (unsigned i = 0; i < RetTy->getVectorNumElements(); i++19.1k ) { |
3065 | 19.1k | Value *retComp = Builder.CreateExtractValue(ResRet, i); |
3066 | 19.1k | retVal = Builder.CreateInsertElement(retVal, retComp, i); |
3067 | 19.1k | } |
3068 | 4.86k | } else { |
3069 | 870 | retVal = Builder.CreateExtractValue(ResRet, 0); |
3070 | 870 | } |
3071 | 5.73k | return retVal; |
3072 | 5.73k | } |
3073 | | |
3074 | | void UpdateStatus(Value *ResRet, Value *status, IRBuilder<> &Builder, |
3075 | | hlsl::OP *hlslOp, |
3076 | 19.7k | unsigned StatusIndex = DXIL::kResRetStatusIndex) { |
3077 | 19.7k | if (status && !isa<UndefValue>(status)2.24k ) { |
3078 | 2.24k | Value *statusVal = Builder.CreateExtractValue(ResRet, StatusIndex); |
3079 | 2.24k | Value *checkAccessOp = hlslOp->GetI32Const( |
3080 | 2.24k | static_cast<unsigned>(DXIL::OpCode::CheckAccessFullyMapped)); |
3081 | 2.24k | Function *checkAccessFn = hlslOp->GetOpFunc( |
3082 | 2.24k | DXIL::OpCode::CheckAccessFullyMapped, statusVal->getType()); |
3083 | | // CheckAccess on status. |
3084 | 2.24k | Value *bStatus = |
3085 | 2.24k | Builder.CreateCall(checkAccessFn, {checkAccessOp, statusVal}); |
3086 | 2.24k | Value *extStatus = |
3087 | 2.24k | Builder.CreateZExt(bStatus, Type::getInt32Ty(status->getContext())); |
3088 | 2.24k | Builder.CreateStore(extStatus, status); |
3089 | 2.24k | } |
3090 | 19.7k | } |
3091 | | |
3092 | 3.15k | Value *SplatToVector(Value *Elt, Type *DstTy, IRBuilder<> &Builder) { |
3093 | 3.15k | Value *Result = UndefValue::get(DstTy); |
3094 | 10.1k | for (unsigned i = 0; i < DstTy->getVectorNumElements(); i++6.97k ) |
3095 | 6.97k | Result = Builder.CreateInsertElement(Result, Elt, i); |
3096 | 3.15k | return Result; |
3097 | 3.15k | } |
3098 | | |
3099 | | Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
3100 | | HLOperationLowerHelper &helper, |
3101 | | HLObjectOperationLowerHelper *pObjHelper, |
3102 | 140 | bool &Translated) { |
3103 | | |
3104 | 140 | hlsl::OP *hlslOP = &helper.hlslOP; |
3105 | 140 | Value *arg0 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
3106 | 140 | Value *arg1 = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
3107 | 140 | Type *arg0Ty = arg0->getType(); |
3108 | 140 | Type *arg1Ty = arg1->getType(); |
3109 | 140 | IRBuilder<> Builder(CI); |
3110 | | |
3111 | 140 | if (arg0Ty->isVectorTy()) { |
3112 | 104 | if (arg1Ty->isVectorTy()) { |
3113 | | // mul(vector, vector) == dot(vector, vector) |
3114 | 84 | unsigned vecSize = arg0Ty->getVectorNumElements(); |
3115 | 84 | if (arg0Ty->getScalarType()->isFloatingPointTy()) { |
3116 | 38 | return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); |
3117 | 38 | } |
3118 | | |
3119 | 46 | DXIL::OpCode MadOpCode = DXIL::OpCode::IMad; |
3120 | 46 | if (IOP == IntrinsicOp::IOP_umul) |
3121 | 20 | MadOpCode = DXIL::OpCode::UMad; |
3122 | 46 | return ExpandDot(arg0, arg1, vecSize, hlslOP, Builder, MadOpCode); |
3123 | 84 | } else { |
3124 | | // mul(vector, scalar) == vector * scalar-splat |
3125 | 20 | arg1 = SplatToVector(arg1, arg0Ty, Builder); |
3126 | 20 | } |
3127 | 104 | } else { |
3128 | 36 | if (arg1Ty->isVectorTy()) { |
3129 | | // mul(scalar, vector) == scalar-splat * vector |
3130 | 24 | arg0 = SplatToVector(arg0, arg1Ty, Builder); |
3131 | 24 | } |
3132 | | // else mul(scalar, scalar) == scalar * scalar; |
3133 | 36 | } |
3134 | | |
3135 | | // create fmul/mul for the pair of vectors or scalars |
3136 | 56 | if (arg0Ty->getScalarType()->isFloatingPointTy()) { |
3137 | 26 | return Builder.CreateFMul(arg0, arg1); |
3138 | 26 | } |
3139 | 30 | return Builder.CreateMul(arg0, arg1); |
3140 | 56 | } |
3141 | | |
3142 | | // Sample intrinsics. |
3143 | | struct SampleHelper { |
3144 | | SampleHelper(CallInst *CI, OP::OpCode op, |
3145 | | HLObjectOperationLowerHelper *pObjHelper); |
3146 | | |
3147 | | OP::OpCode opcode = OP::OpCode::NumOpCodes; |
3148 | | DXIL::ResourceKind resourceKind = DXIL::ResourceKind::Invalid; |
3149 | | Value *sampledTexHandle = nullptr; |
3150 | | Value *texHandle = nullptr; |
3151 | | Value *samplerHandle = nullptr; |
3152 | | static const unsigned kMaxCoordDimensions = 4; |
3153 | | unsigned coordDimensions = 0; |
3154 | | Value *coord[kMaxCoordDimensions]; |
3155 | | Value *compareValue = nullptr; |
3156 | | Value *bias = nullptr; |
3157 | | Value *lod = nullptr; |
3158 | | // SampleGrad only. |
3159 | | static const unsigned kMaxDDXYDimensions = 3; |
3160 | | Value *ddx[kMaxDDXYDimensions]; |
3161 | | Value *ddy[kMaxDDXYDimensions]; |
3162 | | // Optional. |
3163 | | static const unsigned kMaxOffsetDimensions = 3; |
3164 | | unsigned offsetDimensions = 0; |
3165 | | Value *offset[kMaxOffsetDimensions]; |
3166 | | Value *clamp = nullptr; |
3167 | | Value *status = nullptr; |
3168 | | unsigned maxHLOperandRead = 0; |
3169 | 19.3k | Value *ReadHLOperand(CallInst *CI, unsigned opIdx) { |
3170 | 19.3k | if (CI->getNumArgOperands() > opIdx) { |
3171 | 8.85k | maxHLOperandRead = std::max(maxHLOperandRead, opIdx); |
3172 | 8.85k | return CI->getArgOperand(opIdx); |
3173 | 8.85k | } |
3174 | 10.4k | return nullptr; |
3175 | 19.3k | } |
3176 | 4.80k | void TranslateCoord(CallInst *CI, unsigned coordIdx) { |
3177 | 4.80k | Value *coordArg = ReadHLOperand(CI, coordIdx); |
3178 | 4.80k | DXASSERT_NOMSG(coordArg); |
3179 | 4.80k | DXASSERT(coordArg->getType()->getVectorNumElements() == coordDimensions, |
3180 | 4.80k | "otherwise, HL coordinate dimensions mismatch"); |
3181 | 4.80k | IRBuilder<> Builder(CI); |
3182 | 15.3k | for (unsigned i = 0; i < coordDimensions; i++10.5k ) |
3183 | 10.5k | coord[i] = Builder.CreateExtractElement(coordArg, i); |
3184 | 4.80k | Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext())); |
3185 | 13.5k | for (unsigned i = coordDimensions; i < kMaxCoordDimensions; i++8.69k ) |
3186 | 8.69k | coord[i] = undefF; |
3187 | 4.80k | } |
3188 | 4.34k | void TranslateOffset(CallInst *CI, unsigned offsetIdx) { |
3189 | 4.34k | IntegerType *i32Ty = Type::getInt32Ty(CI->getContext()); |
3190 | 4.34k | if (Value *offsetArg = ReadHLOperand(CI, offsetIdx)) { |
3191 | 670 | DXASSERT(offsetArg->getType()->getVectorNumElements() == offsetDimensions, |
3192 | 670 | "otherwise, HL coordinate dimensions mismatch"); |
3193 | 670 | IRBuilder<> Builder(CI); |
3194 | 1.96k | for (unsigned i = 0; i < offsetDimensions; i++1.29k ) |
3195 | 1.29k | offset[i] = Builder.CreateExtractElement(offsetArg, i); |
3196 | 3.67k | } else { |
3197 | | // Use zeros for offsets when not specified, not undef. |
3198 | 3.67k | Value *zero = ConstantInt::get(i32Ty, (uint64_t)0); |
3199 | 10.1k | for (unsigned i = 0; i < offsetDimensions; i++6.48k ) |
3200 | 6.48k | offset[i] = zero; |
3201 | 3.67k | } |
3202 | | // Use undef for components that should not be used for this resource dim. |
3203 | 4.34k | Value *undefI = UndefValue::get(i32Ty); |
3204 | 9.60k | for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++5.25k ) |
3205 | 5.25k | offset[i] = undefI; |
3206 | 4.34k | } |
3207 | 292 | void SetBias(CallInst *CI, unsigned biasIdx) { |
3208 | | // Clamp bias for immediate. |
3209 | 292 | bias = ReadHLOperand(CI, biasIdx); |
3210 | 292 | DXASSERT_NOMSG(bias); |
3211 | 292 | if (ConstantFP *FP = dyn_cast<ConstantFP>(bias)) { |
3212 | 208 | float v = FP->getValueAPF().convertToFloat(); |
3213 | 208 | if (v > DXIL::kMaxMipLodBias) |
3214 | 16 | bias = ConstantFP::get(FP->getType(), DXIL::kMaxMipLodBias); |
3215 | 208 | if (v < DXIL::kMinMipLodBias) |
3216 | 24 | bias = ConstantFP::get(FP->getType(), DXIL::kMinMipLodBias); |
3217 | 208 | } |
3218 | 292 | } |
3219 | 1.21k | void SetLOD(CallInst *CI, unsigned lodIdx) { |
3220 | 1.21k | lod = ReadHLOperand(CI, lodIdx); |
3221 | 1.21k | DXASSERT_NOMSG(lod); |
3222 | 1.21k | } |
3223 | 634 | void SetCompareValue(CallInst *CI, unsigned cmpIdx) { |
3224 | 634 | compareValue = ReadHLOperand(CI, cmpIdx); |
3225 | 634 | DXASSERT_NOMSG(compareValue); |
3226 | 634 | } |
3227 | 3.28k | void SetClamp(CallInst *CI, unsigned clampIdx) { |
3228 | 3.28k | if ((clamp = ReadHLOperand(CI, clampIdx))) { |
3229 | 492 | if (clamp->getType()->isVectorTy()) { |
3230 | 0 | IRBuilder<> Builder(CI); |
3231 | 0 | clamp = Builder.CreateExtractElement(clamp, (uint64_t)0); |
3232 | 0 | } |
3233 | 492 | } else |
3234 | 2.79k | clamp = UndefValue::get(Type::getFloatTy(CI->getContext())); |
3235 | 3.28k | } |
3236 | 4.34k | void SetStatus(CallInst *CI, unsigned statusIdx) { |
3237 | 4.34k | status = ReadHLOperand(CI, statusIdx); |
3238 | 4.34k | } |
3239 | 200 | void SetDDX(CallInst *CI, unsigned ddxIdx) { |
3240 | 200 | SetDDXY(CI, ddx, ReadHLOperand(CI, ddxIdx)); |
3241 | 200 | } |
3242 | 200 | void SetDDY(CallInst *CI, unsigned ddyIdx) { |
3243 | 200 | SetDDXY(CI, ddy, ReadHLOperand(CI, ddyIdx)); |
3244 | 200 | } |
3245 | 400 | void SetDDXY(CallInst *CI, MutableArrayRef<Value *> ddxy, Value *ddxyArg) { |
3246 | 400 | DXASSERT_NOMSG(ddxyArg); |
3247 | 400 | IRBuilder<> Builder(CI); |
3248 | 400 | unsigned ddxySize = ddxyArg->getType()->getVectorNumElements(); |
3249 | 1.32k | for (unsigned i = 0; i < ddxySize; i++928 ) |
3250 | 928 | ddxy[i] = Builder.CreateExtractElement(ddxyArg, i); |
3251 | 400 | Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext())); |
3252 | 672 | for (unsigned i = ddxySize; i < kMaxDDXYDimensions; i++272 ) |
3253 | 272 | ddxy[i] = undefF; |
3254 | 400 | } |
3255 | | }; |
3256 | | |
3257 | | SampleHelper::SampleHelper(CallInst *CI, OP::OpCode op, |
3258 | | HLObjectOperationLowerHelper *pObjHelper) |
3259 | 4.80k | : opcode(op) { |
3260 | | |
3261 | 4.80k | texHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
3262 | 4.80k | resourceKind = pObjHelper->GetRK(texHandle); |
3263 | 4.80k | if (resourceKind == DXIL::ResourceKind::Invalid) { |
3264 | 0 | opcode = DXIL::OpCode::NumOpCodes; |
3265 | 0 | return; |
3266 | 0 | } |
3267 | | |
3268 | 4.80k | coordDimensions = opcode == DXIL::OpCode::CalculateLOD |
3269 | 4.80k | ? DxilResource::GetNumDimensionsForCalcLOD(resourceKind)164 |
3270 | 4.80k | : DxilResource::GetNumCoords(resourceKind)4.64k ; |
3271 | 4.80k | offsetDimensions = DxilResource::GetNumOffsets(resourceKind); |
3272 | | |
3273 | 4.80k | const bool bFeedbackOp = hlsl::OP::IsDxilOpFeedback(op); |
3274 | 4.80k | sampledTexHandle = |
3275 | 4.80k | bFeedbackOp ? CI->getArgOperand( |
3276 | 300 | HLOperandIndex::kWriteSamplerFeedbackSampledArgIndex) |
3277 | 4.80k | : nullptr4.50k ; |
3278 | 4.80k | const unsigned kSamplerArgIndex = |
3279 | 4.80k | bFeedbackOp ? HLOperandIndex::kWriteSamplerFeedbackSamplerArgIndex300 |
3280 | 4.80k | : HLOperandIndex::kSampleSamplerArgIndex4.50k ; |
3281 | 4.80k | samplerHandle = CI->getArgOperand(kSamplerArgIndex); |
3282 | | |
3283 | 4.80k | const unsigned kCoordArgIdx = |
3284 | 4.80k | bFeedbackOp ? HLOperandIndex::kWriteSamplerFeedbackCoordArgIndex300 |
3285 | 4.80k | : HLOperandIndex::kSampleCoordArgIndex4.50k ; |
3286 | 4.80k | TranslateCoord(CI, kCoordArgIdx); |
3287 | | |
3288 | | // TextureCube does not support offsets, shifting each subsequent arg index |
3289 | | // down by 1 |
3290 | 4.80k | unsigned cube = (resourceKind == DXIL::ResourceKind::TextureCube || |
3291 | 4.80k | resourceKind == DXIL::ResourceKind::TextureCubeArray4.61k ) |
3292 | 4.80k | ? 1402 |
3293 | 4.80k | : 04.40k ; |
3294 | | |
3295 | 4.80k | switch (op) { |
3296 | 2.41k | case OP::OpCode::Sample: |
3297 | 2.41k | TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx66 |
3298 | 2.41k | : HLOperandIndex::kSampleOffsetArgIndex2.34k ); |
3299 | 2.41k | SetClamp(CI, HLOperandIndex::kSampleClampArgIndex - cube); |
3300 | 2.41k | SetStatus(CI, HLOperandIndex::kSampleStatusArgIndex - cube); |
3301 | 2.41k | break; |
3302 | 1.00k | case OP::OpCode::SampleLevel: |
3303 | 1.00k | SetLOD(CI, HLOperandIndex::kSampleLLevelArgIndex); |
3304 | 1.00k | TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx44 |
3305 | 1.00k | : HLOperandIndex::kSampleLOffsetArgIndex956 ); |
3306 | 1.00k | SetStatus(CI, HLOperandIndex::kSampleLStatusArgIndex - cube); |
3307 | 1.00k | break; |
3308 | 180 | case OP::OpCode::SampleBias: |
3309 | 180 | SetBias(CI, HLOperandIndex::kSampleBBiasArgIndex); |
3310 | 180 | TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx48 |
3311 | 180 | : HLOperandIndex::kSampleBOffsetArgIndex132 ); |
3312 | 180 | SetClamp(CI, HLOperandIndex::kSampleBClampArgIndex - cube); |
3313 | 180 | SetStatus(CI, HLOperandIndex::kSampleBStatusArgIndex - cube); |
3314 | 180 | break; |
3315 | 206 | case OP::OpCode::SampleCmp: |
3316 | 206 | SetCompareValue(CI, HLOperandIndex::kSampleCmpCmpValArgIndex); |
3317 | 206 | TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx66 |
3318 | 206 | : HLOperandIndex::kSampleCmpOffsetArgIndex140 ); |
3319 | 206 | SetClamp(CI, HLOperandIndex::kSampleCmpClampArgIndex - cube); |
3320 | 206 | SetStatus(CI, HLOperandIndex::kSampleCmpStatusArgIndex - cube); |
3321 | 206 | break; |
3322 | 48 | case OP::OpCode::SampleCmpBias: |
3323 | 48 | SetBias(CI, HLOperandIndex::kSampleCmpBBiasArgIndex); |
3324 | 48 | SetCompareValue(CI, HLOperandIndex::kSampleCmpBCmpValArgIndex); |
3325 | 48 | TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx8 |
3326 | 48 | : HLOperandIndex::kSampleCmpBOffsetArgIndex40 ); |
3327 | 48 | SetClamp(CI, HLOperandIndex::kSampleCmpBClampArgIndex - cube); |
3328 | 48 | SetStatus(CI, HLOperandIndex::kSampleCmpBStatusArgIndex - cube); |
3329 | 48 | break; |
3330 | 48 | case OP::OpCode::SampleCmpGrad: |
3331 | 48 | SetDDX(CI, HLOperandIndex::kSampleCmpGDDXArgIndex); |
3332 | 48 | SetDDY(CI, HLOperandIndex::kSampleCmpGDDYArgIndex); |
3333 | 48 | SetCompareValue(CI, HLOperandIndex::kSampleCmpGCmpValArgIndex); |
3334 | 48 | TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx16 |
3335 | 48 | : HLOperandIndex::kSampleCmpGOffsetArgIndex32 ); |
3336 | 48 | SetClamp(CI, HLOperandIndex::kSampleCmpGClampArgIndex - cube); |
3337 | 48 | SetStatus(CI, HLOperandIndex::kSampleCmpGStatusArgIndex - cube); |
3338 | 48 | break; |
3339 | 192 | case OP::OpCode::SampleCmpLevel: |
3340 | 192 | SetCompareValue(CI, HLOperandIndex::kSampleCmpCmpValArgIndex); |
3341 | 192 | TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx48 |
3342 | 192 | : HLOperandIndex::kSampleCmpLOffsetArgIndex144 ); |
3343 | 192 | SetLOD(CI, HLOperandIndex::kSampleCmpLLevelArgIndex); |
3344 | 192 | SetStatus(CI, HLOperandIndex::kSampleCmpStatusArgIndex - cube); |
3345 | 192 | break; |
3346 | 140 | case OP::OpCode::SampleCmpLevelZero: |
3347 | 140 | SetCompareValue(CI, HLOperandIndex::kSampleCmpLZCmpValArgIndex); |
3348 | 140 | TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx32 |
3349 | 140 | : HLOperandIndex::kSampleCmpLZOffsetArgIndex108 ); |
3350 | 140 | SetStatus(CI, HLOperandIndex::kSampleCmpLZStatusArgIndex - cube); |
3351 | 140 | break; |
3352 | 120 | case OP::OpCode::SampleGrad: |
3353 | 120 | SetDDX(CI, HLOperandIndex::kSampleGDDXArgIndex); |
3354 | 120 | SetDDY(CI, HLOperandIndex::kSampleGDDYArgIndex); |
3355 | 120 | TranslateOffset(CI, cube ? HLOperandIndex::kInvalidIdx48 |
3356 | 120 | : HLOperandIndex::kSampleGOffsetArgIndex72 ); |
3357 | 120 | SetClamp(CI, HLOperandIndex::kSampleGClampArgIndex - cube); |
3358 | 120 | SetStatus(CI, HLOperandIndex::kSampleGStatusArgIndex - cube); |
3359 | 120 | break; |
3360 | 164 | case OP::OpCode::CalculateLOD: |
3361 | | // Only need coord for LOD calculation. |
3362 | 164 | break; |
3363 | 180 | case OP::OpCode::WriteSamplerFeedback: |
3364 | 180 | SetClamp(CI, HLOperandIndex::kWriteSamplerFeedback_ClampArgIndex); |
3365 | 180 | break; |
3366 | 64 | case OP::OpCode::WriteSamplerFeedbackBias: |
3367 | 64 | SetBias(CI, HLOperandIndex::kWriteSamplerFeedbackBias_BiasArgIndex); |
3368 | 64 | SetClamp(CI, HLOperandIndex::kWriteSamplerFeedbackBias_ClampArgIndex); |
3369 | 64 | break; |
3370 | 32 | case OP::OpCode::WriteSamplerFeedbackGrad: |
3371 | 32 | SetDDX(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_DdxArgIndex); |
3372 | 32 | SetDDY(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_DdyArgIndex); |
3373 | 32 | SetClamp(CI, HLOperandIndex::kWriteSamplerFeedbackGrad_ClampArgIndex); |
3374 | 32 | break; |
3375 | 24 | case OP::OpCode::WriteSamplerFeedbackLevel: |
3376 | 24 | SetLOD(CI, HLOperandIndex::kWriteSamplerFeedbackLevel_LodArgIndex); |
3377 | 24 | break; |
3378 | 0 | default: |
3379 | 0 | DXASSERT(0, "invalid opcode for Sample"); |
3380 | 0 | break; |
3381 | 4.80k | } |
3382 | 4.80k | DXASSERT(maxHLOperandRead == CI->getNumArgOperands() - 1, |
3383 | 4.80k | "otherwise, unused HL arguments for Sample op"); |
3384 | 4.80k | } |
3385 | | |
3386 | | Value *TranslateCalculateLOD(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
3387 | | HLOperationLowerHelper &helper, |
3388 | | HLObjectOperationLowerHelper *pObjHelper, |
3389 | 164 | bool &Translated) { |
3390 | 164 | hlsl::OP *hlslOP = &helper.hlslOP; |
3391 | 164 | SampleHelper sampleHelper(CI, OP::OpCode::CalculateLOD, pObjHelper); |
3392 | 164 | if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) { |
3393 | 0 | Translated = false; |
3394 | 0 | return nullptr; |
3395 | 0 | } |
3396 | | |
3397 | 164 | bool bClamped = IOP == IntrinsicOp::MOP_CalculateLevelOfDetail; |
3398 | 164 | IRBuilder<> Builder(CI); |
3399 | 164 | Value *opArg = |
3400 | 164 | hlslOP->GetU32Const(static_cast<unsigned>(OP::OpCode::CalculateLOD)); |
3401 | 164 | Value *clamped = hlslOP->GetI1Const(bClamped); |
3402 | | |
3403 | 164 | Value *args[] = {opArg, |
3404 | 164 | sampleHelper.texHandle, |
3405 | 164 | sampleHelper.samplerHandle, |
3406 | 164 | sampleHelper.coord[0], |
3407 | 164 | sampleHelper.coord[1], |
3408 | 164 | sampleHelper.coord[2], |
3409 | 164 | clamped}; |
3410 | 164 | Function *dxilFunc = hlslOP->GetOpFunc(OP::OpCode::CalculateLOD, |
3411 | 164 | Type::getFloatTy(opArg->getContext())); |
3412 | 164 | Value *LOD = Builder.CreateCall(dxilFunc, args); |
3413 | 164 | return LOD; |
3414 | 164 | } |
3415 | | |
3416 | | Value *TranslateCheckAccess(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
3417 | | HLOperationLowerHelper &helper, |
3418 | | HLObjectOperationLowerHelper *pObjHelper, |
3419 | 456 | bool &Translated) { |
3420 | | // Translate CheckAccess into uint->bool, later optimization should remove it. |
3421 | | // Real checkaccess is generated in UpdateStatus. |
3422 | 456 | IRBuilder<> Builder(CI); |
3423 | 456 | Value *V = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
3424 | 456 | return Builder.CreateTrunc(V, helper.i1Ty); |
3425 | 456 | } |
3426 | | |
3427 | | void GenerateDxilSample(CallInst *CI, Function *F, ArrayRef<Value *> sampleArgs, |
3428 | 4.34k | Value *status, hlsl::OP *hlslOp) { |
3429 | 4.34k | IRBuilder<> Builder(CI); |
3430 | | |
3431 | 4.34k | CallInst *call = Builder.CreateCall(F, sampleArgs); |
3432 | | |
3433 | 4.34k | dxilutil::MigrateDebugValue(CI, call); |
3434 | | |
3435 | | // extract value part |
3436 | 4.34k | Value *retVal = ScalarizeResRet(CI->getType(), call, Builder); |
3437 | | |
3438 | | // Replace ret val. |
3439 | 4.34k | CI->replaceAllUsesWith(retVal); |
3440 | | |
3441 | | // get status |
3442 | 4.34k | if (status) { |
3443 | 340 | UpdateStatus(call, status, Builder, hlslOp); |
3444 | 340 | } |
3445 | 4.34k | } |
3446 | | |
3447 | | Value *TranslateSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
3448 | | HLOperationLowerHelper &helper, |
3449 | | HLObjectOperationLowerHelper *pObjHelper, |
3450 | 4.34k | bool &Translated) { |
3451 | 4.34k | hlsl::OP *hlslOP = &helper.hlslOP; |
3452 | 4.34k | SampleHelper sampleHelper(CI, opcode, pObjHelper); |
3453 | | |
3454 | 4.34k | if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) { |
3455 | 0 | Translated = false; |
3456 | 0 | return nullptr; |
3457 | 0 | } |
3458 | 4.34k | Type *Ty = CI->getType(); |
3459 | | |
3460 | 4.34k | Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType()); |
3461 | | |
3462 | 4.34k | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
3463 | | |
3464 | 4.34k | switch (opcode) { |
3465 | 2.41k | case OP::OpCode::Sample: { |
3466 | 2.41k | Value *sampleArgs[] = { |
3467 | 2.41k | opArg, sampleHelper.texHandle, sampleHelper.samplerHandle, |
3468 | | // Coord. |
3469 | 2.41k | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3470 | 2.41k | sampleHelper.coord[3], |
3471 | | // Offset. |
3472 | 2.41k | sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2], |
3473 | | // Clamp. |
3474 | 2.41k | sampleHelper.clamp}; |
3475 | 2.41k | GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP); |
3476 | 2.41k | } break; |
3477 | 1.00k | case OP::OpCode::SampleLevel: { |
3478 | 1.00k | Value *sampleArgs[] = { |
3479 | 1.00k | opArg, sampleHelper.texHandle, sampleHelper.samplerHandle, |
3480 | | // Coord. |
3481 | 1.00k | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3482 | 1.00k | sampleHelper.coord[3], |
3483 | | // Offset. |
3484 | 1.00k | sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2], |
3485 | | // LOD. |
3486 | 1.00k | sampleHelper.lod}; |
3487 | 1.00k | GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP); |
3488 | 1.00k | } break; |
3489 | 120 | case OP::OpCode::SampleGrad: { |
3490 | 120 | Value *sampleArgs[] = { |
3491 | 120 | opArg, sampleHelper.texHandle, sampleHelper.samplerHandle, |
3492 | | // Coord. |
3493 | 120 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3494 | 120 | sampleHelper.coord[3], |
3495 | | // Offset. |
3496 | 120 | sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2], |
3497 | | // Ddx. |
3498 | 120 | sampleHelper.ddx[0], sampleHelper.ddx[1], sampleHelper.ddx[2], |
3499 | | // Ddy. |
3500 | 120 | sampleHelper.ddy[0], sampleHelper.ddy[1], sampleHelper.ddy[2], |
3501 | | // Clamp. |
3502 | 120 | sampleHelper.clamp}; |
3503 | 120 | GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP); |
3504 | 120 | } break; |
3505 | 180 | case OP::OpCode::SampleBias: { |
3506 | 180 | Value *sampleArgs[] = { |
3507 | 180 | opArg, sampleHelper.texHandle, sampleHelper.samplerHandle, |
3508 | | // Coord. |
3509 | 180 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3510 | 180 | sampleHelper.coord[3], |
3511 | | // Offset. |
3512 | 180 | sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2], |
3513 | | // Bias. |
3514 | 180 | sampleHelper.bias, |
3515 | | // Clamp. |
3516 | 180 | sampleHelper.clamp}; |
3517 | 180 | GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP); |
3518 | 180 | } break; |
3519 | 48 | case OP::OpCode::SampleCmpBias: { |
3520 | 48 | Value *sampleArgs[] = { |
3521 | 48 | opArg, sampleHelper.texHandle, sampleHelper.samplerHandle, |
3522 | | // Coord. |
3523 | 48 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3524 | 48 | sampleHelper.coord[3], |
3525 | | // Offset. |
3526 | 48 | sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2], |
3527 | | // CmpVal. |
3528 | 48 | sampleHelper.compareValue, |
3529 | | // Bias. |
3530 | 48 | sampleHelper.bias, |
3531 | | // Clamp. |
3532 | 48 | sampleHelper.clamp}; |
3533 | 48 | GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP); |
3534 | 48 | } break; |
3535 | 48 | case OP::OpCode::SampleCmpGrad: { |
3536 | 48 | Value *sampleArgs[] = { |
3537 | 48 | opArg, sampleHelper.texHandle, sampleHelper.samplerHandle, |
3538 | | // Coord. |
3539 | 48 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3540 | 48 | sampleHelper.coord[3], |
3541 | | // Offset. |
3542 | 48 | sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2], |
3543 | | // CmpVal. |
3544 | 48 | sampleHelper.compareValue, |
3545 | | // Ddx. |
3546 | 48 | sampleHelper.ddx[0], sampleHelper.ddx[1], sampleHelper.ddx[2], |
3547 | | // Ddy. |
3548 | 48 | sampleHelper.ddy[0], sampleHelper.ddy[1], sampleHelper.ddy[2], |
3549 | | // Clamp. |
3550 | 48 | sampleHelper.clamp}; |
3551 | 48 | GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP); |
3552 | 48 | } break; |
3553 | 206 | case OP::OpCode::SampleCmp: { |
3554 | 206 | Value *sampleArgs[] = { |
3555 | 206 | opArg, sampleHelper.texHandle, sampleHelper.samplerHandle, |
3556 | | // Coord. |
3557 | 206 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3558 | 206 | sampleHelper.coord[3], |
3559 | | // Offset. |
3560 | 206 | sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2], |
3561 | | // CmpVal. |
3562 | 206 | sampleHelper.compareValue, |
3563 | | // Clamp. |
3564 | 206 | sampleHelper.clamp}; |
3565 | 206 | GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP); |
3566 | 206 | } break; |
3567 | 192 | case OP::OpCode::SampleCmpLevel: { |
3568 | 192 | Value *sampleArgs[] = { |
3569 | 192 | opArg, sampleHelper.texHandle, sampleHelper.samplerHandle, |
3570 | | // Coord. |
3571 | 192 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3572 | 192 | sampleHelper.coord[3], |
3573 | | // Offset. |
3574 | 192 | sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2], |
3575 | | // CmpVal. |
3576 | 192 | sampleHelper.compareValue, |
3577 | | // LOD. |
3578 | 192 | sampleHelper.lod}; |
3579 | 192 | GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP); |
3580 | 192 | } break; |
3581 | 140 | case OP::OpCode::SampleCmpLevelZero: |
3582 | 140 | default: { |
3583 | 140 | DXASSERT(opcode == OP::OpCode::SampleCmpLevelZero, "invalid sample opcode"); |
3584 | 140 | Value *sampleArgs[] = { |
3585 | 140 | opArg, sampleHelper.texHandle, sampleHelper.samplerHandle, |
3586 | | // Coord. |
3587 | 140 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3588 | 140 | sampleHelper.coord[3], |
3589 | | // Offset. |
3590 | 140 | sampleHelper.offset[0], sampleHelper.offset[1], sampleHelper.offset[2], |
3591 | | // CmpVal. |
3592 | 140 | sampleHelper.compareValue}; |
3593 | 140 | GenerateDxilSample(CI, F, sampleArgs, sampleHelper.status, hlslOP); |
3594 | 140 | } break; |
3595 | 4.34k | } |
3596 | | // CI is replaced in GenerateDxilSample. |
3597 | 4.34k | return nullptr; |
3598 | 4.34k | } |
3599 | | |
3600 | | // Gather intrinsics. |
3601 | | struct GatherHelper { |
3602 | | enum class GatherChannel { |
3603 | | GatherAll, |
3604 | | GatherRed, |
3605 | | GatherGreen, |
3606 | | GatherBlue, |
3607 | | GatherAlpha, |
3608 | | }; |
3609 | | |
3610 | | GatherHelper(CallInst *CI, OP::OpCode op, |
3611 | | HLObjectOperationLowerHelper *pObjHelper, |
3612 | | GatherHelper::GatherChannel ch); |
3613 | | |
3614 | | OP::OpCode opcode; |
3615 | | Value *texHandle; |
3616 | | Value *samplerHandle; |
3617 | | static const unsigned kMaxCoordDimensions = 4; |
3618 | | Value *coord[kMaxCoordDimensions]; |
3619 | | unsigned channel; |
3620 | | Value *special; // For CompareValue, Bias, LOD. |
3621 | | // Optional. |
3622 | | static const unsigned kMaxOffsetDimensions = 2; |
3623 | | Value *offset[kMaxOffsetDimensions]; |
3624 | | // For the overload send different offset for each sample. |
3625 | | // Only save 3 sampleOffsets because use offset for normal overload as first |
3626 | | // sample offset. |
3627 | | static const unsigned kSampleOffsetDimensions = 3; |
3628 | | Value *sampleOffsets[kSampleOffsetDimensions][kMaxOffsetDimensions]; |
3629 | | Value *status; |
3630 | | |
3631 | | bool hasSampleOffsets; |
3632 | | |
3633 | | unsigned maxHLOperandRead = 0; |
3634 | 6.79k | Value *ReadHLOperand(CallInst *CI, unsigned opIdx) { |
3635 | 6.79k | if (CI->getNumArgOperands() > opIdx) { |
3636 | 4.58k | maxHLOperandRead = std::max(maxHLOperandRead, opIdx); |
3637 | 4.58k | return CI->getArgOperand(opIdx); |
3638 | 4.58k | } |
3639 | 2.20k | return nullptr; |
3640 | 6.79k | } |
3641 | | void TranslateCoord(CallInst *CI, unsigned coordIdx, |
3642 | 1.73k | unsigned coordDimensions) { |
3643 | 1.73k | Value *coordArg = ReadHLOperand(CI, coordIdx); |
3644 | 1.73k | DXASSERT_NOMSG(coordArg); |
3645 | 1.73k | DXASSERT(coordArg->getType()->getVectorNumElements() == coordDimensions, |
3646 | 1.73k | "otherwise, HL coordinate dimensions mismatch"); |
3647 | 1.73k | IRBuilder<> Builder(CI); |
3648 | 5.80k | for (unsigned i = 0; i < coordDimensions; i++4.06k ) |
3649 | 4.06k | coord[i] = Builder.CreateExtractElement(coordArg, i); |
3650 | 1.73k | Value *undefF = UndefValue::get(Type::getFloatTy(CI->getContext())); |
3651 | 4.62k | for (unsigned i = coordDimensions; i < kMaxCoordDimensions; i++2.88k ) |
3652 | 2.88k | coord[i] = undefF; |
3653 | 1.73k | } |
3654 | 1.73k | void SetStatus(CallInst *CI, unsigned statusIdx) { |
3655 | 1.73k | status = ReadHLOperand(CI, statusIdx); |
3656 | 1.73k | } |
3657 | | void TranslateOffset(CallInst *CI, unsigned offsetIdx, |
3658 | 1.73k | unsigned offsetDimensions) { |
3659 | 1.73k | IntegerType *i32Ty = Type::getInt32Ty(CI->getContext()); |
3660 | 1.73k | if (Value *offsetArg = ReadHLOperand(CI, offsetIdx)) { |
3661 | 804 | DXASSERT(offsetArg->getType()->getVectorNumElements() == offsetDimensions, |
3662 | 804 | "otherwise, HL coordinate dimensions mismatch"); |
3663 | 804 | IRBuilder<> Builder(CI); |
3664 | 2.41k | for (unsigned i = 0; i < offsetDimensions; i++1.60k ) |
3665 | 1.60k | offset[i] = Builder.CreateExtractElement(offsetArg, i); |
3666 | 934 | } else { |
3667 | | // Use zeros for offsets when not specified, not undef. |
3668 | 934 | Value *zero = ConstantInt::get(i32Ty, (uint64_t)0); |
3669 | 2.14k | for (unsigned i = 0; i < offsetDimensions; i++1.21k ) |
3670 | 1.21k | offset[i] = zero; |
3671 | 934 | } |
3672 | | // Use undef for components that should not be used for this resource dim. |
3673 | 1.73k | Value *undefI = UndefValue::get(i32Ty); |
3674 | 2.39k | for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++656 ) |
3675 | 656 | offset[i] = undefI; |
3676 | 1.73k | } |
3677 | | void TranslateSampleOffset(CallInst *CI, unsigned offsetIdx, |
3678 | 848 | unsigned offsetDimensions) { |
3679 | 848 | Value *undefI = UndefValue::get(Type::getInt32Ty(CI->getContext())); |
3680 | 848 | if (CI->getNumArgOperands() >= (offsetIdx + kSampleOffsetDimensions)) { |
3681 | 344 | hasSampleOffsets = true; |
3682 | 344 | IRBuilder<> Builder(CI); |
3683 | 1.37k | for (unsigned ch = 0; ch < kSampleOffsetDimensions; ch++1.03k ) { |
3684 | 1.03k | Value *offsetArg = ReadHLOperand(CI, offsetIdx + ch); |
3685 | 3.09k | for (unsigned i = 0; i < offsetDimensions; i++2.06k ) |
3686 | 2.06k | sampleOffsets[ch][i] = Builder.CreateExtractElement(offsetArg, i); |
3687 | 1.03k | for (unsigned i = offsetDimensions; i < kMaxOffsetDimensions; i++0 ) |
3688 | 0 | sampleOffsets[ch][i] = undefI; |
3689 | 1.03k | } |
3690 | 344 | } |
3691 | 848 | } |
3692 | | // Update the offset args for gather with sample offset at sampleIdx. |
3693 | | void UpdateOffsetInGatherArgs(MutableArrayRef<Value *> gatherArgs, |
3694 | 1.03k | unsigned sampleIdx) { |
3695 | 1.03k | unsigned offsetBase = DXIL::OperandIndex::kTextureGatherOffset0OpIdx; |
3696 | 3.09k | for (unsigned i = 0; i < kMaxOffsetDimensions; i++2.06k ) |
3697 | | // -1 because offset for sample 0 is in GatherHelper::offset. |
3698 | 2.06k | gatherArgs[offsetBase + i] = sampleOffsets[sampleIdx - 1][i]; |
3699 | 1.03k | } |
3700 | | }; |
3701 | | |
3702 | | GatherHelper::GatherHelper(CallInst *CI, OP::OpCode op, |
3703 | | HLObjectOperationLowerHelper *pObjHelper, |
3704 | | GatherHelper::GatherChannel ch) |
3705 | 1.73k | : opcode(op), special(nullptr), hasSampleOffsets(false) { |
3706 | | |
3707 | 1.73k | switch (ch) { |
3708 | 626 | case GatherChannel::GatherAll: |
3709 | 626 | channel = 0; |
3710 | 626 | break; |
3711 | 320 | case GatherChannel::GatherRed: |
3712 | 320 | channel = 0; |
3713 | 320 | break; |
3714 | 256 | case GatherChannel::GatherGreen: |
3715 | 256 | channel = 1; |
3716 | 256 | break; |
3717 | 272 | case GatherChannel::GatherBlue: |
3718 | 272 | channel = 2; |
3719 | 272 | break; |
3720 | 264 | case GatherChannel::GatherAlpha: |
3721 | 264 | channel = 3; |
3722 | 264 | break; |
3723 | 1.73k | } |
3724 | | |
3725 | 1.73k | IRBuilder<> Builder(CI); |
3726 | 1.73k | texHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
3727 | 1.73k | samplerHandle = CI->getArgOperand(HLOperandIndex::kSampleSamplerArgIndex); |
3728 | | |
3729 | 1.73k | DXIL::ResourceKind RK = pObjHelper->GetRK(texHandle); |
3730 | 1.73k | if (RK == DXIL::ResourceKind::Invalid) { |
3731 | 0 | opcode = DXIL::OpCode::NumOpCodes; |
3732 | 0 | return; |
3733 | 0 | } |
3734 | 1.73k | unsigned coordSize = DxilResource::GetNumCoords(RK); |
3735 | 1.73k | unsigned offsetSize = DxilResource::GetNumOffsets(RK); |
3736 | 1.73k | bool cube = RK == DXIL::ResourceKind::TextureCube || |
3737 | 1.73k | RK == DXIL::ResourceKind::TextureCubeArray1.57k ; |
3738 | | |
3739 | 1.73k | const unsigned kCoordArgIdx = HLOperandIndex::kSampleCoordArgIndex; |
3740 | 1.73k | TranslateCoord(CI, kCoordArgIdx, coordSize); |
3741 | | |
3742 | 1.73k | switch (op) { |
3743 | 1.04k | case OP::OpCode::TextureGather: { |
3744 | 1.04k | unsigned statusIdx; |
3745 | 1.04k | if (cube) { |
3746 | 168 | TranslateOffset(CI, HLOperandIndex::kInvalidIdx, offsetSize); |
3747 | 168 | statusIdx = HLOperandIndex::kGatherCubeStatusArgIndex; |
3748 | 880 | } else { |
3749 | 880 | TranslateOffset(CI, HLOperandIndex::kGatherOffsetArgIndex, offsetSize); |
3750 | | // Gather all don't have sample offset version overload. |
3751 | 880 | if (ch != GatherChannel::GatherAll) |
3752 | 536 | TranslateSampleOffset(CI, HLOperandIndex::kGatherSampleOffsetArgIndex, |
3753 | 536 | offsetSize); |
3754 | 880 | statusIdx = hasSampleOffsets |
3755 | 880 | ? HLOperandIndex::kGatherStatusWithSampleOffsetArgIndex216 |
3756 | 880 | : HLOperandIndex::kGatherStatusArgIndex664 ; |
3757 | 880 | } |
3758 | 1.04k | SetStatus(CI, statusIdx); |
3759 | 1.04k | } break; |
3760 | 546 | case OP::OpCode::TextureGatherCmp: { |
3761 | 546 | special = ReadHLOperand(CI, HLOperandIndex::kGatherCmpCmpValArgIndex); |
3762 | 546 | unsigned statusIdx; |
3763 | 546 | if (cube) { |
3764 | 160 | TranslateOffset(CI, HLOperandIndex::kInvalidIdx, offsetSize); |
3765 | 160 | statusIdx = HLOperandIndex::kGatherCmpCubeStatusArgIndex; |
3766 | 386 | } else { |
3767 | 386 | TranslateOffset(CI, HLOperandIndex::kGatherCmpOffsetArgIndex, offsetSize); |
3768 | | // Gather all don't have sample offset version overload. |
3769 | 386 | if (ch != GatherChannel::GatherAll) |
3770 | 312 | TranslateSampleOffset( |
3771 | 312 | CI, HLOperandIndex::kGatherCmpSampleOffsetArgIndex, offsetSize); |
3772 | 386 | statusIdx = hasSampleOffsets |
3773 | 386 | ? HLOperandIndex::kGatherCmpStatusWithSampleOffsetArgIndex128 |
3774 | 386 | : HLOperandIndex::kGatherCmpStatusArgIndex258 ; |
3775 | 386 | } |
3776 | 546 | SetStatus(CI, statusIdx); |
3777 | 546 | } break; |
3778 | 144 | case OP::OpCode::TextureGatherRaw: { |
3779 | 144 | unsigned statusIdx; |
3780 | 144 | TranslateOffset(CI, HLOperandIndex::kGatherOffsetArgIndex, offsetSize); |
3781 | | // Gather all don't have sample offset version overload. |
3782 | 144 | DXASSERT(ch == GatherChannel::GatherAll, |
3783 | 144 | "Raw gather must use all channels"); |
3784 | 144 | DXASSERT(!cube, "Raw gather can't be used with cube textures"); |
3785 | 144 | DXASSERT(!hasSampleOffsets, |
3786 | 144 | "Raw gather doesn't support individual offsets"); |
3787 | 144 | statusIdx = HLOperandIndex::kGatherStatusArgIndex; |
3788 | 144 | SetStatus(CI, statusIdx); |
3789 | 144 | } break; |
3790 | 0 | default: |
3791 | 0 | DXASSERT(0, "invalid opcode for Gather"); |
3792 | 0 | break; |
3793 | 1.73k | } |
3794 | 1.73k | DXASSERT(maxHLOperandRead == CI->getNumArgOperands() - 1, |
3795 | 1.73k | "otherwise, unused HL arguments for Sample op"); |
3796 | 1.73k | } |
3797 | | |
3798 | | void GenerateDxilGather(CallInst *CI, Function *F, |
3799 | | MutableArrayRef<Value *> gatherArgs, |
3800 | 1.73k | GatherHelper &helper, hlsl::OP *hlslOp) { |
3801 | 1.73k | IRBuilder<> Builder(CI); |
3802 | | |
3803 | 1.73k | CallInst *call = Builder.CreateCall(F, gatherArgs); |
3804 | | |
3805 | 1.73k | dxilutil::MigrateDebugValue(CI, call); |
3806 | | |
3807 | 1.73k | Value *retVal; |
3808 | 1.73k | if (!helper.hasSampleOffsets) { |
3809 | | // extract value part |
3810 | 1.39k | retVal = ScalarizeResRet(CI->getType(), call, Builder); |
3811 | 1.39k | } else { |
3812 | 344 | retVal = UndefValue::get(CI->getType()); |
3813 | 344 | Value *elt = Builder.CreateExtractValue(call, (uint64_t)0); |
3814 | 344 | retVal = Builder.CreateInsertElement(retVal, elt, (uint64_t)0); |
3815 | | |
3816 | 344 | helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 1); |
3817 | 344 | CallInst *callY = Builder.CreateCall(F, gatherArgs); |
3818 | 344 | elt = Builder.CreateExtractValue(callY, (uint64_t)1); |
3819 | 344 | retVal = Builder.CreateInsertElement(retVal, elt, 1); |
3820 | | |
3821 | 344 | helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 2); |
3822 | 344 | CallInst *callZ = Builder.CreateCall(F, gatherArgs); |
3823 | 344 | elt = Builder.CreateExtractValue(callZ, (uint64_t)2); |
3824 | 344 | retVal = Builder.CreateInsertElement(retVal, elt, 2); |
3825 | | |
3826 | 344 | helper.UpdateOffsetInGatherArgs(gatherArgs, /*sampleIdx*/ 3); |
3827 | 344 | CallInst *callW = Builder.CreateCall(F, gatherArgs); |
3828 | 344 | elt = Builder.CreateExtractValue(callW, (uint64_t)3); |
3829 | 344 | retVal = Builder.CreateInsertElement(retVal, elt, 3); |
3830 | | |
3831 | | // TODO: UpdateStatus for each gather call. |
3832 | 344 | } |
3833 | | |
3834 | | // Replace ret val. |
3835 | 1.73k | CI->replaceAllUsesWith(retVal); |
3836 | | |
3837 | | // Get status |
3838 | 1.73k | if (helper.status) { |
3839 | 464 | UpdateStatus(call, helper.status, Builder, hlslOp); |
3840 | 464 | } |
3841 | 1.73k | } |
3842 | | |
3843 | | Value *TranslateGather(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
3844 | | HLOperationLowerHelper &helper, |
3845 | | HLObjectOperationLowerHelper *pObjHelper, |
3846 | 1.73k | bool &Translated) { |
3847 | 1.73k | hlsl::OP *hlslOP = &helper.hlslOP; |
3848 | 1.73k | GatherHelper::GatherChannel ch = GatherHelper::GatherChannel::GatherAll; |
3849 | 1.73k | switch (IOP) { |
3850 | 376 | case IntrinsicOp::MOP_Gather: |
3851 | 482 | case IntrinsicOp::MOP_GatherCmp: |
3852 | 626 | case IntrinsicOp::MOP_GatherRaw: |
3853 | 626 | ch = GatherHelper::GatherChannel::GatherAll; |
3854 | 626 | break; |
3855 | 192 | case IntrinsicOp::MOP_GatherRed: |
3856 | 320 | case IntrinsicOp::MOP_GatherCmpRed: |
3857 | 320 | ch = GatherHelper::GatherChannel::GatherRed; |
3858 | 320 | break; |
3859 | 152 | case IntrinsicOp::MOP_GatherGreen: |
3860 | 256 | case IntrinsicOp::MOP_GatherCmpGreen: |
3861 | 256 | ch = GatherHelper::GatherChannel::GatherGreen; |
3862 | 256 | break; |
3863 | 168 | case IntrinsicOp::MOP_GatherBlue: |
3864 | 272 | case IntrinsicOp::MOP_GatherCmpBlue: |
3865 | 272 | ch = GatherHelper::GatherChannel::GatherBlue; |
3866 | 272 | break; |
3867 | 160 | case IntrinsicOp::MOP_GatherAlpha: |
3868 | 264 | case IntrinsicOp::MOP_GatherCmpAlpha: |
3869 | 264 | ch = GatherHelper::GatherChannel::GatherAlpha; |
3870 | 264 | break; |
3871 | 0 | default: |
3872 | 0 | DXASSERT(0, "invalid gather intrinsic"); |
3873 | 0 | break; |
3874 | 1.73k | } |
3875 | | |
3876 | 1.73k | GatherHelper gatherHelper(CI, opcode, pObjHelper, ch); |
3877 | | |
3878 | 1.73k | if (gatherHelper.opcode == DXIL::OpCode::NumOpCodes) { |
3879 | 0 | Translated = false; |
3880 | 0 | return nullptr; |
3881 | 0 | } |
3882 | 1.73k | Type *Ty = CI->getType(); |
3883 | | |
3884 | 1.73k | Function *F = hlslOP->GetOpFunc(gatherHelper.opcode, Ty->getScalarType()); |
3885 | | |
3886 | 1.73k | Constant *opArg = hlslOP->GetU32Const((unsigned)gatherHelper.opcode); |
3887 | 1.73k | Value *channelArg = hlslOP->GetU32Const(gatherHelper.channel); |
3888 | | |
3889 | 1.73k | switch (opcode) { |
3890 | 1.04k | case OP::OpCode::TextureGather: { |
3891 | 1.04k | Value *gatherArgs[] = {opArg, gatherHelper.texHandle, |
3892 | 1.04k | gatherHelper.samplerHandle, |
3893 | | // Coord. |
3894 | 1.04k | gatherHelper.coord[0], gatherHelper.coord[1], |
3895 | 1.04k | gatherHelper.coord[2], gatherHelper.coord[3], |
3896 | | // Offset. |
3897 | 1.04k | gatherHelper.offset[0], gatherHelper.offset[1], |
3898 | | // Channel. |
3899 | 1.04k | channelArg}; |
3900 | 1.04k | GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP); |
3901 | 1.04k | } break; |
3902 | 546 | case OP::OpCode::TextureGatherCmp: { |
3903 | 546 | Value *gatherArgs[] = {opArg, gatherHelper.texHandle, |
3904 | 546 | gatherHelper.samplerHandle, |
3905 | | // Coord. |
3906 | 546 | gatherHelper.coord[0], gatherHelper.coord[1], |
3907 | 546 | gatherHelper.coord[2], gatherHelper.coord[3], |
3908 | | // Offset. |
3909 | 546 | gatherHelper.offset[0], gatherHelper.offset[1], |
3910 | | // Channel. |
3911 | 546 | channelArg, |
3912 | | // CmpVal. |
3913 | 546 | gatherHelper.special}; |
3914 | 546 | GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP); |
3915 | 546 | } break; |
3916 | 144 | case OP::OpCode::TextureGatherRaw: { |
3917 | 144 | Value *gatherArgs[] = {opArg, gatherHelper.texHandle, |
3918 | 144 | gatherHelper.samplerHandle, |
3919 | | // Coord. |
3920 | 144 | gatherHelper.coord[0], gatherHelper.coord[1], |
3921 | 144 | gatherHelper.coord[2], gatherHelper.coord[3], |
3922 | | // Offset. |
3923 | 144 | gatherHelper.offset[0], gatherHelper.offset[1]}; |
3924 | 144 | GenerateDxilGather(CI, F, gatherArgs, gatherHelper, hlslOP); |
3925 | 144 | break; |
3926 | 0 | } |
3927 | 0 | default: |
3928 | 0 | DXASSERT(0, "invalid opcode for Gather"); |
3929 | 0 | break; |
3930 | 1.73k | } |
3931 | | // CI is replaced in GenerateDxilGather. |
3932 | 1.73k | return nullptr; |
3933 | 1.73k | } |
3934 | | |
3935 | | static Value * |
3936 | | TranslateWriteSamplerFeedback(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
3937 | | HLOperationLowerHelper &helper, |
3938 | | HLObjectOperationLowerHelper *pObjHelper, |
3939 | 300 | bool &Translated) { |
3940 | 300 | hlsl::OP *hlslOP = &helper.hlslOP; |
3941 | 300 | SampleHelper sampleHelper(CI, opcode, pObjHelper); |
3942 | | |
3943 | 300 | if (sampleHelper.opcode == DXIL::OpCode::NumOpCodes) { |
3944 | 0 | Translated = false; |
3945 | 0 | return nullptr; |
3946 | 0 | } |
3947 | 300 | Type *Ty = CI->getType(); |
3948 | | |
3949 | 300 | Function *F = hlslOP->GetOpFunc(opcode, Ty->getScalarType()); |
3950 | | |
3951 | 300 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
3952 | | |
3953 | 300 | IRBuilder<> Builder(CI); |
3954 | | |
3955 | 300 | switch (opcode) { |
3956 | 180 | case OP::OpCode::WriteSamplerFeedback: { |
3957 | 180 | Value *samplerFeedbackArgs[] = { |
3958 | 180 | opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle, |
3959 | 180 | sampleHelper.samplerHandle, |
3960 | | // Coord. |
3961 | 180 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3962 | 180 | sampleHelper.coord[3], |
3963 | | // Clamp. |
3964 | 180 | sampleHelper.clamp}; |
3965 | 180 | return Builder.CreateCall(F, samplerFeedbackArgs); |
3966 | 0 | } break; |
3967 | 64 | case OP::OpCode::WriteSamplerFeedbackBias: { |
3968 | 64 | Value *samplerFeedbackArgs[] = { |
3969 | 64 | opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle, |
3970 | 64 | sampleHelper.samplerHandle, |
3971 | | // Coord. |
3972 | 64 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3973 | 64 | sampleHelper.coord[3], |
3974 | | // Bias. |
3975 | 64 | sampleHelper.bias, |
3976 | | // Clamp. |
3977 | 64 | sampleHelper.clamp}; |
3978 | 64 | return Builder.CreateCall(F, samplerFeedbackArgs); |
3979 | 0 | } break; |
3980 | 32 | case OP::OpCode::WriteSamplerFeedbackGrad: { |
3981 | 32 | Value *samplerFeedbackArgs[] = { |
3982 | 32 | opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle, |
3983 | 32 | sampleHelper.samplerHandle, |
3984 | | // Coord. |
3985 | 32 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
3986 | 32 | sampleHelper.coord[3], |
3987 | | // Ddx. |
3988 | 32 | sampleHelper.ddx[0], sampleHelper.ddx[1], sampleHelper.ddx[2], |
3989 | | // Ddy. |
3990 | 32 | sampleHelper.ddy[0], sampleHelper.ddy[1], sampleHelper.ddy[2], |
3991 | | // Clamp. |
3992 | 32 | sampleHelper.clamp}; |
3993 | 32 | return Builder.CreateCall(F, samplerFeedbackArgs); |
3994 | 0 | } break; |
3995 | 24 | case OP::OpCode::WriteSamplerFeedbackLevel: { |
3996 | 24 | Value *samplerFeedbackArgs[] = { |
3997 | 24 | opArg, sampleHelper.texHandle, sampleHelper.sampledTexHandle, |
3998 | 24 | sampleHelper.samplerHandle, |
3999 | | // Coord. |
4000 | 24 | sampleHelper.coord[0], sampleHelper.coord[1], sampleHelper.coord[2], |
4001 | 24 | sampleHelper.coord[3], |
4002 | | // LOD. |
4003 | 24 | sampleHelper.lod}; |
4004 | 24 | return Builder.CreateCall(F, samplerFeedbackArgs); |
4005 | 0 | } break; |
4006 | 0 | default: |
4007 | 0 | DXASSERT(false, "otherwise, unknown SamplerFeedback Op"); |
4008 | 0 | break; |
4009 | 300 | } |
4010 | 0 | return nullptr; |
4011 | 300 | } |
4012 | | |
4013 | | // Load/Store intrinsics. |
4014 | 21.7k | OP::OpCode LoadOpFromResKind(DxilResource::Kind RK) { |
4015 | 21.7k | switch (RK) { |
4016 | 3.50k | case DxilResource::Kind::RawBuffer: |
4017 | 14.2k | case DxilResource::Kind::StructuredBuffer: |
4018 | 14.2k | return OP::OpCode::RawBufferLoad; |
4019 | 2.52k | case DxilResource::Kind::TypedBuffer: |
4020 | 2.52k | return OP::OpCode::BufferLoad; |
4021 | 0 | case DxilResource::Kind::Invalid: |
4022 | 0 | DXASSERT(0, "invalid resource kind"); |
4023 | 0 | break; |
4024 | 5.00k | default: |
4025 | 5.00k | return OP::OpCode::TextureLoad; |
4026 | 21.7k | } |
4027 | 0 | return OP::OpCode::TextureLoad; |
4028 | 21.7k | } |
4029 | | |
// Bundles everything needed to lower one resource load: the DXIL opcode, the
// resource handle, the value being replaced, and the address, offset, status
// and mip/sample operands.
4030 | | struct ResLoadHelper { |
4031 | | // Primary constructor: reads the address, offset, mip level and status |
4032 | | // operands from the arguments of the CI load intrinsic call. |
4033 | | ResLoadHelper(CallInst *CI, DxilResource::Kind RK, DxilResourceBase::Class RC, |
4034 | | Value *h, IntrinsicOp IOP, LoadInst *TyBufSubLoad = nullptr); |
4035 | | // Alternative constructor explicitly sets the index. |
4036 | | // Used for some subscript operators that feed the generic HL call inst |
4037 | | // into a load op and by the matrixload call instruction. |
// It records Num_Intrinsics as the intrinsic opcode, derives the load opcode
// from RK, and switches RawBufferLoad to RawBufferVectorLoad when Inst has a
// vector type with more than one element and the shader model IsSM69Plus().
4038 | | ResLoadHelper(Instruction *Inst, DxilResource::Kind RK, Value *h, Value *idx, |
4039 | | Value *Offset, Value *status = nullptr, Value *mip = nullptr) |
4040 | 10.1k | : intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(Inst), |
4041 | 10.1k | addr(idx), offset(Offset), status(status), mipLevel(mip) { |
4042 | 10.1k | opcode = LoadOpFromResKind(RK); |
4043 | 10.1k | Type *Ty = Inst->getType(); |
4044 | 10.1k | if (opcode == OP::OpCode::RawBufferLoad && Ty->isVectorTy()9.98k && |
4045 | 10.1k | Ty->getVectorNumElements() > 15.96k && |
4046 | 10.1k | Inst->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()4.26k ) |
4047 | 1.39k | opcode = OP::OpCode::RawBufferVectorLoad; |
4048 | 10.1k | } |
// DXIL load opcode chosen for this access.
4049 | | OP::OpCode opcode; |
// HL intrinsic being lowered (Num_Intrinsics when built by the alternative
// constructor).
4050 | | IntrinsicOp intrinsicOpCode; |
// NOTE(review): neither constructor of this struct assigns dxilMajor or
// dxilMinor; confirm whether they are still used before reading them.
4051 | | unsigned dxilMajor; |
4052 | | unsigned dxilMinor; |
// Resource handle operand.
4053 | | Value *handle; |
// Value whose uses are replaced by the lowered load.
4054 | | Value *retVal; |
// Index / coordinate operand (scalar or vector).
4055 | | Value *addr; |
// Offset operand; may be undef or null.
4056 | | Value *offset; |
// Optional status output operand; null when absent.
4057 | | Value *status; |
// Mip level, or sample index for multisampled textures.
4058 | | Value *mipLevel; |
4059 | | }; |
4060 | | |
4061 | | // Uses CI arguments to determine the index, offset, and mipLevel also depending |
4062 | | // on the RK/RC resource kind and class, which determine the opcode. |
4063 | | // Handle and IOP are set explicitly. |
4064 | | // For typed buffer loads, the call instruction feeds into a load |
4065 | | // represented by TyBufSubLoad which determines the instruction to replace. |
4066 | | // Otherwise, CI is replaced. |
// The status operand is read only when CI has more than StatusIdx arguments;
// otherwise status stays nullptr.
// NOTE(review): mipLevel is not in the initializer list and is assigned only
// on the TextureLoad path, so it is left unset for buffer loads.
4067 | | ResLoadHelper::ResLoadHelper(CallInst *CI, DxilResource::Kind RK, |
4068 | | DxilResourceBase::Class RC, Value *hdl, |
4069 | | IntrinsicOp IOP, LoadInst *TyBufSubLoad) |
4070 | 11.6k | : intrinsicOpCode(IOP), handle(hdl), offset(nullptr), status(nullptr) { |
4071 | 11.6k | opcode = LoadOpFromResKind(RK); |
4072 | 11.6k | bool bForSubscript = false; |
4073 | 11.6k | if (TyBufSubLoad) { |
4074 | 2.75k | bForSubscript = true; |
4075 | 2.75k | retVal = TyBufSubLoad; |
4076 | 2.75k | } else |
4077 | 8.86k | retVal = CI; |
4078 | 11.6k | const unsigned kAddrIdx = HLOperandIndex::kBufLoadAddrOpIdx; |
4079 | 11.6k | addr = CI->getArgOperand(kAddrIdx); |
4080 | 11.6k | unsigned argc = CI->getNumArgOperands(); |
4081 | 11.6k | Type *i32Ty = Type::getInt32Ty(CI->getContext()); |
4082 | 11.6k | unsigned StatusIdx = HLOperandIndex::kBufLoadStatusOpIdx; |
4083 | 11.6k | unsigned OffsetIdx = HLOperandIndex::kInvalidIdx; |
4084 | | |
4085 | 11.6k | if (opcode == OP::OpCode::TextureLoad) { |
4086 | 4.86k | bool IsMS = (RK == DxilResource::Kind::Texture2DMS || |
4087 | 4.86k | RK == DxilResource::Kind::Texture2DMSArray4.61k ); |
4088 | | // Set mip and status index. |
// The offset stored here is provisional: the "Set offset" step further down
// assigns offset again on every path.
4089 | 4.86k | offset = UndefValue::get(i32Ty); |
4090 | 4.86k | if (IsMS) { |
4091 | | // Retrieve appropriate MS parameters. |
4092 | 408 | StatusIdx = HLOperandIndex::kTex2DMSLoadStatusOpIdx; |
4093 | | // MS textures keep the sample param (mipLevel) regardless of writability. |
4094 | 408 | if (bForSubscript) |
4095 | 50 | mipLevel = ConstantInt::get(i32Ty, 0); |
4096 | 358 | else |
4097 | 358 | mipLevel = |
4098 | 358 | CI->getArgOperand(HLOperandIndex::kTex2DMSLoadSampleIdxOpIdx); |
4099 | 4.45k | } else if (RC == DxilResourceBase::Class::UAV) { |
4100 | | // DXIL requires that non-MS UAV accesses set miplevel to undef. |
4101 | 2.07k | mipLevel = UndefValue::get(i32Ty); |
4102 | 2.07k | StatusIdx = HLOperandIndex::kRWTexLoadStatusOpIdx; |
4103 | 2.38k | } else { |
4104 | | // Non-MS SRV case. |
4105 | 2.38k | StatusIdx = HLOperandIndex::kTexLoadStatusOpIdx; |
4106 | 2.38k | if (bForSubscript) |
4107 | | // Having no miplevel param, single subscripted SRVs default to 0. |
4108 | 1.37k | mipLevel = ConstantInt::get(i32Ty, 0); |
4109 | 1.00k | else |
4110 | | // Mip is stored at the last channel of the coordinate vector. |
4111 | 1.00k | mipLevel = IRBuilder<>(CI).CreateExtractElement( |
4112 | 1.00k | addr, DxilResource::GetNumCoords(RK)); |
4113 | 2.38k | } |
// Only SRV textures look for an explicit offset argument; for other classes
// OffsetIdx keeps its kInvalidIdx value.
4114 | 4.86k | if (RC == DxilResourceBase::Class::SRV) |
4115 | 2.68k | OffsetIdx = IsMS ? HLOperandIndex::kTex2DMSLoadOffsetOpIdx304 |
4116 | 2.68k | : HLOperandIndex::kTexLoadOffsetOpIdx2.38k ; |
4117 | 6.75k | } else if (opcode == OP::OpCode::RawBufferLoad) { |
4118 | | // If native vectors are available and this load had a vector |
4119 | | // with more than one element, convert the RawBufferLoad to the |
4120 | | // native vector variant RawBufferVectorLoad. |
4121 | 4.22k | Type *Ty = CI->getType(); |
4122 | 4.22k | if (Ty->isVectorTy() && Ty->getVectorNumElements() > 12.24k && |
4123 | 4.22k | CI->getModule()->GetHLModule().GetShaderModel()->IsSM69Plus()2.11k ) |
4124 | 1.06k | opcode = OP::OpCode::RawBufferVectorLoad; |
4125 | 4.22k | } |
4126 | | |
4127 | | // Set offset. |
4128 | 11.6k | if (DXIL::IsStructuredBuffer(RK)) |
4129 | | // Structured buffers receive no exterior offset in this constructor, |
4130 | | // but may need to increment it later. |
4131 | 1.28k | offset = ConstantInt::get(i32Ty, 0U); |
4132 | 10.3k | else if (argc > OffsetIdx) |
4133 | | // Textures may set the offset from an explicit argument. |
4134 | 102 | offset = CI->getArgOperand(OffsetIdx); |
4135 | 10.2k | else |
4136 | | // All other cases use undef. |
4137 | 10.2k | offset = UndefValue::get(i32Ty); |
4138 | | |
4139 | | // Retrieve status value if provided. |
4140 | 11.6k | if (argc > StatusIdx) |
4141 | 1.12k | status = CI->getArgOperand(StatusIdx); |
4142 | 11.6k | } |
4143 | | |
4144 | | void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status, |
4145 | | hlsl::OP *OP, HLResource::Kind RK, |
4146 | | const DataLayout &DL); |
4147 | | |
// Builds the i8 component-mask constant for a raw buffer load that covers the
// first NumComponents channels (X, XY, XYZ or all four).
// NumComponents of 0 yields an empty mask; any value above 4 hits the DXASSERT
// and also yields an empty mask. Ty is not referenced in the body.
// NOTE(review): the assert text mentions a 2-component limit for 64-bit types,
// but the switch accepts up to 4 components for any type; the message looks
// stale.
4148 | | static Constant *GetRawBufferMaskForETy(Type *Ty, unsigned NumComponents, |
4149 | 11.6k | hlsl::OP *OP) { |
4150 | 11.6k | unsigned mask = 0; |
4151 | | |
4152 | 11.6k | switch (NumComponents) { |
4153 | 0 | case 0: |
4154 | 0 | break; |
4155 | 7.01k | case 1: |
4156 | 7.01k | mask = DXIL::kCompMask_X; |
4157 | 7.01k | break; |
4158 | 1.14k | case 2: |
4159 | 1.14k | mask = DXIL::kCompMask_X | DXIL::kCompMask_Y; |
4160 | 1.14k | break; |
4161 | 602 | case 3: |
4162 | 602 | mask = DXIL::kCompMask_X | DXIL::kCompMask_Y | DXIL::kCompMask_Z; |
4163 | 602 | break; |
4164 | 2.88k | case 4: |
4165 | 2.88k | mask = DXIL::kCompMask_All; |
4166 | 2.88k | break; |
4167 | 0 | default: |
4168 | 0 | DXASSERT(false, "Cannot load more than 2 components for 64bit types."); |
4169 | 11.6k | } |
4170 | 11.6k | return OP->GetI8Const(mask); |
4171 | 11.6k | } |
4172 | | |
4173 | | Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset, |
4174 | | Value *status, Type *EltTy, |
4175 | | MutableArrayRef<Value *> resultElts, hlsl::OP *OP, |
4176 | | IRBuilder<> &Builder, unsigned NumComponents, |
4177 | | Constant *alignment); |
4178 | | |
4179 | | // Sets up arguments for buffer load call. |
// Returns the operand list for the load described by helper:
//  - TextureLoad: opcode, handle, mip/sample, three coordinates, three offsets.
//  - BufferLoad: opcode, handle, index, offset.
//  - RawBufferLoad: as BufferLoad plus a mask slot (left null for the caller
//    to fill in) and the alignment.
//  - RawBufferVectorLoad: as BufferLoad plus the alignment.
// The alignment is 4 for Kind::RawBuffer and 8 otherwise, capped at LdSize.
// helper and Builder are both received by value.
4180 | | static SmallVector<Value *, 10> GetBufLoadArgs(ResLoadHelper helper, |
4181 | | HLResource::Kind RK, |
4182 | | IRBuilder<> Builder, |
4183 | 18.0k | unsigned LdSize) { |
4184 | 18.0k | OP::OpCode opcode = helper.opcode; |
4185 | 18.0k | llvm::Constant *opArg = Builder.getInt32((uint32_t)opcode); |
4186 | | |
4187 | 18.0k | unsigned alignment = RK == DxilResource::Kind::RawBuffer ? 4U3.22k : 8U14.8k ; |
4188 | 18.0k | alignment = std::min(alignment, LdSize); |
4189 | 18.0k | Constant *alignmentVal = Builder.getInt32(alignment); |
4190 | | |
4191 | | // Assemble args specific to the type bab/struct/typed: |
4192 | | // - Typed needs to handle the possibility of vector coords |
4193 | | // - Raws need to calculate alignment and mask values. |
4194 | 18.0k | SmallVector<Value *, 10> Args; |
4195 | 18.0k | Args.emplace_back(opArg); // opcode @0. |
4196 | 18.0k | Args.emplace_back(helper.handle); // Resource handle @1 |
4197 | | |
4198 | | // Set offsets appropriate for the load operation. |
4199 | 18.0k | bool isVectorAddr = helper.addr->getType()->isVectorTy(); |
4200 | 18.0k | if (opcode == OP::OpCode::TextureLoad) { |
4201 | 3.68k | llvm::Value *undefI = llvm::UndefValue::get(Builder.getInt32Ty()); |
4202 | | |
4203 | | // Set mip level or sample for MS textures @2. |
4204 | 3.68k | Args.emplace_back(helper.mipLevel); |
4205 | | // Set texture coords according to resource kind @3-5 |
4206 | | // Coords unused by the resource kind are undefs. |
4207 | 3.68k | unsigned coordSize = DxilResource::GetNumCoords(RK); |
4208 | 14.7k | for (unsigned i = 0; i < 3; i++11.0k ) |
4209 | 11.0k | if (i < coordSize) |
4210 | 7.22k | Args.emplace_back(isVectorAddr |
4211 | 7.22k | ? Builder.CreateExtractElement(helper.addr, i)6.80k |
4212 | 7.22k | : helper.addr420 ); |
4213 | 3.81k | else |
4214 | 3.81k | Args.emplace_back(undefI); |
4215 | | |
4216 | | // Set texture offsets according to resource kind @6-8 |
4217 | | // Offsets unused by the resource kind are undefs. |
4218 | 3.68k | unsigned offsetSize = DxilResource::GetNumOffsets(RK); |
// A missing or undef offset means no offsets were supplied at all.
4219 | 3.68k | if (!helper.offset || isa<llvm::UndefValue>(helper.offset)) |
4220 | 3.57k | offsetSize = 0; |
4221 | 14.7k | for (unsigned i = 0; i < 3; i++11.0k ) |
4222 | 11.0k | if (i < offsetSize) |
4223 | 204 | Args.emplace_back(Builder.CreateExtractElement(helper.offset, i)); |
4224 | 10.8k | else |
4225 | 10.8k | Args.emplace_back(undefI); |
4226 | 14.3k | } else { |
4227 | | // If not TextureLoad, it could be a typed or raw buffer load. |
4228 | | // They have mostly similar arguments. |
4229 | 14.3k | DXASSERT(opcode == OP::OpCode::RawBufferLoad || |
4230 | 14.3k | opcode == OP::OpCode::RawBufferVectorLoad || |
4231 | 14.3k | opcode == OP::OpCode::BufferLoad, |
4232 | 14.3k | "Wrong opcode in get load args"); |
// Index @2: only the first element of a vector address is used.
4233 | 14.3k | Args.emplace_back( |
4234 | 14.3k | isVectorAddr ? Builder.CreateExtractElement(helper.addr, (uint64_t)0)230 |
4235 | 14.3k | : helper.addr14.1k ); |
// Offset @3.
4236 | 14.3k | Args.emplace_back(helper.offset); |
4237 | 14.3k | if (opcode == OP::OpCode::RawBufferLoad) { |
4238 | | // Unlike typed buffer load, raw buffer load has mask and alignment. |
4239 | 10.7k | Args.emplace_back(nullptr); // Mask will be added later @4. |
4240 | 10.7k | Args.emplace_back(alignmentVal); // alignment @5. |
4241 | 10.7k | } else if (3.66k opcode == OP::OpCode::RawBufferVectorLoad3.66k ) { |
4242 | | // RawBufferVectorLoad takes just alignment, no mask. |
4243 | 2.46k | Args.emplace_back(alignmentVal); // alignment @4 |
4244 | 2.46k | } |
4245 | 14.3k | } |
4246 | 18.0k | return Args; |
4247 | 18.0k | } |
4248 | | |
4249 | | // Emits as many calls as needed to load the full vector |
4250 | | // Performs any needed extractions and conversions of the results. |
// The value type comes from helper.retVal. i1 elements are loaded as i32 and
// compared against zero afterwards; 64-bit elements of typed resources are
// loaded as pairs of i32 and recombined. All uses of helper.retVal are replaced
// with the assembled value, and helper.retVal is updated to point at it.
// Returns the first load call created, which callers use for debug info
// migration.
4251 | | Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, |
4252 | | IRBuilder<> &Builder, hlsl::OP *OP, |
4253 | 18.0k | const DataLayout &DL) { |
4254 | 18.0k | OP::OpCode opcode = helper.opcode; |
4255 | 18.0k | Type *Ty = helper.retVal->getType(); |
4256 | | |
4257 | 18.0k | unsigned NumComponents = 1; |
4258 | 18.0k | if (Ty->isVectorTy()) |
4259 | 11.6k | NumComponents = Ty->getVectorNumElements(); |
4260 | | |
4261 | 18.0k | const bool isTyped = DXIL::IsTyped(RK); |
4262 | 18.0k | Type *EltTy = Ty->getScalarType(); |
4263 | 18.0k | const bool is64 = (EltTy->isIntegerTy(64) || EltTy->isDoubleTy()16.8k ); |
4264 | 18.0k | const bool isBool = EltTy->isIntegerTy(1); |
4265 | | // Values will be loaded in memory representations. |
4266 | 18.0k | if (isBool || (17.7k is6417.7k && isTyped2.58k )) |
4267 | 468 | EltTy = Builder.getInt32Ty(); |
4268 | | |
4269 | | // Calculate load size with the scalar memory element type. |
4270 | 18.0k | unsigned LdSize = DL.getTypeAllocSize(EltTy); |
4271 | | |
4272 | | // Adjust number of components as needed. |
4273 | 18.0k | if (is64 && isTyped2.58k ) { |
4274 | | // 64-bit types are stored as int32 pairs in typed buffers. |
4275 | 146 | DXASSERT(NumComponents <= 2, "Typed buffers only allow 4 dwords."); |
4276 | 146 | NumComponents *= 2; |
4277 | 17.9k | } else if (opcode == OP::OpCode::RawBufferVectorLoad) { |
4278 | | // Native vector loads only have a single vector element in ResRet. |
4279 | 2.46k | EltTy = VectorType::get(EltTy, NumComponents); |
4280 | 2.46k | NumComponents = 1; |
4281 | 2.46k | } |
4282 | | |
4283 | 18.0k | SmallVector<Value *, 10> Args = GetBufLoadArgs(helper, RK, Builder, LdSize); |
4284 | | |
4285 | | // Keep track of the first load for debug info migration. |
4286 | 18.0k | Value *FirstLd = nullptr; |
4287 | | |
// OffsetIdx names the argument slot advanced between chunks; it is only read
// on the RawBufferLoad path below.
4288 | 18.0k | unsigned OffsetIdx = 0; |
4289 | 18.0k | if (RK == DxilResource::Kind::RawBuffer) |
4290 | | // Raw buffers can't use offset param. Add to coord index. |
4291 | 3.22k | OffsetIdx = DXIL::OperandIndex::kRawBufferLoadIndexOpIdx; |
4292 | 14.8k | else if (RK == DxilResource::Kind::StructuredBuffer) |
4293 | 9.94k | OffsetIdx = DXIL::OperandIndex::kRawBufferLoadElementOffsetOpIdx; |
4294 | | |
4295 | | // Create call(s) to function object and collect results in Elts. |
4296 | | // Typed buffer loads are limited to one load of up to 4 32-bit values. |
4297 | | // Raw buffer loads might need multiple loads in chunks of 4. |
4298 | 18.0k | SmallVector<Value *, 4> Elts(NumComponents); |
// i is advanced inside the body, once per element extracted.
4299 | 37.0k | for (unsigned i = 0; i < NumComponents;) { |
4300 | | // Load 4 elements or however many less than 4 are left to load. |
4301 | 18.9k | unsigned chunkSize = std::min(NumComponents - i, 4U); |
4302 | | |
4303 | | // Assign mask for raw buffer loads. |
4304 | 18.9k | if (opcode == OP::OpCode::RawBufferLoad) { |
4305 | 11.6k | Args[DXIL::OperandIndex::kRawBufferLoadMaskOpIdx] = |
4306 | 11.6k | GetRawBufferMaskForETy(EltTy, chunkSize, OP); |
4307 | | // If we've loaded a chunk already, update offset to next chunk. |
4308 | 11.6k | if (FirstLd != nullptr) |
4309 | 916 | Args[OffsetIdx] = |
4310 | 916 | Builder.CreateAdd(Args[OffsetIdx], OP->GetU32Const(4 * LdSize)); |
4311 | 11.6k | } |
4312 | | |
4313 | 18.9k | Function *F = OP->GetOpFunc(opcode, EltTy); |
4314 | 18.9k | Value *Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(opcode)); |
4315 | 18.9k | unsigned StatusIndex; |
4316 | | |
4317 | | // Extract elements from returned ResRet. |
4318 | | // Native vector loads just have one vector element in the ResRet. |
4319 | | // Others have up to four scalars that need to be individually extracted. |
4320 | 18.9k | if (opcode == OP::OpCode::RawBufferVectorLoad) { |
4321 | 2.46k | Elts[i++] = Builder.CreateExtractValue(Ld, 0); |
4322 | 2.46k | StatusIndex = DXIL::kVecResRetStatusIndex; |
4323 | 16.5k | } else { |
4324 | 53.0k | for (unsigned j = 0; j < chunkSize; j++, i++36.5k ) |
4325 | 36.5k | Elts[i] = Builder.CreateExtractValue(Ld, j); |
4326 | 16.5k | StatusIndex = DXIL::kResRetStatusIndex; |
4327 | 16.5k | } |
4328 | | |
4329 | | // Update status. |
4330 | 18.9k | UpdateStatus(Ld, helper.status, Builder, OP, StatusIndex); |
4331 | | |
4332 | 18.9k | if (!FirstLd) |
4333 | 18.0k | FirstLd = Ld; |
4334 | 18.9k | } |
4335 | 18.0k | DXASSERT(FirstLd, "No loads created by TranslateBufLoad"); |
4336 | | |
4337 | | // Convert loaded 32-bit integers to intended 64-bit type representation. |
4338 | 18.0k | if (isTyped) { |
4339 | 4.88k | Type *RegEltTy = Ty->getScalarType(); |
4340 | 4.88k | if (RegEltTy->isDoubleTy()) { |
4341 | 68 | Function *makeDouble = OP->GetOpFunc(DXIL::OpCode::MakeDouble, RegEltTy); |
4342 | 68 | Value *makeDoubleOpArg = |
4343 | 68 | Builder.getInt32((unsigned)DXIL::OpCode::MakeDouble); |
4344 | 68 | NumComponents /= 2; // Convert back to number of doubles. |
4345 | 160 | for (unsigned i = 0; i < NumComponents; i++92 ) { |
4346 | 92 | Value *lo = Elts[2 * i]; |
4347 | 92 | Value *hi = Elts[2 * i + 1]; |
4348 | 92 | Elts[i] = Builder.CreateCall(makeDouble, {makeDoubleOpArg, lo, hi}); |
4349 | 92 | } |
4350 | 68 | EltTy = RegEltTy; |
4351 | 4.81k | } else if (RegEltTy->isIntegerTy(64)) { |
4352 | 78 | NumComponents /= 2; // Convert back to number of int64s. |
4353 | 192 | for (unsigned i = 0; i < NumComponents; i++114 ) { |
4354 | 114 | Value *lo = Elts[2 * i]; |
4355 | 114 | Value *hi = Elts[2 * i + 1]; |
// Rebuild the 64-bit integer as zext(lo) | (zext(hi) << 32).
4356 | 114 | lo = Builder.CreateZExt(lo, RegEltTy); |
4357 | 114 | hi = Builder.CreateZExt(hi, RegEltTy); |
4358 | 114 | hi = Builder.CreateShl(hi, 32); |
4359 | 114 | Elts[i] = Builder.CreateOr(lo, hi); |
4360 | 114 | } |
4361 | 78 | EltTy = RegEltTy; |
4362 | 78 | } |
4363 | 4.88k | } |
4364 | | |
4365 | | // Package elements into a vector as needed. |
4366 | 18.0k | Value *retValNew = nullptr; |
4367 | | // Scalar or native vector loads need not construct vectors from elements. |
4368 | 18.0k | if (!Ty->isVectorTy() || opcode == OP::OpCode::RawBufferVectorLoad11.6k ) { |
4369 | 8.84k | retValNew = Elts[0]; |
4370 | 9.20k | } else { |
4371 | 9.20k | retValNew = UndefValue::get(VectorType::get(EltTy, NumComponents)); |
4372 | 39.1k | for (unsigned i = 0; i < NumComponents; i++29.9k ) |
4373 | 29.9k | retValNew = Builder.CreateInsertElement(retValNew, Elts[i], i); |
4374 | 9.20k | } |
4375 | | |
4376 | | // Convert loaded int32 bool results to i1 register representation. |
4377 | 18.0k | if (isBool) |
4378 | 322 | retValNew = Builder.CreateICmpNE( |
4379 | 322 | retValNew, Constant::getNullValue(retValNew->getType())); |
4380 | | |
4381 | 18.0k | helper.retVal->replaceAllUsesWith(retValNew); |
4382 | 18.0k | helper.retVal = retValNew; |
4383 | | |
4384 | 18.0k | return FirstLd; |
4385 | 18.0k | } |
4386 | | |
// Lowers an HL resource load call.
// Looks up the resource class and kind of the handle operand and builds a
// ResLoadHelper from CI. A pointer-typed CI is treated as a structured buffer
// subscript; any other type goes through TranslateBufLoad, after which debug
// values are migrated from CI to the first generated load.
// Always returns nullptr. The opcode and Translated parameters are not
// referenced in the body.
4387 | | Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
4388 | | HLOperationLowerHelper &helper, |
4389 | | HLObjectOperationLowerHelper *pObjHelper, |
4390 | 6.21k | bool &Translated) { |
4391 | 6.21k | hlsl::OP *hlslOP = &helper.hlslOP; |
4392 | 6.21k | DataLayout &DL = helper.dataLayout; |
4393 | 6.21k | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
4394 | | |
4395 | 6.21k | IRBuilder<> Builder(CI); |
4396 | | |
4397 | 6.21k | DXIL::ResourceClass RC = pObjHelper->GetRC(handle); |
4398 | 6.21k | DXIL::ResourceKind RK = pObjHelper->GetRK(handle); |
4399 | | |
4400 | 6.21k | ResLoadHelper ldHelper(CI, RK, RC, handle, IOP); |
4401 | 6.21k | Type *Ty = CI->getType(); |
4402 | 6.21k | Value *Ld = nullptr; |
4403 | 6.21k | if (Ty->isPointerTy()) { |
4404 | 1.05k | DXASSERT(!DxilResource::IsAnyTexture(RK), |
4405 | 1.05k | "Textures should not be treated as structured buffers."); |
// No TyBufSubLoad was passed above, so ldHelper.retVal is CI itself.
4406 | 1.05k | TranslateStructBufSubscript(cast<CallInst>(ldHelper.retVal), handle, |
4407 | 1.05k | ldHelper.status, hlslOP, RK, DL); |
4408 | 5.16k | } else { |
4409 | 5.16k | Ld = TranslateBufLoad(ldHelper, RK, Builder, hlslOP, DL); |
4410 | 5.16k | dxilutil::MigrateDebugValue(CI, Ld); |
4411 | 5.16k | } |
4412 | | // CI is replaced by above translation calls. |
4413 | 6.21k | return nullptr; |
4414 | 6.21k | } |
4415 | | |
4416 | | // Split { v0, v1 } to { v0.lo, v0.hi, v1.lo, v1.hi } |
// For each of the first `size` entries of vals, writes the low half to
// vals32[2 * i] and the high half to vals32[2 * i + 1], both as i32.
// Doubles are split with the SplitDouble DXIL op; every other element type is
// split with trunc / lshr 32 / trunc. An undef input produces an undef i32
// pair without emitting any instructions.
4417 | | void Split64bitValForStore(Type *EltTy, ArrayRef<Value *> vals, unsigned size, |
4418 | | MutableArrayRef<Value *> vals32, hlsl::OP *hlslOP, |
4419 | 218 | IRBuilder<> &Builder) { |
4420 | 218 | Type *i32Ty = Builder.getInt32Ty(); |
4421 | 218 | Type *doubleTy = Builder.getDoubleTy(); |
4422 | 218 | Value *undefI32 = UndefValue::get(i32Ty); |
4423 | | |
4424 | 218 | if (EltTy == doubleTy) { |
4425 | 40 | Function *dToU = hlslOP->GetOpFunc(DXIL::OpCode::SplitDouble, doubleTy); |
4426 | 40 | Value *dToUOpArg = Builder.getInt32((unsigned)DXIL::OpCode::SplitDouble); |
4427 | 92 | for (unsigned i = 0; i < size; i++52 ) { |
4428 | 52 | if (isa<UndefValue>(vals[i])) { |
4429 | 0 | vals32[2 * i] = undefI32; |
4430 | 0 | vals32[2 * i + 1] = undefI32; |
4431 | 52 | } else { |
// The SplitDouble result is read as element 0 = low half, element 1 = high
// half.
4432 | 52 | Value *retVal = Builder.CreateCall(dToU, {dToUOpArg, vals[i]}); |
4433 | 52 | Value *lo = Builder.CreateExtractValue(retVal, 0); |
4434 | 52 | Value *hi = Builder.CreateExtractValue(retVal, 1); |
4435 | 52 | vals32[2 * i] = lo; |
4436 | 52 | vals32[2 * i + 1] = hi; |
4437 | 52 | } |
4438 | 52 | } |
4439 | 178 | } else { |
4440 | 372 | for (unsigned i = 0; i < size; i++194 ) { |
4441 | 194 | if (isa<UndefValue>(vals[i])) { |
4442 | 0 | vals32[2 * i] = undefI32; |
4443 | 0 | vals32[2 * i + 1] = undefI32; |
4444 | 194 | } else { |
4445 | 194 | Value *lo = Builder.CreateTrunc(vals[i], i32Ty); |
4446 | 194 | Value *hi = Builder.CreateLShr(vals[i], 32); |
4447 | 194 | hi = Builder.CreateTrunc(hi, i32Ty); |
4448 | 194 | vals32[2 * i] = lo; |
4449 | 194 | vals32[2 * i + 1] = hi; |
4450 | 194 | } |
4451 | 194 | } |
4452 | 178 | } |
4453 | 218 | } |
4454 | | |
// Emits the DXIL store call(s) that write val to the resource behind handle.
// RK selects the opcode: raw and structured buffers use RawBufferStore (or
// RawBufferVectorStore for vectors of more than one element when the shader
// model IsSM69Plus()), typed buffers use BufferStore, Texture2DMS kinds use
// TextureStoreSample, and the remaining valid kinds use TextureStore.
// Idx is the buffer index or texture coordinate (scalar or vector). offset is
// appended as the second coordinate for buffer stores. sampIdx, when non-null,
// is the sample index for TextureStoreSample; otherwise 0 is used.
// i1 values are zero-extended to i32 before being stored. Values with more
// than four components are written with several calls, four at a time.
// For typed resources, 64-bit elements are split into i32 halves first.
4455 | | void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, |
4456 | | Value *Idx, Value *offset, IRBuilder<> &Builder, |
4457 | 16.6k | hlsl::OP *OP, Value *sampIdx = nullptr) { |
4458 | 16.6k | Type *Ty = val->getType(); |
4459 | 16.6k | OP::OpCode opcode = OP::OpCode::NumOpCodes; |
4460 | 16.6k | bool IsTyped = true; |
4461 | 16.6k | switch (RK) { |
4462 | 3.06k | case DxilResource::Kind::RawBuffer: |
4463 | 13.3k | case DxilResource::Kind::StructuredBuffer: |
4464 | 13.3k | IsTyped = false; |
4465 | 13.3k | opcode = OP::OpCode::RawBufferStore; |
4466 | | // Where shader model and type allows, use vector store intrinsic. |
4467 | 13.3k | if (OP->GetModule()->GetHLModule().GetShaderModel()->IsSM69Plus() && |
4468 | 13.3k | Ty->isVectorTy()4.84k && Ty->getVectorNumElements() > 13.76k ) |
4469 | 2.35k | opcode = OP::OpCode::RawBufferVectorStore; |
4470 | 13.3k | break; |
4471 | 1.19k | case DxilResource::Kind::TypedBuffer: |
4472 | 1.19k | opcode = OP::OpCode::BufferStore; |
4473 | 1.19k | break; |
4474 | 0 | case DxilResource::Kind::Invalid: |
4475 | 0 | DXASSERT(0, "invalid resource kind"); |
4476 | 0 | break; |
4477 | 48 | case DxilResource::Kind::Texture2DMS: |
4478 | 80 | case DxilResource::Kind::Texture2DMSArray: |
4479 | 80 | opcode = OP::OpCode::TextureStoreSample; |
4480 | 80 | break; |
4481 | 1.97k | default: |
4482 | 1.97k | opcode = OP::OpCode::TextureStore; |
4483 | 1.97k | break; |
4484 | 16.6k | } |
4485 | | |
4486 | 16.6k | Type *i32Ty = Builder.getInt32Ty(); |
4487 | 16.6k | Type *i64Ty = Builder.getInt64Ty(); |
4488 | 16.6k | Type *doubleTy = Builder.getDoubleTy(); |
4489 | 16.6k | Type *EltTy = Ty->getScalarType(); |
4490 | 16.6k | if (EltTy->isIntegerTy(1)) { |
4491 | | // Since we're going to memory, convert bools to their memory |
4492 | | // representation. |
4493 | 344 | EltTy = i32Ty; |
4494 | 344 | if (Ty->isVectorTy()) |
4495 | 316 | Ty = VectorType::get(EltTy, Ty->getVectorNumElements()); |
4496 | 28 | else |
4497 | 28 | Ty = EltTy; |
4498 | 344 | val = Builder.CreateZExt(val, Ty); |
4499 | 344 | } |
4500 | | |
4501 | | // If RawBuffer store of 64-bit value, don't set alignment to 8, |
4502 | | // since buffer alignment isn't known to be anything over 4. |
4503 | 16.6k | unsigned alignValue = OP->GetAllocSizeForType(EltTy); |
4504 | 16.6k | if (RK == HLResource::Kind::RawBuffer && alignValue > 43.06k ) |
4505 | 232 | alignValue = 4; |
4506 | 16.6k | Constant *Alignment = OP->GetI32Const(alignValue); |
4507 | 16.6k | bool is64 = EltTy == i64Ty || EltTy == doubleTy15.6k ; |
// Typed resources take 64-bit values as i32 pairs, so the store function is
// looked up with the i32 overload.
4508 | 16.6k | if (is64 && IsTyped1.82k ) { |
4509 | 218 | EltTy = i32Ty; |
4510 | 218 | } |
4511 | | |
4512 | 16.6k | llvm::Constant *opArg = OP->GetU32Const((unsigned)opcode); |
4513 | | |
4514 | 16.6k | llvm::Value *undefI = |
4515 | 16.6k | llvm::UndefValue::get(llvm::Type::getInt32Ty(Ty->getContext())); |
4516 | | |
4517 | 16.6k | llvm::Value *undefVal = llvm::UndefValue::get(Ty->getScalarType()); |
4518 | | |
4519 | 16.6k | SmallVector<Value *, 13> storeArgs; |
4520 | 16.6k | storeArgs.emplace_back(opArg); // opcode |
4521 | 16.6k | storeArgs.emplace_back(handle); // resource handle |
4522 | | |
4523 | 16.6k | unsigned OffsetIdx = 0; |
4524 | 16.6k | if (opcode == OP::OpCode::RawBufferStore || |
4525 | 16.6k | opcode == OP::OpCode::RawBufferVectorStore5.60k || |
4526 | 16.6k | opcode == OP::OpCode::BufferStore3.24k ) { |
4527 | | // Append Coord0 (Index) value. |
4528 | 14.5k | if (Idx->getType()->isVectorTy()) { |
4529 | 0 | Value *ScalarIdx = Builder.CreateExtractElement(Idx, (uint64_t)0); |
4530 | 0 | storeArgs.emplace_back(ScalarIdx); // Coord0 (Index). |
4531 | 14.5k | } else { |
4532 | 14.5k | storeArgs.emplace_back(Idx); // Coord0 (Index). |
4533 | 14.5k | } |
4534 | | |
4535 | | // Store OffsetIdx representing the argument that may need to be incremented |
4536 | | // later to store additional chunks of data. |
4537 | | // Only structured buffers can use the offset parameter. |
4538 | | // Others must increment the index. |
4539 | 14.5k | if (RK == DxilResource::Kind::StructuredBuffer) |
4540 | 10.3k | OffsetIdx = storeArgs.size(); |
4541 | 4.25k | else |
4542 | 4.25k | OffsetIdx = storeArgs.size() - 1; |
4543 | | |
4544 | | // Coord1 (Offset). |
4545 | 14.5k | storeArgs.emplace_back(offset); |
4546 | 14.5k | } else { |
4547 | | // texture store |
4548 | 2.05k | unsigned coordSize = DxilResource::GetNumCoords(RK); |
4549 | | |
4550 | | // Set x first. |
4551 | 2.05k | if (Idx->getType()->isVectorTy()) |
4552 | 1.59k | storeArgs.emplace_back(Builder.CreateExtractElement(Idx, (uint64_t)0)); |
4553 | 466 | else |
4554 | 466 | storeArgs.emplace_back(Idx); |
4555 | | |
// Remaining coordinates (y, z): taken from Idx when the kind uses them,
// undef otherwise.
4556 | 6.16k | for (unsigned i = 1; i < 3; i++4.11k ) { |
4557 | 4.11k | if (i < coordSize) |
4558 | 1.70k | storeArgs.emplace_back(Builder.CreateExtractElement(Idx, i)); |
4559 | 2.41k | else |
4560 | 2.41k | storeArgs.emplace_back(undefI); |
4561 | 4.11k | } |
4562 | | // TODO: support mip for texture ST |
4563 | 2.05k | } |
4564 | | |
4565 | | // RawBufferVectorStore only takes a single value and alignment arguments. |
4566 | 16.6k | if (opcode == DXIL::OpCode::RawBufferVectorStore) { |
4567 | 2.35k | storeArgs.emplace_back(val); |
4568 | 2.35k | storeArgs.emplace_back(Alignment); |
4569 | 2.35k | Function *F = OP->GetOpFunc(DXIL::OpCode::RawBufferVectorStore, Ty); |
4570 | 2.35k | Builder.CreateCall(F, storeArgs); |
4571 | 2.35k | return; |
4572 | 2.35k | } |
4573 | 14.2k | Function *F = OP->GetOpFunc(opcode, EltTy); |
4574 | | |
4575 | 14.2k | constexpr unsigned MaxStoreElemCount = 4; |
4576 | 14.2k | const unsigned CompCount = Ty->isVectorTy() ? Ty->getVectorNumElements()8.15k : 16.12k ; |
// Number of store calls: CompCount / 4, rounded up.
4577 | 14.2k | const unsigned StoreInstCount = |
4578 | 14.2k | (CompCount / MaxStoreElemCount) + (CompCount % MaxStoreElemCount != 0); |
4579 | 14.2k | SmallVector<decltype(storeArgs), 4> storeArgsList; |
4580 | | |
4581 | | // Max number of elements to store should be 16 (for a 4x4 matrix) |
4582 | 14.2k | DXASSERT_NOMSG(StoreInstCount >= 1 && StoreInstCount <= 4); |
4583 | | |
4584 | | // If number of elements to store exceeds the maximum number of elements |
4585 | | // that can be stored in a single store call, make sure to generate enough |
4586 | | // store calls to store all elements |
4587 | 29.0k | for (unsigned j = 0; j < StoreInstCount; j++14.7k ) { |
4588 | 14.7k | decltype(storeArgs) newStoreArgs; |
4589 | 14.7k | for (Value *storeArg : storeArgs) |
4590 | 60.9k | newStoreArgs.emplace_back(storeArg); |
4591 | 14.7k | storeArgsList.emplace_back(newStoreArgs); |
4592 | 14.7k | } |
4593 | | |
4594 | 29.0k | for (unsigned j = 0; j < storeArgsList.size(); j++14.7k ) { |
4595 | | // For second and subsequent store calls, increment the resource-appropriate |
4596 | | // index or offset parameter. |
4597 | 14.7k | if (j > 0) { |
// The new coordinate is always computed from the first call's coordinate
// plus the byte size of the j preceding four-element chunks.
4598 | 436 | unsigned EltSize = OP->GetAllocSizeForType(EltTy); |
4599 | 436 | unsigned NewCoord = EltSize * MaxStoreElemCount * j; |
4600 | 436 | Value *NewCoordVal = ConstantInt::get(Builder.getInt32Ty(), NewCoord); |
4601 | 436 | NewCoordVal = Builder.CreateAdd(storeArgsList[0][OffsetIdx], NewCoordVal); |
4602 | 436 | storeArgsList[j][OffsetIdx] = NewCoordVal; |
4603 | 436 | } |
4604 | | |
4605 | | // Set value parameters. |
4606 | 14.7k | uint8_t mask = 0; |
4607 | 14.7k | if (Ty->isVectorTy()) { |
4608 | 8.59k | unsigned vecSize = |
4609 | 8.59k | std::min((j + 1) * MaxStoreElemCount, Ty->getVectorNumElements()) - |
4610 | 8.59k | (j * MaxStoreElemCount); |
// Unused value slots are undef for raw/structured stores. Typed stores
// instead repeat element 0 and always use the full mask.
4611 | 8.59k | Value *emptyVal = undefVal; |
4612 | 8.59k | if (IsTyped) { |
4613 | 1.69k | mask = DXIL::kCompMask_All; |
4614 | 1.69k | emptyVal = Builder.CreateExtractElement(val, (uint64_t)0); |
4615 | 1.69k | } |
4616 | | |
4617 | 42.9k | for (unsigned i = 0; i < MaxStoreElemCount; i++34.3k ) { |
4618 | 34.3k | if (i < vecSize) { |
4619 | 23.5k | storeArgsList[j].emplace_back( |
4620 | 23.5k | Builder.CreateExtractElement(val, (j * MaxStoreElemCount) + i)); |
4621 | 23.5k | mask |= (1 << i); |
4622 | 23.5k | } else { |
4623 | 10.8k | storeArgsList[j].emplace_back(emptyVal); |
4624 | 10.8k | } |
4625 | 34.3k | } |
4626 | | |
4627 | 8.59k | } else { |
// Scalar value: typed stores replicate it into all four slots; raw and
// structured stores write only X and leave the rest undef.
4628 | 6.12k | if (IsTyped) { |
4629 | 1.55k | mask = DXIL::kCompMask_All; |
4630 | 1.55k | storeArgsList[j].emplace_back(val); |
4631 | 1.55k | storeArgsList[j].emplace_back(val); |
4632 | 1.55k | storeArgsList[j].emplace_back(val); |
4633 | 1.55k | storeArgsList[j].emplace_back(val); |
4634 | 4.57k | } else { |
4635 | 4.57k | storeArgsList[j].emplace_back(val); |
4636 | 4.57k | storeArgsList[j].emplace_back(undefVal); |
4637 | 4.57k | storeArgsList[j].emplace_back(undefVal); |
4638 | 4.57k | storeArgsList[j].emplace_back(undefVal); |
4639 | 4.57k | mask = DXIL::kCompMask_X; |
4640 | 4.57k | } |
4641 | 6.12k | } |
4642 | | |
4643 | 14.7k | if (is64 && IsTyped1.49k ) { |
4644 | 218 | unsigned size = 1; |
4645 | 218 | if (Ty->isVectorTy()) { |
4646 | 36 | size = |
4647 | 36 | std::min((j + 1) * MaxStoreElemCount, Ty->getVectorNumElements()) - |
4648 | 36 | (j * MaxStoreElemCount); |
4649 | 36 | } |
4650 | 218 | DXASSERT(size <= 2, "raw/typed buffer only allow 4 dwords"); |
4651 | 218 | unsigned val0OpIdx = opcode == DXIL::OpCode::TextureStore || |
4652 | 218 | opcode == DXIL::OpCode::TextureStoreSample114 |
4653 | 218 | ? DXIL::OperandIndex::kTextureStoreVal0OpIdx112 |
4654 | 218 | : DXIL::OperandIndex::kBufferStoreVal0OpIdx106 ; |
4655 | 218 | Value *V0 = storeArgsList[j][val0OpIdx]; |
4656 | 218 | Value *V1 = storeArgsList[j][val0OpIdx + 1]; |
4657 | | |
4658 | 218 | Value *vals32[4]; |
// Restore the original 64-bit element type so the split helper can tell
// doubles from integers.
4659 | 218 | EltTy = Ty->getScalarType(); |
4660 | 218 | Split64bitValForStore(EltTy, {V0, V1}, size, vals32, OP, Builder); |
4661 | | // Fill the uninit vals. |
4662 | 218 | if (size == 1) { |
4663 | 190 | vals32[2] = vals32[0]; |
4664 | 190 | vals32[3] = vals32[1]; |
4665 | 190 | } |
4666 | | // Change valOp to 32 version. |
4667 | 1.09k | for (unsigned i = 0; i < 4; i++872 ) { |
4668 | 872 | storeArgsList[j][val0OpIdx + i] = vals32[i]; |
4669 | 872 | } |
4670 | | // change mask for double |
4671 | 218 | if (opcode == DXIL::OpCode::RawBufferStore) { |
4672 | 0 | mask = size == 1 ? DXIL::kCompMask_X | DXIL::kCompMask_Y |
4673 | 0 | : DXIL::kCompMask_All; |
4674 | 0 | } |
4675 | 218 | } |
4676 | | |
4677 | 14.7k | storeArgsList[j].emplace_back(OP->GetU8Const(mask)); // mask |
4678 | 14.7k | if (opcode == DXIL::OpCode::RawBufferStore) |
4679 | 11.4k | storeArgsList[j].emplace_back(Alignment); // alignment only for raw buffer |
4680 | 3.24k | else if (opcode == DXIL::OpCode::TextureStoreSample) { |
4681 | 80 | storeArgsList[j].emplace_back( |
4682 | 80 | sampIdx ? sampIdx40 |
4683 | 80 | : Builder.getInt32(0)40 ); // sample idx only for MS textures |
4684 | 80 | } |
4685 | 14.7k | Builder.CreateCall(F, storeArgsList[j]); |
4686 | 14.7k | } |
4687 | 14.2k | } |
4688 | | |
// Lowers an HL resource store call by forwarding its handle, value and offset
// operands to TranslateStore, with the resource kind looked up from the
// handle. Always returns nullptr. The IOP, opcode and Translated parameters
// are not referenced in the body.
4689 | | Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
4690 | | HLOperationLowerHelper &helper, |
4691 | | HLObjectOperationLowerHelper *pObjHelper, |
4692 | 3.06k | bool &Translated) { |
4693 | 3.06k | hlsl::OP *hlslOP = &helper.hlslOP; |
4694 | 3.06k | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
4695 | | |
4696 | 3.06k | IRBuilder<> Builder(CI); |
4697 | 3.06k | DXIL::ResourceKind RK = pObjHelper->GetRK(handle); |
4698 | | |
4699 | 3.06k | Value *val = CI->getArgOperand(HLOperandIndex::kStoreValOpIdx); |
4700 | 3.06k | Value *offset = CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx); |
4701 | 3.06k | Value *UndefI = UndefValue::get(Builder.getInt32Ty()); |
// The HL offset operand is passed as TranslateStore's Idx argument, and an
// undef i32 is passed as its offset argument.
4702 | 3.06k | TranslateStore(RK, handle, val, offset, UndefI, Builder, hlslOP); |
4703 | | |
4704 | 3.06k | return nullptr; |
4705 | 3.06k | } |
4706 | | } // namespace |
4707 | | |
4708 | | // Atomic intrinsics. |
4709 | | namespace { |
4710 | | // Atomic intrinsics. |
// Collects the operands of an Interlocked* operation: the DXIL opcode, the
// resource handle, the destination address, and the value operands.
// The four-argument constructor reads its operands from the call itself; the
// six-argument constructor is given the buffer index and base offset.
4711 | | struct AtomicHelper { |
4712 | | AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Type *opType = nullptr); |
4713 | | AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx, |
4714 | | Value *baseOffset, Type *opType = nullptr); |
4715 | | OP::OpCode opcode; |
4716 | | Value *handle; |
4717 | | Value *addr; |
4718 | | Value *offset; // Offset for structured buffer. |
4719 | | Value *value; |
4720 | | Value *originalValue; |
4721 | | Value *compareValue; |
4722 | | Type *operationType; |
4723 | | }; |
4724 | | |
4725 | | // For MOP version of Interlocked*. |
4726 | | AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Type *opType) |
4727 | 2.48k | : opcode(op), handle(h), offset(nullptr), originalValue(nullptr), |
4728 | 2.48k | operationType(opType) { |
4729 | 2.48k | addr = CI->getArgOperand(HLOperandIndex::kObjectInterlockedDestOpIndex); |
4730 | 2.48k | if (op == OP::OpCode::AtomicCompareExchange) { |
4731 | 962 | compareValue = CI->getArgOperand( |
4732 | 962 | HLOperandIndex::kObjectInterlockedCmpCompareValueOpIndex); |
4733 | 962 | value = |
4734 | 962 | CI->getArgOperand(HLOperandIndex::kObjectInterlockedCmpValueOpIndex); |
4735 | 962 | if (CI->getNumArgOperands() == |
4736 | 962 | (HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex + 1)) |
4737 | 526 | originalValue = CI->getArgOperand( |
4738 | 526 | HLOperandIndex::kObjectInterlockedCmpOriginalValueOpIndex); |
4739 | 1.52k | } else { |
4740 | 1.52k | value = CI->getArgOperand(HLOperandIndex::kObjectInterlockedValueOpIndex); |
4741 | 1.52k | if (CI->getNumArgOperands() == |
4742 | 1.52k | (HLOperandIndex::kObjectInterlockedOriginalValueOpIndex + 1)) |
4743 | 1.34k | originalValue = CI->getArgOperand( |
4744 | 1.34k | HLOperandIndex::kObjectInterlockedOriginalValueOpIndex); |
4745 | 1.52k | } |
4746 | 2.48k | if (nullptr == operationType) |
4747 | 2.32k | operationType = value->getType(); |
4748 | 2.48k | } |
4749 | | // For IOP version of Interlocked*. |
4750 | | AtomicHelper::AtomicHelper(CallInst *CI, OP::OpCode op, Value *h, Value *bufIdx, |
4751 | | Value *baseOffset, Type *opType) |
4752 | 4.18k | : opcode(op), handle(h), addr(bufIdx), offset(baseOffset), |
4753 | 4.18k | originalValue(nullptr), operationType(opType) { |
4754 | 4.18k | if (op == OP::OpCode::AtomicCompareExchange) { |
4755 | 1.42k | compareValue = |
4756 | 1.42k | CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex); |
4757 | 1.42k | value = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex); |
4758 | 1.42k | if (CI->getNumArgOperands() == |
4759 | 1.42k | (HLOperandIndex::kInterlockedCmpOriginalValueOpIndex + 1)) |
4760 | 692 | originalValue = CI->getArgOperand( |
4761 | 692 | HLOperandIndex::kInterlockedCmpOriginalValueOpIndex); |
4762 | 2.75k | } else { |
4763 | 2.75k | value = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex); |
4764 | 2.75k | if (CI->getNumArgOperands() == |
4765 | 2.75k | (HLOperandIndex::kInterlockedOriginalValueOpIndex + 1)) |
4766 | 720 | originalValue = |
4767 | 720 | CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex); |
4768 | 2.75k | } |
4769 | 4.18k | if (nullptr == operationType) |
4770 | 4.10k | operationType = value->getType(); |
4771 | 4.18k | } |
4772 | | |
4773 | | void TranslateAtomicBinaryOperation(AtomicHelper &helper, |
4774 | | DXIL::AtomicBinOpCode atomicOp, |
4775 | 4.28k | IRBuilder<> &Builder, hlsl::OP *hlslOP) { |
4776 | 4.28k | Value *handle = helper.handle; |
4777 | 4.28k | Value *addr = helper.addr; |
4778 | 4.28k | Value *val = helper.value; |
4779 | 4.28k | Type *Ty = helper.operationType; |
4780 | 4.28k | Type *valTy = val->getType(); |
4781 | | |
4782 | 4.28k | Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext())); |
4783 | | |
4784 | 4.28k | Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType()); |
4785 | 4.28k | Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode)); |
4786 | 4.28k | Value *atomicOpArg = hlslOP->GetU32Const(static_cast<unsigned>(atomicOp)); |
4787 | | |
4788 | 4.28k | if (Ty != valTy) |
4789 | 72 | val = Builder.CreateBitCast(val, Ty); |
4790 | | |
4791 | 4.28k | Value *args[] = {opArg, handle, atomicOpArg, |
4792 | 4.28k | undefI, undefI, undefI, // coordinates |
4793 | 4.28k | val}; |
4794 | | |
4795 | | // Setup coordinates. |
4796 | 4.28k | if (addr->getType()->isVectorTy()) { |
4797 | 250 | unsigned vectorNumElements = addr->getType()->getVectorNumElements(); |
4798 | 250 | DXASSERT(vectorNumElements <= 3, "up to 3 elements for atomic binary op"); |
4799 | 250 | assert(vectorNumElements <= 3); |
4800 | 846 | for (unsigned i = 0; i < vectorNumElements; i++596 ) { |
4801 | 596 | Value *Elt = Builder.CreateExtractElement(addr, i); |
4802 | 596 | args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx + i] = Elt; |
4803 | 596 | } |
4804 | 250 | } else |
4805 | 4.03k | args[DXIL::OperandIndex::kAtomicBinOpCoord0OpIdx] = addr; |
4806 | | |
4807 | | // Set offset for structured buffer. |
4808 | 4.28k | if (helper.offset) |
4809 | 1.00k | args[DXIL::OperandIndex::kAtomicBinOpCoord1OpIdx] = helper.offset; |
4810 | | |
4811 | 4.28k | Value *origVal = |
4812 | 4.28k | Builder.CreateCall(dxilAtomic, args, hlslOP->GetAtomicOpName(atomicOp)); |
4813 | 4.28k | if (helper.originalValue) { |
4814 | 2.06k | if (Ty != valTy) |
4815 | 72 | origVal = Builder.CreateBitCast(origVal, valTy); |
4816 | 2.06k | Builder.CreateStore(origVal, helper.originalValue); |
4817 | 2.06k | } |
4818 | 4.28k | } |
4819 | | |
4820 | | Value *TranslateMopAtomicBinaryOperation( |
4821 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
4822 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
4823 | 1.52k | bool &Translated) { |
4824 | 1.52k | hlsl::OP *hlslOP = &helper.hlslOP; |
4825 | | |
4826 | 1.52k | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
4827 | 1.52k | IRBuilder<> Builder(CI); |
4828 | | |
4829 | 1.52k | switch (IOP) { |
4830 | 244 | case IntrinsicOp::MOP_InterlockedAdd: |
4831 | 316 | case IntrinsicOp::MOP_InterlockedAdd64: { |
4832 | 316 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle); |
4833 | 316 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Add, Builder, |
4834 | 316 | hlslOP); |
4835 | 316 | } break; |
4836 | 72 | case IntrinsicOp::MOP_InterlockedAnd: |
4837 | 144 | case IntrinsicOp::MOP_InterlockedAnd64: { |
4838 | 144 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle); |
4839 | 144 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::And, Builder, |
4840 | 144 | hlslOP); |
4841 | 144 | } break; |
4842 | 216 | case IntrinsicOp::MOP_InterlockedExchange: |
4843 | 424 | case IntrinsicOp::MOP_InterlockedExchange64: { |
4844 | 424 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle); |
4845 | 424 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange, |
4846 | 424 | Builder, hlslOP); |
4847 | 424 | } break; |
4848 | 48 | case IntrinsicOp::MOP_InterlockedExchangeFloat: { |
4849 | 48 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle, |
4850 | 48 | Type::getInt32Ty(CI->getContext())); |
4851 | 48 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange, |
4852 | 48 | Builder, hlslOP); |
4853 | 48 | } break; |
4854 | 58 | case IntrinsicOp::MOP_InterlockedMax: |
4855 | 118 | case IntrinsicOp::MOP_InterlockedMax64: { |
4856 | 118 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle); |
4857 | 118 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMax, Builder, |
4858 | 118 | hlslOP); |
4859 | 118 | } break; |
4860 | 58 | case IntrinsicOp::MOP_InterlockedMin: |
4861 | 118 | case IntrinsicOp::MOP_InterlockedMin64: { |
4862 | 118 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle); |
4863 | 118 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMin, Builder, |
4864 | 118 | hlslOP); |
4865 | 118 | } break; |
4866 | 34 | case IntrinsicOp::MOP_InterlockedUMax: { |
4867 | 34 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle); |
4868 | 34 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMax, Builder, |
4869 | 34 | hlslOP); |
4870 | 34 | } break; |
4871 | 34 | case IntrinsicOp::MOP_InterlockedUMin: { |
4872 | 34 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle); |
4873 | 34 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMin, Builder, |
4874 | 34 | hlslOP); |
4875 | 34 | } break; |
4876 | 72 | case IntrinsicOp::MOP_InterlockedOr: |
4877 | 144 | case IntrinsicOp::MOP_InterlockedOr64: { |
4878 | 144 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle); |
4879 | 144 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Or, Builder, |
4880 | 144 | hlslOP); |
4881 | 144 | } break; |
4882 | 72 | case IntrinsicOp::MOP_InterlockedXor: |
4883 | 144 | case IntrinsicOp::MOP_InterlockedXor64: |
4884 | 144 | default: { |
4885 | 144 | DXASSERT(IOP == IntrinsicOp::MOP_InterlockedXor || |
4886 | 144 | IOP == IntrinsicOp::MOP_InterlockedXor64, |
4887 | 144 | "invalid MOP atomic intrinsic"); |
4888 | 144 | AtomicHelper helper(CI, DXIL::OpCode::AtomicBinOp, handle); |
4889 | 144 | TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Xor, Builder, |
4890 | 144 | hlslOP); |
4891 | 144 | } break; |
4892 | 1.52k | } |
4893 | | |
4894 | 1.52k | return nullptr; |
4895 | 1.52k | } |
4896 | | void TranslateAtomicCmpXChg(AtomicHelper &helper, IRBuilder<> &Builder, |
4897 | 2.38k | hlsl::OP *hlslOP) { |
4898 | 2.38k | Value *handle = helper.handle; |
4899 | 2.38k | Value *addr = helper.addr; |
4900 | 2.38k | Value *val = helper.value; |
4901 | 2.38k | Value *cmpVal = helper.compareValue; |
4902 | | |
4903 | 2.38k | Type *Ty = helper.operationType; |
4904 | 2.38k | Type *valTy = val->getType(); |
4905 | | |
4906 | 2.38k | Value *undefI = UndefValue::get(Type::getInt32Ty(Ty->getContext())); |
4907 | | |
4908 | 2.38k | Function *dxilAtomic = hlslOP->GetOpFunc(helper.opcode, Ty->getScalarType()); |
4909 | 2.38k | Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(helper.opcode)); |
4910 | | |
4911 | 2.38k | if (Ty != valTy) { |
4912 | 168 | val = Builder.CreateBitCast(val, Ty); |
4913 | 168 | if (cmpVal) |
4914 | 168 | cmpVal = Builder.CreateBitCast(cmpVal, Ty); |
4915 | 168 | } |
4916 | | |
4917 | 2.38k | Value *args[] = {opArg, handle, undefI, undefI, undefI, // coordinates |
4918 | 2.38k | cmpVal, val}; |
4919 | | |
4920 | | // Setup coordinates. |
4921 | 2.38k | if (addr->getType()->isVectorTy()) { |
4922 | 60 | unsigned vectorNumElements = addr->getType()->getVectorNumElements(); |
4923 | 60 | DXASSERT(vectorNumElements <= 3, "up to 3 elements in atomic op"); |
4924 | 60 | assert(vectorNumElements <= 3); |
4925 | 196 | for (unsigned i = 0; i < vectorNumElements; i++136 ) { |
4926 | 136 | Value *Elt = Builder.CreateExtractElement(addr, i); |
4927 | 136 | args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx + i] = Elt; |
4928 | 136 | } |
4929 | 60 | } else |
4930 | 2.32k | args[DXIL::OperandIndex::kAtomicCmpExchangeCoord0OpIdx] = addr; |
4931 | | |
4932 | | // Set offset for structured buffer. |
4933 | 2.38k | if (helper.offset) |
4934 | 536 | args[DXIL::OperandIndex::kAtomicCmpExchangeCoord1OpIdx] = helper.offset; |
4935 | | |
4936 | 2.38k | Value *origVal = Builder.CreateCall(dxilAtomic, args); |
4937 | 2.38k | if (helper.originalValue) { |
4938 | 1.21k | if (Ty != valTy) |
4939 | 84 | origVal = Builder.CreateBitCast(origVal, valTy); |
4940 | 1.21k | Builder.CreateStore(origVal, helper.originalValue); |
4941 | 1.21k | } |
4942 | 2.38k | } |
4943 | | |
4944 | | Value *TranslateMopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP, |
4945 | | OP::OpCode opcode, |
4946 | | HLOperationLowerHelper &helper, |
4947 | | HLObjectOperationLowerHelper *pObjHelper, |
4948 | 962 | bool &Translated) { |
4949 | 962 | hlsl::OP *hlslOP = &helper.hlslOP; |
4950 | | |
4951 | 962 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
4952 | 962 | IRBuilder<> Builder(CI); |
4953 | 962 | Type *opType = nullptr; |
4954 | 962 | if (IOP == IntrinsicOp::MOP_InterlockedCompareStoreFloatBitwise || |
4955 | 962 | IOP == IntrinsicOp::MOP_InterlockedCompareExchangeFloatBitwise906 ) |
4956 | 112 | opType = Type::getInt32Ty(CI->getContext()); |
4957 | 962 | AtomicHelper atomicHelper(CI, OP::OpCode::AtomicCompareExchange, handle, |
4958 | 962 | opType); |
4959 | 962 | TranslateAtomicCmpXChg(atomicHelper, Builder, hlslOP); |
4960 | 962 | return nullptr; |
4961 | 962 | } |
4962 | | |
4963 | | void TranslateSharedMemOrNodeAtomicBinOp(CallInst *CI, IntrinsicOp IOP, |
4964 | 1.49k | Value *addr) { |
4965 | 1.49k | AtomicRMWInst::BinOp Op; |
4966 | 1.49k | IRBuilder<> Builder(CI); |
4967 | 1.49k | Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex); |
4968 | 1.49k | PointerType *ptrType = dyn_cast<PointerType>( |
4969 | 1.49k | CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex)->getType()); |
4970 | 1.49k | bool needCast = ptrType && ptrType->getElementType()->isFloatTy(); |
4971 | 1.49k | switch (IOP) { |
4972 | 376 | case IntrinsicOp::IOP_InterlockedAdd: |
4973 | 376 | Op = AtomicRMWInst::BinOp::Add; |
4974 | 376 | break; |
4975 | 104 | case IntrinsicOp::IOP_InterlockedAnd: |
4976 | 104 | Op = AtomicRMWInst::BinOp::And; |
4977 | 104 | break; |
4978 | 472 | case IntrinsicOp::IOP_InterlockedExchange: |
4979 | 472 | if (needCast) { |
4980 | 48 | val = Builder.CreateBitCast(val, Type::getInt32Ty(CI->getContext())); |
4981 | 48 | addr = Builder.CreateBitCast( |
4982 | 48 | addr, Type::getInt32PtrTy(CI->getContext(), |
4983 | 48 | addr->getType()->getPointerAddressSpace())); |
4984 | 48 | } |
4985 | 472 | Op = AtomicRMWInst::BinOp::Xchg; |
4986 | 472 | break; |
4987 | 68 | case IntrinsicOp::IOP_InterlockedMax: |
4988 | 68 | Op = AtomicRMWInst::BinOp::Max; |
4989 | 68 | break; |
4990 | 84 | case IntrinsicOp::IOP_InterlockedUMax: |
4991 | 84 | Op = AtomicRMWInst::BinOp::UMax; |
4992 | 84 | break; |
4993 | 60 | case IntrinsicOp::IOP_InterlockedMin: |
4994 | 60 | Op = AtomicRMWInst::BinOp::Min; |
4995 | 60 | break; |
4996 | 68 | case IntrinsicOp::IOP_InterlockedUMin: |
4997 | 68 | Op = AtomicRMWInst::BinOp::UMin; |
4998 | 68 | break; |
4999 | 156 | case IntrinsicOp::IOP_InterlockedOr: |
5000 | 156 | Op = AtomicRMWInst::BinOp::Or; |
5001 | 156 | break; |
5002 | 104 | case IntrinsicOp::IOP_InterlockedXor: |
5003 | 104 | default: |
5004 | 104 | DXASSERT(IOP == IntrinsicOp::IOP_InterlockedXor, "Invalid Intrinsic"); |
5005 | 104 | Op = AtomicRMWInst::BinOp::Xor; |
5006 | 104 | break; |
5007 | 1.49k | } |
5008 | | |
5009 | 1.49k | Value *Result = Builder.CreateAtomicRMW( |
5010 | 1.49k | Op, addr, val, AtomicOrdering::SequentiallyConsistent); |
5011 | 1.49k | if (CI->getNumArgOperands() > |
5012 | 1.49k | HLOperandIndex::kInterlockedOriginalValueOpIndex) { |
5013 | 574 | if (needCast) |
5014 | 48 | Result = |
5015 | 48 | Builder.CreateBitCast(Result, Type::getFloatTy(CI->getContext())); |
5016 | 574 | Builder.CreateStore( |
5017 | 574 | Result, |
5018 | 574 | CI->getArgOperand(HLOperandIndex::kInterlockedOriginalValueOpIndex)); |
5019 | 574 | } |
5020 | 1.49k | } |
5021 | | |
5022 | 3.65k | static Value *SkipAddrSpaceCast(Value *Ptr) { |
5023 | 3.65k | if (AddrSpaceCastInst *CastInst = dyn_cast<AddrSpaceCastInst>(Ptr)) |
5024 | 2.25k | return CastInst->getOperand(0); |
5025 | 1.40k | if (ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Ptr)) { |
5026 | 400 | if (ConstExpr->getOpcode() == Instruction::AddrSpaceCast) { |
5027 | 400 | return ConstExpr->getOperand(0); |
5028 | 400 | } |
5029 | 400 | } |
5030 | 1.00k | return Ptr; |
5031 | 1.40k | } |
5032 | | |
5033 | | Value * |
5034 | | TranslateNodeIncrementOutputCount(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
5035 | | HLOperationLowerHelper &helper, |
5036 | | HLObjectOperationLowerHelper *pObjHelper, |
5037 | 84 | bool isPerThread, bool &Translated) { |
5038 | | |
5039 | 84 | hlsl::OP *OP = &helper.hlslOP; |
5040 | 84 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
5041 | 84 | Value *count = |
5042 | 84 | CI->getArgOperand(HLOperandIndex::kIncrementOutputCountCountIdx); |
5043 | 84 | Function *dxilFunc = OP->GetOpFunc(op, CI->getType()); |
5044 | 84 | Value *opArg = OP->GetU32Const((unsigned)op); |
5045 | 84 | Value *perThread = OP->GetI1Const(isPerThread); |
5046 | | |
5047 | 84 | Value *args[] = {opArg, handle, count, perThread}; |
5048 | | |
5049 | 84 | IRBuilder<> Builder(CI); |
5050 | 84 | Builder.CreateCall(dxilFunc, args); |
5051 | 84 | return nullptr; |
5052 | 84 | } |
5053 | | |
5054 | | /* |
5055 | | HLSL: |
5056 | | void EmptyNodeOutput::GroupIncrementOutputCount(uint count) |
5057 | | DXIL: |
5058 | | void @dx.op.groupIncrementOutputCount(i32 %Opcode, %dx.types.NodeHandle |
5059 | | %NodeOutput, i32 count) |
5060 | | */ |
5061 | | Value *TranslateNodeGroupIncrementOutputCount( |
5062 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
5063 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
5064 | 76 | bool &Translated) { |
5065 | 76 | return TranslateNodeIncrementOutputCount(CI, IOP, op, helper, pObjHelper, |
5066 | 76 | /*isPerThread*/ false, Translated); |
5067 | 76 | } |
5068 | | |
5069 | | /* |
5070 | | HLSL: |
5071 | | void EmptyNodeOutput::ThreadIncrementOutputCount(uint count) |
5072 | | DXIL: |
5073 | | void @dx.op.threadIncrementOutputCount(i32 %Opcode, %dx.types.NodeHandle |
5074 | | %NodeOutput, i32 count) |
5075 | | */ |
5076 | | Value *TranslateNodeThreadIncrementOutputCount( |
5077 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
5078 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
5079 | 8 | bool &Translated) { |
5080 | 8 | return TranslateNodeIncrementOutputCount(CI, IOP, op, helper, pObjHelper, |
5081 | 8 | /*isPerThread*/ true, Translated); |
5082 | 8 | } |
5083 | | |
5084 | | // For known non-groupshared, verify that the destination param is valid |
5085 | | void ValidateAtomicDestination(CallInst *CI, |
5086 | 1.00k | HLObjectOperationLowerHelper *pObjHelper) { |
5087 | 1.00k | Value *dest = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex); |
5088 | | // If we encounter a gep, we may provide a more specific error message |
5089 | 1.00k | bool hasGep = isa<GetElementPtrInst>(dest); |
5090 | | |
5091 | | // Confirm that dest is a properly-used UAV |
5092 | | |
5093 | | // Drill through subscripts and geps, anything else indicates a misuse |
5094 | 2.23k | while (true) { |
5095 | 2.23k | if (GetElementPtrInst *gep = dyn_cast<GetElementPtrInst>(dest)) { |
5096 | 284 | dest = gep->getPointerOperand(); |
5097 | 284 | continue; |
5098 | 284 | } |
5099 | 1.95k | if (CallInst *handle = dyn_cast<CallInst>(dest)) { |
5100 | 1.86k | hlsl::HLOpcodeGroup group = |
5101 | 1.86k | hlsl::GetHLOpcodeGroup(handle->getCalledFunction()); |
5102 | 1.86k | if (group != HLOpcodeGroup::HLSubscript) |
5103 | 914 | break; |
5104 | 946 | dest = handle->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx); |
5105 | 946 | continue; |
5106 | 1.86k | } |
5107 | 90 | break; |
5108 | 1.95k | } |
5109 | | |
5110 | 1.00k | if (pObjHelper->GetRC(dest) == DXIL::ResourceClass::UAV) { |
5111 | 914 | DXIL::ResourceKind RK = pObjHelper->GetRK(dest); |
5112 | 914 | if (DXIL::IsStructuredBuffer(RK)) |
5113 | 404 | return; // no errors |
5114 | 510 | if (DXIL::IsTyped(RK)) { |
5115 | 510 | if (hasGep) |
5116 | 16 | dxilutil::EmitErrorOnInstruction( |
5117 | 16 | CI, "Typed resources used in atomic operations must have a scalar " |
5118 | 16 | "element type."); |
5119 | 510 | return; // error emitted or else no errors |
5120 | 510 | } |
5121 | 510 | } |
5122 | | |
5123 | 90 | dxilutil::EmitErrorOnInstruction( |
5124 | 90 | CI, "Atomic operation targets must be groupshared, Node Record or UAV."); |
5125 | 90 | } |
5126 | | |
5127 | | Value *TranslateIopAtomicBinaryOperation( |
5128 | | CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, |
5129 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
5130 | 2.42k | bool &Translated) { |
5131 | 2.42k | Value *addr = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex); |
5132 | 2.42k | addr = SkipAddrSpaceCast(addr); |
5133 | | |
5134 | 2.42k | unsigned addressSpace = addr->getType()->getPointerAddressSpace(); |
5135 | 2.42k | if (addressSpace == DXIL::kTGSMAddrSpace || |
5136 | 2.42k | addressSpace == DXIL::kNodeRecordAddrSpace974 ) |
5137 | 1.49k | TranslateSharedMemOrNodeAtomicBinOp(CI, IOP, addr); |
5138 | 928 | else { |
5139 | | // If not groupshared or node record, we either have an error case or will |
5140 | | // translate the atomic op in the process of translating users of the |
5141 | | // subscript operator Mark not translated and validate dest param |
5142 | 928 | Translated = false; |
5143 | 928 | ValidateAtomicDestination(CI, pObjHelper); |
5144 | 928 | } |
5145 | | |
5146 | 2.42k | return nullptr; |
5147 | 2.42k | } |
5148 | | |
5149 | 1.16k | void TranslateSharedMemOrNodeAtomicCmpXChg(CallInst *CI, Value *addr) { |
5150 | 1.16k | Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex); |
5151 | 1.16k | Value *cmpVal = |
5152 | 1.16k | CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex); |
5153 | 1.16k | IRBuilder<> Builder(CI); |
5154 | | |
5155 | 1.16k | PointerType *ptrType = dyn_cast<PointerType>( |
5156 | 1.16k | CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex)->getType()); |
5157 | 1.16k | bool needCast = false; |
5158 | 1.16k | if (ptrType && ptrType->getElementType()->isFloatTy()) { |
5159 | 166 | needCast = true; |
5160 | 166 | val = Builder.CreateBitCast(val, Type::getInt32Ty(CI->getContext())); |
5161 | 166 | cmpVal = Builder.CreateBitCast(cmpVal, Type::getInt32Ty(CI->getContext())); |
5162 | 166 | unsigned addrSpace = cast<PointerType>(addr->getType())->getAddressSpace(); |
5163 | 166 | addr = Builder.CreateBitCast( |
5164 | 166 | addr, Type::getInt32PtrTy(CI->getContext(), addrSpace)); |
5165 | 166 | } |
5166 | | |
5167 | 1.16k | Value *Result = Builder.CreateAtomicCmpXchg( |
5168 | 1.16k | addr, cmpVal, val, AtomicOrdering::SequentiallyConsistent, |
5169 | 1.16k | AtomicOrdering::SequentiallyConsistent); |
5170 | | |
5171 | 1.16k | if (CI->getNumArgOperands() > |
5172 | 1.16k | HLOperandIndex::kInterlockedCmpOriginalValueOpIndex) { |
5173 | 538 | Value *originVal = Builder.CreateExtractValue(Result, 0); |
5174 | 538 | if (needCast) |
5175 | 56 | originVal = |
5176 | 56 | Builder.CreateBitCast(originVal, Type::getFloatTy(CI->getContext())); |
5177 | 538 | Builder.CreateStore( |
5178 | 538 | originVal, |
5179 | 538 | CI->getArgOperand(HLOperandIndex::kInterlockedCmpOriginalValueOpIndex)); |
5180 | 538 | } |
5181 | 1.16k | } |
5182 | | |
5183 | | Value *TranslateIopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP, |
5184 | | DXIL::OpCode opcode, |
5185 | | HLOperationLowerHelper &helper, |
5186 | | HLObjectOperationLowerHelper *pObjHelper, |
5187 | 1.23k | bool &Translated) { |
5188 | 1.23k | Value *addr = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex); |
5189 | 1.23k | addr = SkipAddrSpaceCast(addr); |
5190 | | |
5191 | 1.23k | unsigned addressSpace = addr->getType()->getPointerAddressSpace(); |
5192 | 1.23k | if (addressSpace == DXIL::kTGSMAddrSpace || |
5193 | 1.23k | addressSpace == DXIL::kNodeRecordAddrSpace176 ) |
5194 | 1.16k | TranslateSharedMemOrNodeAtomicCmpXChg(CI, addr); |
5195 | 76 | else { |
5196 | | // If not groupshared, we either have an error case or will translate |
5197 | | // the atomic op in the process of translating users of the subscript |
5198 | | // operator Mark not translated and validate dest param |
5199 | 76 | Translated = false; |
5200 | 76 | ValidateAtomicDestination(CI, pObjHelper); |
5201 | 76 | } |
5202 | | |
5203 | 1.23k | return nullptr; |
5204 | 1.23k | } |
5205 | | } // namespace |
5206 | | |
5207 | | // Process Tess Factor. |
5208 | | namespace { |
5209 | | |
5210 | | // Clamp to [0.0f..1.0f], NaN->0.0f. |
5211 | | Value *CleanupTessFactorScale(Value *input, hlsl::OP *hlslOP, |
5212 | 288 | IRBuilder<> &Builder) { |
5213 | 288 | float fMin = 0; |
5214 | 288 | float fMax = 1; |
5215 | 288 | Type *f32Ty = input->getType()->getScalarType(); |
5216 | 288 | Value *minFactor = ConstantFP::get(f32Ty, fMin); |
5217 | 288 | Value *maxFactor = ConstantFP::get(f32Ty, fMax); |
5218 | 288 | Type *Ty = input->getType(); |
5219 | 288 | if (Ty->isVectorTy()) |
5220 | 288 | minFactor = SplatToVector(minFactor, input->getType(), Builder); |
5221 | 288 | Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, |
5222 | 288 | hlslOP, Builder); |
5223 | 288 | if (Ty->isVectorTy()) |
5224 | 288 | maxFactor = SplatToVector(maxFactor, input->getType(), Builder); |
5225 | 288 | return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP, |
5226 | 288 | Builder); |
5227 | 288 | } |
5228 | | |
5229 | | // Clamp to [1.0f..Inf], NaN->1.0f. |
5230 | 288 | Value *CleanupTessFactor(Value *input, hlsl::OP *hlslOP, IRBuilder<> &Builder) { |
5231 | 288 | float fMin = 1.0; |
5232 | 288 | Type *f32Ty = input->getType()->getScalarType(); |
5233 | 288 | Value *minFactor = ConstantFP::get(f32Ty, fMin); |
5234 | 288 | minFactor = SplatToVector(minFactor, input->getType(), Builder); |
5235 | 288 | return TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, |
5236 | 288 | hlslOP, Builder); |
5237 | 288 | } |
5238 | | |
5239 | | // Do partitioning-specific clamping. |
5240 | | Value *ClampTessFactor(Value *input, |
5241 | | DXIL::TessellatorPartitioning partitionMode, |
5242 | 680 | hlsl::OP *hlslOP, IRBuilder<> &Builder) { |
5243 | 680 | const unsigned kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR = 64; |
5244 | 680 | const unsigned kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR = 63; |
5245 | | |
5246 | 680 | const unsigned kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR = 2; |
5247 | 680 | const unsigned kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR = 1; |
5248 | | |
5249 | 680 | const unsigned kTESSELLATOR_MAX_TESSELLATION_FACTOR = 64; |
5250 | | |
5251 | 680 | float fMin; |
5252 | 680 | float fMax; |
5253 | 680 | switch (partitionMode) { |
5254 | 152 | case DXIL::TessellatorPartitioning::Integer: |
5255 | 152 | fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR; |
5256 | 152 | fMax = kTESSELLATOR_MAX_TESSELLATION_FACTOR; |
5257 | 152 | break; |
5258 | 152 | case DXIL::TessellatorPartitioning::Pow2: |
5259 | 152 | fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR; |
5260 | 152 | fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR; |
5261 | 152 | break; |
5262 | 224 | case DXIL::TessellatorPartitioning::FractionalOdd: |
5263 | 224 | fMin = kTESSELLATOR_MIN_ODD_TESSELLATION_FACTOR; |
5264 | 224 | fMax = kTESSELLATOR_MAX_ODD_TESSELLATION_FACTOR; |
5265 | 224 | break; |
5266 | 152 | case DXIL::TessellatorPartitioning::FractionalEven: |
5267 | 152 | default: |
5268 | 152 | DXASSERT(partitionMode == DXIL::TessellatorPartitioning::FractionalEven, |
5269 | 152 | "invalid partition mode"); |
5270 | 152 | fMin = kTESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR; |
5271 | 152 | fMax = kTESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR; |
5272 | 152 | break; |
5273 | 680 | } |
5274 | 680 | Type *f32Ty = input->getType()->getScalarType(); |
5275 | 680 | Value *minFactor = ConstantFP::get(f32Ty, fMin); |
5276 | 680 | Value *maxFactor = ConstantFP::get(f32Ty, fMax); |
5277 | 680 | Type *Ty = input->getType(); |
5278 | 680 | if (Ty->isVectorTy()) |
5279 | 632 | minFactor = SplatToVector(minFactor, input->getType(), Builder); |
5280 | 680 | Value *temp = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, input, minFactor, |
5281 | 680 | hlslOP, Builder); |
5282 | 680 | if (Ty->isVectorTy()) |
5283 | 632 | maxFactor = SplatToVector(maxFactor, input->getType(), Builder); |
5284 | 680 | return TrivialDxilBinaryOperation(DXIL::OpCode::FMin, temp, maxFactor, hlslOP, |
5285 | 680 | Builder); |
5286 | 680 | } |
5287 | | |
5288 | | // round up for integer/pow2 partitioning |
5289 | | // note that this code assumes the inputs should be in the range [1, inf), |
5290 | | // which should be enforced by the clamp above. |
5291 | | Value *RoundUpTessFactor(Value *input, |
5292 | | DXIL::TessellatorPartitioning partitionMode, |
5293 | 704 | hlsl::OP *hlslOP, IRBuilder<> &Builder) { |
5294 | 704 | switch (partitionMode) { |
5295 | 152 | case DXIL::TessellatorPartitioning::Integer: |
5296 | 152 | return TrivialDxilUnaryOperation(DXIL::OpCode::Round_pi, input, hlslOP, |
5297 | 152 | Builder); |
5298 | 152 | case DXIL::TessellatorPartitioning::Pow2: { |
5299 | 152 | const unsigned kExponentMask = 0x7f800000; |
5300 | 152 | const unsigned kExponentLSB = 0x00800000; |
5301 | 152 | const unsigned kMantissaMask = 0x007fffff; |
5302 | 152 | Type *Ty = input->getType(); |
5303 | | // (val = (asuint(val) & mantissamask) ? |
5304 | | // (asuint(val) & exponentmask) + exponentbump : |
5305 | | // asuint(val) & exponentmask; |
5306 | 152 | Type *uintTy = Type::getInt32Ty(Ty->getContext()); |
5307 | 152 | if (Ty->isVectorTy()) |
5308 | 152 | uintTy = VectorType::get(uintTy, Ty->getVectorNumElements()); |
5309 | 152 | Value *uintVal = |
5310 | 152 | Builder.CreateCast(Instruction::CastOps::FPToUI, input, uintTy); |
5311 | | |
5312 | 152 | Value *mantMask = ConstantInt::get(uintTy->getScalarType(), kMantissaMask); |
5313 | 152 | mantMask = SplatToVector(mantMask, uintTy, Builder); |
5314 | 152 | Value *manVal = Builder.CreateAnd(uintVal, mantMask); |
5315 | | |
5316 | 152 | Value *expMask = ConstantInt::get(uintTy->getScalarType(), kExponentMask); |
5317 | 152 | expMask = SplatToVector(expMask, uintTy, Builder); |
5318 | 152 | Value *expVal = Builder.CreateAnd(uintVal, expMask); |
5319 | | |
5320 | 152 | Value *expLSB = ConstantInt::get(uintTy->getScalarType(), kExponentLSB); |
5321 | 152 | expLSB = SplatToVector(expLSB, uintTy, Builder); |
5322 | 152 | Value *newExpVal = Builder.CreateAdd(expVal, expLSB); |
5323 | | |
5324 | 152 | Value *manValNotZero = |
5325 | 152 | Builder.CreateICmpEQ(manVal, ConstantAggregateZero::get(uintTy)); |
5326 | 152 | Value *factors = Builder.CreateSelect(manValNotZero, newExpVal, expVal); |
5327 | 152 | return Builder.CreateUIToFP(factors, Ty); |
5328 | 0 | } break; |
5329 | 152 | case DXIL::TessellatorPartitioning::FractionalEven: |
5330 | 400 | case DXIL::TessellatorPartitioning::FractionalOdd: |
5331 | 400 | return input; |
5332 | 0 | default: |
5333 | 0 | DXASSERT(0, "invalid partition mode"); |
5334 | 0 | return nullptr; |
5335 | 704 | } |
5336 | 704 | } |
5337 | | |
5338 | | Value *TranslateProcessIsolineTessFactors( |
5339 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
5340 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
5341 | 32 | bool &Translated) { |
5342 | 32 | hlsl::OP *hlslOP = &helper.hlslOP; |
5343 | | // Get partition mode |
5344 | 32 | DXASSERT_NOMSG(helper.functionProps); |
5345 | 32 | DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull, |
5346 | 32 | "must be hull shader"); |
5347 | 32 | DXIL::TessellatorPartitioning partition = |
5348 | 32 | helper.functionProps->ShaderProps.HS.partition; |
5349 | | |
5350 | 32 | IRBuilder<> Builder(CI); |
5351 | | |
5352 | 32 | Value *rawDetailFactor = |
5353 | 32 | CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDetailFactor); |
5354 | 32 | rawDetailFactor = Builder.CreateExtractElement(rawDetailFactor, (uint64_t)0); |
5355 | | |
5356 | 32 | Value *rawDensityFactor = |
5357 | 32 | CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawDensityFactor); |
5358 | 32 | rawDensityFactor = |
5359 | 32 | Builder.CreateExtractElement(rawDensityFactor, (uint64_t)0); |
5360 | | |
5361 | 32 | Value *init = UndefValue::get(VectorType::get(helper.f32Ty, 2)); |
5362 | 32 | init = Builder.CreateInsertElement(init, rawDetailFactor, (uint64_t)0); |
5363 | 32 | init = Builder.CreateInsertElement(init, rawDetailFactor, (uint64_t)1); |
5364 | | |
5365 | 32 | Value *clamped = ClampTessFactor(init, partition, hlslOP, Builder); |
5366 | 32 | Value *rounded = RoundUpTessFactor(clamped, partition, hlslOP, Builder); |
5367 | | |
5368 | 32 | Value *roundedDetailFactor = |
5369 | 32 | CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDetailFactor); |
5370 | 32 | Value *temp = UndefValue::get(VectorType::get(helper.f32Ty, 1)); |
5371 | 32 | Value *roundedX = Builder.CreateExtractElement(rounded, (uint64_t)0); |
5372 | 32 | temp = Builder.CreateInsertElement(temp, roundedX, (uint64_t)0); |
5373 | 32 | Builder.CreateStore(temp, roundedDetailFactor); |
5374 | | |
5375 | 32 | Value *roundedDensityFactor = |
5376 | 32 | CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedDensityFactor); |
5377 | 32 | Value *roundedY = Builder.CreateExtractElement(rounded, 1); |
5378 | 32 | temp = Builder.CreateInsertElement(temp, roundedY, (uint64_t)0); |
5379 | 32 | Builder.CreateStore(temp, roundedDensityFactor); |
5380 | 32 | return nullptr; |
5381 | 32 | } |
5382 | | |
5383 | | // 3 inputs, 1 result |
5384 | | Value *ApplyTriTessFactorOp(Value *input, DXIL::OpCode opcode, hlsl::OP *hlslOP, |
5385 | 120 | IRBuilder<> &Builder) { |
5386 | 120 | Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0); |
5387 | 120 | Value *input1 = Builder.CreateExtractElement(input, 1); |
5388 | 120 | Value *input2 = Builder.CreateExtractElement(input, 2); |
5389 | | |
5390 | 120 | if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin80 ) { |
5391 | 72 | Value *temp = |
5392 | 72 | TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder); |
5393 | 72 | Value *combined = |
5394 | 72 | TrivialDxilBinaryOperation(opcode, temp, input2, hlslOP, Builder); |
5395 | 72 | return combined; |
5396 | 72 | } |
5397 | | |
5398 | | // Avg. |
5399 | 48 | Value *temp = Builder.CreateFAdd(input0, input1); |
5400 | 48 | Value *combined = Builder.CreateFAdd(temp, input2); |
5401 | 48 | Value *rcp = ConstantFP::get(input0->getType(), 1.0 / 3.0); |
5402 | 48 | combined = Builder.CreateFMul(combined, rcp); |
5403 | 48 | return combined; |
5404 | 120 | } |
5405 | | |
5406 | | // 4 inputs, 1 result |
5407 | | Value *ApplyQuadTessFactorOp(Value *input, DXIL::OpCode opcode, |
5408 | 120 | hlsl::OP *hlslOP, IRBuilder<> &Builder) { |
5409 | 120 | Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0); |
5410 | 120 | Value *input1 = Builder.CreateExtractElement(input, 1); |
5411 | 120 | Value *input2 = Builder.CreateExtractElement(input, 2); |
5412 | 120 | Value *input3 = Builder.CreateExtractElement(input, 3); |
5413 | | |
5414 | 120 | if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin80 ) { |
5415 | 72 | Value *temp0 = |
5416 | 72 | TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder); |
5417 | 72 | Value *temp1 = |
5418 | 72 | TrivialDxilBinaryOperation(opcode, input2, input3, hlslOP, Builder); |
5419 | 72 | Value *combined = |
5420 | 72 | TrivialDxilBinaryOperation(opcode, temp0, temp1, hlslOP, Builder); |
5421 | 72 | return combined; |
5422 | 72 | } |
5423 | | |
5424 | | // Avg. |
5425 | 48 | Value *temp0 = Builder.CreateFAdd(input0, input1); |
5426 | 48 | Value *temp1 = Builder.CreateFAdd(input2, input3); |
5427 | 48 | Value *combined = Builder.CreateFAdd(temp0, temp1); |
5428 | 48 | Value *rcp = ConstantFP::get(input0->getType(), 0.25); |
5429 | 48 | combined = Builder.CreateFMul(combined, rcp); |
5430 | 48 | return combined; |
5431 | 120 | } |
5432 | | |
5433 | | // 4 inputs, 2 result |
5434 | | Value *Apply2DQuadTessFactorOp(Value *input, DXIL::OpCode opcode, |
5435 | 120 | hlsl::OP *hlslOP, IRBuilder<> &Builder) { |
5436 | 120 | Value *input0 = Builder.CreateExtractElement(input, (uint64_t)0); |
5437 | 120 | Value *input1 = Builder.CreateExtractElement(input, 1); |
5438 | 120 | Value *input2 = Builder.CreateExtractElement(input, 2); |
5439 | 120 | Value *input3 = Builder.CreateExtractElement(input, 3); |
5440 | | |
5441 | 120 | if (opcode == DXIL::OpCode::FMax || opcode == DXIL::OpCode::FMin80 ) { |
5442 | 72 | Value *temp0 = |
5443 | 72 | TrivialDxilBinaryOperation(opcode, input0, input1, hlslOP, Builder); |
5444 | 72 | Value *temp1 = |
5445 | 72 | TrivialDxilBinaryOperation(opcode, input2, input3, hlslOP, Builder); |
5446 | 72 | Value *combined = UndefValue::get(VectorType::get(input0->getType(), 2)); |
5447 | 72 | combined = Builder.CreateInsertElement(combined, temp0, (uint64_t)0); |
5448 | 72 | combined = Builder.CreateInsertElement(combined, temp1, 1); |
5449 | 72 | return combined; |
5450 | 72 | } |
5451 | | |
5452 | | // Avg. |
5453 | 48 | Value *temp0 = Builder.CreateFAdd(input0, input1); |
5454 | 48 | Value *temp1 = Builder.CreateFAdd(input2, input3); |
5455 | 48 | Value *combined = UndefValue::get(VectorType::get(input0->getType(), 2)); |
5456 | 48 | combined = Builder.CreateInsertElement(combined, temp0, (uint64_t)0); |
5457 | 48 | combined = Builder.CreateInsertElement(combined, temp1, 1); |
5458 | 48 | Constant *rcp = ConstantFP::get(input0->getType(), 0.5); |
5459 | 48 | rcp = ConstantVector::getSplat(2, rcp); |
5460 | 48 | combined = Builder.CreateFMul(combined, rcp); |
5461 | 48 | return combined; |
5462 | 120 | } |
5463 | | |
5464 | | Value *ResolveSmallValue(Value **pClampedResult, Value *rounded, |
5465 | | Value *averageUnscaled, float cutoffVal, |
5466 | | DXIL::TessellatorPartitioning partitionMode, |
5467 | 72 | hlsl::OP *hlslOP, IRBuilder<> &Builder) { |
5468 | 72 | Value *clampedResult = *pClampedResult; |
5469 | 72 | Value *clampedVal = clampedResult; |
5470 | 72 | Value *roundedVal = rounded; |
5471 | | // Do partitioning-specific clamping. |
5472 | 72 | Value *clampedAvg = |
5473 | 72 | ClampTessFactor(averageUnscaled, partitionMode, hlslOP, Builder); |
5474 | 72 | Constant *cutoffVals = |
5475 | 72 | ConstantFP::get(Type::getFloatTy(rounded->getContext()), cutoffVal); |
5476 | 72 | if (clampedAvg->getType()->isVectorTy()) |
5477 | 24 | cutoffVals = ConstantVector::getSplat( |
5478 | 24 | clampedAvg->getType()->getVectorNumElements(), cutoffVals); |
5479 | | // Limit the value. |
5480 | 72 | clampedAvg = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, clampedAvg, |
5481 | 72 | cutoffVals, hlslOP, Builder); |
5482 | | // Round up for integer/pow2 partitioning. |
5483 | 72 | Value *roundedAvg = |
5484 | 72 | RoundUpTessFactor(clampedAvg, partitionMode, hlslOP, Builder); |
5485 | | |
5486 | 72 | if (rounded->getType() != cutoffVals->getType()) |
5487 | 48 | cutoffVals = ConstantVector::getSplat( |
5488 | 48 | rounded->getType()->getVectorNumElements(), cutoffVals); |
5489 | | // If the scaled value is less than three, then take the unscaled average. |
5490 | 72 | Value *lt = Builder.CreateFCmpOLT(rounded, cutoffVals); |
5491 | 72 | if (clampedAvg->getType() != clampedVal->getType()) |
5492 | 48 | clampedAvg = SplatToVector(clampedAvg, clampedVal->getType(), Builder); |
5493 | 72 | *pClampedResult = Builder.CreateSelect(lt, clampedAvg, clampedVal); |
5494 | | |
5495 | 72 | if (roundedAvg->getType() != roundedVal->getType()) |
5496 | 48 | roundedAvg = SplatToVector(roundedAvg, roundedVal->getType(), Builder); |
5497 | 72 | Value *result = Builder.CreateSelect(lt, roundedAvg, roundedVal); |
5498 | 72 | return result; |
5499 | 72 | } |
5500 | | |
5501 | | void ResolveQuadAxes(Value **pFinalResult, Value **pClampedResult, |
5502 | | float cutoffVal, |
5503 | | DXIL::TessellatorPartitioning partitionMode, |
5504 | 24 | hlsl::OP *hlslOP, IRBuilder<> &Builder) { |
5505 | 24 | Value *finalResult = *pFinalResult; |
5506 | 24 | Value *clampedResult = *pClampedResult; |
5507 | | |
5508 | 24 | Value *clampR = clampedResult; |
5509 | 24 | Value *finalR = finalResult; |
5510 | 24 | Type *f32Ty = Type::getFloatTy(finalR->getContext()); |
5511 | 24 | Constant *cutoffVals = ConstantFP::get(f32Ty, cutoffVal); |
5512 | | |
5513 | 24 | Value *minValsX = cutoffVals; |
5514 | 24 | Value *minValsY = |
5515 | 24 | RoundUpTessFactor(cutoffVals, partitionMode, hlslOP, Builder); |
5516 | | |
5517 | 24 | Value *clampRX = Builder.CreateExtractElement(clampR, (uint64_t)0); |
5518 | 24 | Value *clampRY = Builder.CreateExtractElement(clampR, 1); |
5519 | 24 | Value *maxValsX = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, clampRX, |
5520 | 24 | clampRY, hlslOP, Builder); |
5521 | | |
5522 | 24 | Value *finalRX = Builder.CreateExtractElement(finalR, (uint64_t)0); |
5523 | 24 | Value *finalRY = Builder.CreateExtractElement(finalR, 1); |
5524 | 24 | Value *maxValsY = TrivialDxilBinaryOperation(DXIL::OpCode::FMax, finalRX, |
5525 | 24 | finalRY, hlslOP, Builder); |
5526 | | |
5527 | | // Don't go over our threshold ("final" one is rounded). |
5528 | 24 | Value *optionX = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsX, |
5529 | 24 | minValsX, hlslOP, Builder); |
5530 | 24 | Value *optionY = TrivialDxilBinaryOperation(DXIL::OpCode::FMin, maxValsY, |
5531 | 24 | minValsY, hlslOP, Builder); |
5532 | | |
5533 | 24 | Value *clampL = SplatToVector(optionX, clampR->getType(), Builder); |
5534 | 24 | Value *finalL = SplatToVector(optionY, finalR->getType(), Builder); |
5535 | | |
5536 | 24 | cutoffVals = ConstantVector::getSplat(2, cutoffVals); |
5537 | 24 | Value *lt = Builder.CreateFCmpOLT(clampedResult, cutoffVals); |
5538 | 24 | *pClampedResult = Builder.CreateSelect(lt, clampL, clampR); |
5539 | 24 | *pFinalResult = Builder.CreateSelect(lt, finalL, finalR); |
5540 | 24 | } |
5541 | | |
5542 | | Value *TranslateProcessTessFactors(CallInst *CI, IntrinsicOp IOP, |
5543 | | OP::OpCode opcode, |
5544 | | HLOperationLowerHelper &helper, |
5545 | | HLObjectOperationLowerHelper *pObjHelper, |
5546 | 288 | bool &Translated) { |
5547 | 288 | hlsl::OP *hlslOP = &helper.hlslOP; |
5548 | | // Get partition mode |
5549 | 288 | DXASSERT_NOMSG(helper.functionProps); |
5550 | 288 | DXASSERT(helper.functionProps->shaderKind == ShaderModel::Kind::Hull, |
5551 | 288 | "must be hull shader"); |
5552 | 288 | DXIL::TessellatorPartitioning partition = |
5553 | 288 | helper.functionProps->ShaderProps.HS.partition; |
5554 | | |
5555 | 288 | IRBuilder<> Builder(CI); |
5556 | | |
5557 | 288 | DXIL::OpCode tessFactorOp = DXIL::OpCode::NumOpCodes; |
5558 | 288 | switch (IOP) { |
5559 | 32 | case IntrinsicOp::IOP_Process2DQuadTessFactorsMax: |
5560 | 64 | case IntrinsicOp::IOP_ProcessQuadTessFactorsMax: |
5561 | 96 | case IntrinsicOp::IOP_ProcessTriTessFactorsMax: |
5562 | 96 | tessFactorOp = DXIL::OpCode::FMax; |
5563 | 96 | break; |
5564 | 32 | case IntrinsicOp::IOP_Process2DQuadTessFactorsMin: |
5565 | 64 | case IntrinsicOp::IOP_ProcessQuadTessFactorsMin: |
5566 | 96 | case IntrinsicOp::IOP_ProcessTriTessFactorsMin: |
5567 | 96 | tessFactorOp = DXIL::OpCode::FMin; |
5568 | 96 | break; |
5569 | 96 | default: |
5570 | | // Default is Avg. |
5571 | 96 | break; |
5572 | 288 | } |
5573 | | |
5574 | 288 | Value *rawEdgeFactor = |
5575 | 288 | CI->getArgOperand(HLOperandIndex::kProcessTessFactorRawEdgeFactor); |
5576 | | |
5577 | 288 | Value *insideScale = |
5578 | 288 | CI->getArgOperand(HLOperandIndex::kProcessTessFactorInsideScale); |
5579 | | // Clamp to [0.0f..1.0f], NaN->0.0f. |
5580 | 288 | Value *scales = CleanupTessFactorScale(insideScale, hlslOP, Builder); |
5581 | | // Do partitioning-specific clamping. |
5582 | 288 | Value *clamped = ClampTessFactor(rawEdgeFactor, partition, hlslOP, Builder); |
5583 | | // Round up for integer/pow2 partitioning. |
5584 | 288 | Value *rounded = RoundUpTessFactor(clamped, partition, hlslOP, Builder); |
5585 | | // Store the output. |
5586 | 288 | Value *roundedEdgeFactor = |
5587 | 288 | CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedEdgeFactor); |
5588 | 288 | Builder.CreateStore(rounded, roundedEdgeFactor); |
5589 | | |
5590 | | // Clamp to [1.0f..Inf], NaN->1.0f. |
5591 | 288 | bool isQuad = false; |
5592 | 288 | Value *clean = CleanupTessFactor(rawEdgeFactor, hlslOP, Builder); |
5593 | 288 | Value *factors = nullptr; |
5594 | 288 | switch (IOP) { |
5595 | 32 | case IntrinsicOp::IOP_Process2DQuadTessFactorsAvg: |
5596 | 64 | case IntrinsicOp::IOP_Process2DQuadTessFactorsMax: |
5597 | 96 | case IntrinsicOp::IOP_Process2DQuadTessFactorsMin: |
5598 | 96 | factors = Apply2DQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder); |
5599 | 96 | break; |
5600 | 32 | case IntrinsicOp::IOP_ProcessQuadTessFactorsAvg: |
5601 | 64 | case IntrinsicOp::IOP_ProcessQuadTessFactorsMax: |
5602 | 96 | case IntrinsicOp::IOP_ProcessQuadTessFactorsMin: |
5603 | 96 | factors = ApplyQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder); |
5604 | 96 | isQuad = true; |
5605 | 96 | break; |
5606 | 32 | case IntrinsicOp::IOP_ProcessTriTessFactorsAvg: |
5607 | 64 | case IntrinsicOp::IOP_ProcessTriTessFactorsMax: |
5608 | 96 | case IntrinsicOp::IOP_ProcessTriTessFactorsMin: |
5609 | 96 | factors = ApplyTriTessFactorOp(clean, tessFactorOp, hlslOP, Builder); |
5610 | 96 | break; |
5611 | 0 | default: |
5612 | 0 | DXASSERT(0, "invalid opcode for ProcessTessFactor"); |
5613 | 0 | break; |
5614 | 288 | } |
5615 | | |
5616 | 288 | Value *scaledI = nullptr; |
5617 | 288 | if (scales->getType() == factors->getType()) |
5618 | 96 | scaledI = Builder.CreateFMul(factors, scales); |
5619 | 192 | else { |
5620 | 192 | Value *vecFactors = SplatToVector(factors, scales->getType(), Builder); |
5621 | 192 | scaledI = Builder.CreateFMul(vecFactors, scales); |
5622 | 192 | } |
5623 | | |
5624 | | // Do partitioning-specific clamping. |
5625 | 288 | Value *clampedI = ClampTessFactor(scaledI, partition, hlslOP, Builder); |
5626 | | |
5627 | | // Round up for integer/pow2 partitioning. |
5628 | 288 | Value *roundedI = RoundUpTessFactor(clampedI, partition, hlslOP, Builder); |
5629 | | |
5630 | 288 | Value *finalI = roundedI; |
5631 | | |
5632 | 288 | if (partition == DXIL::TessellatorPartitioning::FractionalOdd) { |
5633 | | // If not max, set to AVG. |
5634 | 72 | if (tessFactorOp != DXIL::OpCode::FMax) |
5635 | 48 | tessFactorOp = DXIL::OpCode::NumOpCodes; |
5636 | | |
5637 | 72 | bool b2D = false; |
5638 | 72 | Value *avgFactorsI = nullptr; |
5639 | 72 | switch (IOP) { |
5640 | 8 | case IntrinsicOp::IOP_Process2DQuadTessFactorsAvg: |
5641 | 16 | case IntrinsicOp::IOP_Process2DQuadTessFactorsMax: |
5642 | 24 | case IntrinsicOp::IOP_Process2DQuadTessFactorsMin: |
5643 | 24 | avgFactorsI = |
5644 | 24 | Apply2DQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder); |
5645 | 24 | b2D = true; |
5646 | 24 | break; |
5647 | 8 | case IntrinsicOp::IOP_ProcessQuadTessFactorsAvg: |
5648 | 16 | case IntrinsicOp::IOP_ProcessQuadTessFactorsMax: |
5649 | 24 | case IntrinsicOp::IOP_ProcessQuadTessFactorsMin: |
5650 | 24 | avgFactorsI = ApplyQuadTessFactorOp(clean, tessFactorOp, hlslOP, Builder); |
5651 | 24 | break; |
5652 | 8 | case IntrinsicOp::IOP_ProcessTriTessFactorsAvg: |
5653 | 16 | case IntrinsicOp::IOP_ProcessTriTessFactorsMax: |
5654 | 24 | case IntrinsicOp::IOP_ProcessTriTessFactorsMin: |
5655 | 24 | avgFactorsI = ApplyTriTessFactorOp(clean, tessFactorOp, hlslOP, Builder); |
5656 | 24 | break; |
5657 | 0 | default: |
5658 | 0 | DXASSERT(0, "invalid opcode for ProcessTessFactor"); |
5659 | 0 | break; |
5660 | 72 | } |
5661 | | |
5662 | 72 | finalI = ResolveSmallValue(/*inout*/ &clampedI, roundedI, avgFactorsI, |
5663 | 72 | /*cufoff*/ 3.0, partition, hlslOP, Builder); |
5664 | | |
5665 | 72 | if (b2D) |
5666 | 24 | ResolveQuadAxes(/*inout*/ &finalI, /*inout*/ &clampedI, /*cutoff*/ 3.0, |
5667 | 24 | partition, hlslOP, Builder); |
5668 | 72 | } |
5669 | | |
5670 | 288 | Value *unroundedInsideFactor = CI->getArgOperand( |
5671 | 288 | HLOperandIndex::kProcessTessFactorUnRoundedInsideFactor); |
5672 | 288 | Type *outFactorTy = unroundedInsideFactor->getType()->getPointerElementType(); |
5673 | 288 | if (outFactorTy != clampedI->getType()) { |
5674 | 96 | DXASSERT(isQuad, "quad only write one channel of out factor"); |
5675 | 96 | (void)isQuad; |
5676 | 96 | clampedI = Builder.CreateExtractElement(clampedI, (uint64_t)0); |
5677 | | // Splat clampedI to float2. |
5678 | 96 | clampedI = SplatToVector(clampedI, outFactorTy, Builder); |
5679 | 96 | } |
5680 | 288 | Builder.CreateStore(clampedI, unroundedInsideFactor); |
5681 | | |
5682 | 288 | Value *roundedInsideFactor = |
5683 | 288 | CI->getArgOperand(HLOperandIndex::kProcessTessFactorRoundedInsideFactor); |
5684 | 288 | if (outFactorTy != finalI->getType()) { |
5685 | 96 | DXASSERT(isQuad, "quad only write one channel of out factor"); |
5686 | 96 | finalI = Builder.CreateExtractElement(finalI, (uint64_t)0); |
5687 | | // Splat finalI to float2. |
5688 | 96 | finalI = SplatToVector(finalI, outFactorTy, Builder); |
5689 | 96 | } |
5690 | 288 | Builder.CreateStore(finalI, roundedInsideFactor); |
5691 | 288 | return nullptr; |
5692 | 288 | } |
5693 | | |
5694 | | } // namespace |
5695 | | |
5696 | | // Ray Tracing. |
5697 | | namespace { |
5698 | | Value *TranslateReportIntersection(CallInst *CI, IntrinsicOp IOP, |
5699 | | OP::OpCode opcode, |
5700 | | HLOperationLowerHelper &helper, |
5701 | | HLObjectOperationLowerHelper *pObjHelper, |
5702 | 142 | bool &Translated) { |
5703 | 142 | hlsl::OP *hlslOP = &helper.hlslOP; |
5704 | 142 | Value *THit = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); |
5705 | 142 | Value *HitKind = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); |
5706 | 142 | Value *Attr = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); |
5707 | 142 | Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode)); |
5708 | | |
5709 | 142 | Type *Ty = Attr->getType(); |
5710 | 142 | Function *F = hlslOP->GetOpFunc(opcode, Ty); |
5711 | | |
5712 | 142 | IRBuilder<> Builder(CI); |
5713 | 142 | return Builder.CreateCall(F, {opArg, THit, HitKind, Attr}); |
5714 | 142 | } |
5715 | | |
5716 | | Value *TranslateCallShader(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
5717 | | HLOperationLowerHelper &helper, |
5718 | | HLObjectOperationLowerHelper *pObjHelper, |
5719 | 126 | bool &Translated) { |
5720 | 126 | hlsl::OP *hlslOP = &helper.hlslOP; |
5721 | 126 | Value *ShaderIndex = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
5722 | 126 | Value *Parameter = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
5723 | 126 | Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode)); |
5724 | | |
5725 | 126 | Type *Ty = Parameter->getType(); |
5726 | 126 | Function *F = hlslOP->GetOpFunc(opcode, Ty); |
5727 | | |
5728 | 126 | IRBuilder<> Builder(CI); |
5729 | 126 | return Builder.CreateCall(F, {opArg, ShaderIndex, Parameter}); |
5730 | 126 | } |
5731 | | |
5732 | | static void TransferRayDescArgs(Value **Args, hlsl::OP *OP, |
5733 | | IRBuilder<> &Builder, CallInst *CI, |
5734 | 732 | unsigned &Index, unsigned &HLIndex) { |
5735 | | // Extract elements from flattened ray desc arguments in HL op. |
5736 | | // float3 Origin; |
5737 | 732 | Value *origin = CI->getArgOperand(HLIndex++); |
5738 | 732 | Args[Index++] = Builder.CreateExtractElement(origin, (uint64_t)0); |
5739 | 732 | Args[Index++] = Builder.CreateExtractElement(origin, 1); |
5740 | 732 | Args[Index++] = Builder.CreateExtractElement(origin, 2); |
5741 | | // float TMin; |
5742 | 732 | Args[Index++] = CI->getArgOperand(HLIndex++); |
5743 | | // float3 Direction; |
5744 | 732 | Value *direction = CI->getArgOperand(HLIndex++); |
5745 | 732 | Args[Index++] = Builder.CreateExtractElement(direction, (uint64_t)0); |
5746 | 732 | Args[Index++] = Builder.CreateExtractElement(direction, 1); |
5747 | 732 | Args[Index++] = Builder.CreateExtractElement(direction, 2); |
5748 | | // float TMax; |
5749 | 732 | Args[Index++] = CI->getArgOperand(HLIndex++); |
5750 | 732 | } |
5751 | | |
5752 | | Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, |
5753 | | HLOperationLowerHelper &Helper, |
5754 | | HLObjectOperationLowerHelper *pObjHelper, |
5755 | 548 | bool &Translated) { |
5756 | 548 | hlsl::OP *OP = &Helper.hlslOP; |
5757 | | |
5758 | 548 | Value *Args[DXIL::OperandIndex::kTraceRayNumOp]; |
5759 | 548 | Args[0] = OP->GetU32Const(static_cast<unsigned>(OpCode)); |
5760 | 548 | unsigned Index = 1, HLIndex = 1; |
5761 | 3.83k | while (HLIndex < HLOperandIndex::kTraceRayRayDescOpIdx) |
5762 | 3.28k | Args[Index++] = CI->getArgOperand(HLIndex++); |
5763 | | |
5764 | 548 | IRBuilder<> Builder(CI); |
5765 | 548 | TransferRayDescArgs(Args, OP, Builder, CI, Index, HLIndex); |
5766 | 548 | DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands() - 1); |
5767 | 548 | DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayPayloadOpIdx); |
5768 | | |
5769 | 548 | Value *Payload = CI->getArgOperand(HLIndex++); |
5770 | 548 | Args[Index++] = Payload; |
5771 | | |
5772 | 548 | DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands()); |
5773 | 548 | DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayNumOp); |
5774 | | |
5775 | 548 | Type *Ty = Payload->getType(); |
5776 | 548 | Function *F = OP->GetOpFunc(OpCode, Ty); |
5777 | | |
5778 | 548 | return Builder.CreateCall(F, Args); |
5779 | 548 | } |
5780 | | |
5781 | | // RayQuery methods |
5782 | | |
5783 | | Value *TranslateAllocateRayQuery(CallInst *CI, IntrinsicOp IOP, |
5784 | | OP::OpCode opcode, |
5785 | | HLOperationLowerHelper &helper, |
5786 | | HLObjectOperationLowerHelper *pObjHelper, |
5787 | 146 | bool &Translated) { |
5788 | 146 | hlsl::OP *hlslOP = &helper.hlslOP; |
5789 | | // upgrade to allocateRayQuery2 if there is a non-zero 2nd template arg |
5790 | 146 | DXASSERT(CI->getNumArgOperands() == 3, |
5791 | 146 | "hlopcode for allocaterayquery always expects 3 arguments"); |
5792 | | |
5793 | 146 | llvm::Value *Arg = |
5794 | 146 | CI->getArgOperand(HLOperandIndex::kAllocateRayQueryRayQueryFlagsIdx); |
5795 | 146 | llvm::ConstantInt *ConstVal = llvm::dyn_cast<llvm::ConstantInt>(Arg); |
5796 | 146 | DXASSERT(ConstVal, |
5797 | 146 | "2nd argument to allocaterayquery must always be a constant value"); |
5798 | 146 | if (ConstVal->getValue().getZExtValue() != 0) { |
5799 | 6 | Value *refArgs[3] = { |
5800 | 6 | nullptr, CI->getOperand(HLOperandIndex::kAllocateRayQueryRayFlagsIdx), |
5801 | 6 | CI->getOperand(HLOperandIndex::kAllocateRayQueryRayQueryFlagsIdx)}; |
5802 | 6 | opcode = OP::OpCode::AllocateRayQuery2; |
5803 | 6 | return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP); |
5804 | 6 | } |
5805 | 140 | Value *refArgs[2] = { |
5806 | 140 | nullptr, CI->getOperand(HLOperandIndex::kAllocateRayQueryRayFlagsIdx)}; |
5807 | 140 | return TrivialDxilOperation(opcode, refArgs, helper.voidTy, CI, hlslOP); |
5808 | 146 | } |
5809 | | |
5810 | | Value *TranslateTraceRayInline(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
5811 | | HLOperationLowerHelper &helper, |
5812 | | HLObjectOperationLowerHelper *pObjHelper, |
5813 | 172 | bool &Translated) { |
5814 | 172 | hlsl::OP *hlslOP = &helper.hlslOP; |
5815 | 172 | Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode)); |
5816 | | |
5817 | 172 | Value *Args[DXIL::OperandIndex::kTraceRayInlineNumOp]; |
5818 | 172 | Args[0] = opArg; |
5819 | 172 | unsigned Index = 1, HLIndex = 1; |
5820 | 860 | while (HLIndex < HLOperandIndex::kTraceRayInlineRayDescOpIdx) |
5821 | 688 | Args[Index++] = CI->getArgOperand(HLIndex++); |
5822 | | |
5823 | 172 | IRBuilder<> Builder(CI); |
5824 | 172 | DXASSERT_NOMSG(HLIndex == HLOperandIndex::kTraceRayInlineRayDescOpIdx); |
5825 | 172 | DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayInlineRayDescOpIdx); |
5826 | 172 | TransferRayDescArgs(Args, hlslOP, Builder, CI, Index, HLIndex); |
5827 | 172 | DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands()); |
5828 | 172 | DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayInlineNumOp); |
5829 | | |
5830 | 172 | Function *F = hlslOP->GetOpFunc(opcode, Builder.getVoidTy()); |
5831 | | |
5832 | 172 | return Builder.CreateCall(F, Args); |
5833 | 172 | } |
5834 | | |
5835 | | Value *TranslateCommitProceduralPrimitiveHit( |
5836 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
5837 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
5838 | 8 | bool &Translated) { |
5839 | 8 | hlsl::OP *hlslOP = &helper.hlslOP; |
5840 | 8 | Value *THit = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
5841 | 8 | Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode)); |
5842 | 8 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
5843 | | |
5844 | 8 | Value *Args[] = {opArg, handle, THit}; |
5845 | | |
5846 | 8 | IRBuilder<> Builder(CI); |
5847 | 8 | Function *F = hlslOP->GetOpFunc(opcode, Builder.getVoidTy()); |
5848 | | |
5849 | 8 | return Builder.CreateCall(F, Args); |
5850 | 8 | } |
5851 | | |
5852 | | Value *TranslateGenericRayQueryMethod(CallInst *CI, IntrinsicOp IOP, |
5853 | | OP::OpCode opcode, |
5854 | | HLOperationLowerHelper &helper, |
5855 | | HLObjectOperationLowerHelper *pObjHelper, |
5856 | 268 | bool &Translated) { |
5857 | 268 | hlsl::OP *hlslOP = &helper.hlslOP; |
5858 | | |
5859 | 268 | Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode)); |
5860 | 268 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
5861 | | |
5862 | 268 | IRBuilder<> Builder(CI); |
5863 | 268 | Function *F = hlslOP->GetOpFunc(opcode, CI->getType()); |
5864 | | |
5865 | 268 | return Builder.CreateCall(F, {opArg, handle}); |
5866 | 268 | } |
5867 | | |
5868 | | Value *TranslateRayQueryMatrix3x4Operation( |
5869 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
5870 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
5871 | 32 | bool &Translated) { |
5872 | 32 | hlsl::OP *hlslOP = &helper.hlslOP; |
5873 | 32 | VectorType *Ty = cast<VectorType>(CI->getType()); |
5874 | 32 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
5875 | 32 | uint32_t rVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2}; |
5876 | 32 | Constant *rows = ConstantDataVector::get(CI->getContext(), rVals); |
5877 | 32 | uint8_t cVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}; |
5878 | 32 | Constant *cols = ConstantDataVector::get(CI->getContext(), cVals); |
5879 | 32 | Value *retVal = TrivialDxilOperation(opcode, {nullptr, handle, rows, cols}, |
5880 | 32 | Ty, CI, hlslOP); |
5881 | 32 | return retVal; |
5882 | 32 | } |
5883 | | |
5884 | | Value *TranslateRayQueryTransposedMatrix3x4Operation( |
5885 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
5886 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
5887 | 32 | bool &Translated) { |
5888 | 32 | hlsl::OP *hlslOP = &helper.hlslOP; |
5889 | 32 | VectorType *Ty = cast<VectorType>(CI->getType()); |
5890 | 32 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
5891 | 32 | uint32_t rVals[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2}; |
5892 | 32 | Constant *rows = ConstantDataVector::get(CI->getContext(), rVals); |
5893 | 32 | uint8_t cVals[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; |
5894 | 32 | Constant *cols = ConstantDataVector::get(CI->getContext(), cVals); |
5895 | 32 | Value *retVal = TrivialDxilOperation(opcode, {nullptr, handle, rows, cols}, |
5896 | 32 | Ty, CI, hlslOP); |
5897 | 32 | return retVal; |
5898 | 32 | } |
5899 | | |
5900 | | Value *TranslateRayQueryFloat2Getter(CallInst *CI, IntrinsicOp IOP, |
5901 | | OP::OpCode opcode, |
5902 | | HLOperationLowerHelper &helper, |
5903 | | HLObjectOperationLowerHelper *pObjHelper, |
5904 | 24 | bool &Translated) { |
5905 | 24 | hlsl::OP *hlslOP = &helper.hlslOP; |
5906 | 24 | VectorType *Ty = cast<VectorType>(CI->getType()); |
5907 | 24 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
5908 | 24 | uint8_t elementVals[] = {0, 1}; |
5909 | 24 | Constant *element = ConstantDataVector::get(CI->getContext(), elementVals); |
5910 | 24 | Value *retVal = |
5911 | 24 | TrivialDxilOperation(opcode, {nullptr, handle, element}, Ty, CI, hlslOP); |
5912 | 24 | return retVal; |
5913 | 24 | } |
5914 | | |
5915 | | Value *TranslateRayQueryFloat3Getter(CallInst *CI, IntrinsicOp IOP, |
5916 | | OP::OpCode opcode, |
5917 | | HLOperationLowerHelper &helper, |
5918 | | HLObjectOperationLowerHelper *pObjHelper, |
5919 | 48 | bool &Translated) { |
5920 | 48 | hlsl::OP *hlslOP = &helper.hlslOP; |
5921 | 48 | VectorType *Ty = cast<VectorType>(CI->getType()); |
5922 | 48 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
5923 | 48 | uint8_t elementVals[] = {0, 1, 2}; |
5924 | 48 | Constant *element = ConstantDataVector::get(CI->getContext(), elementVals); |
5925 | 48 | Value *retVal = |
5926 | 48 | TrivialDxilOperation(opcode, {nullptr, handle, element}, Ty, CI, hlslOP); |
5927 | 48 | return retVal; |
5928 | 48 | } |
5929 | | |
5930 | | Value *TranslateNoArgVectorOperation(CallInst *CI, IntrinsicOp IOP, |
5931 | | OP::OpCode opcode, |
5932 | | HLOperationLowerHelper &helper, |
5933 | | HLObjectOperationLowerHelper *pObjHelper, |
5934 | 446 | bool &Translated) { |
5935 | 446 | hlsl::OP *hlslOP = &helper.hlslOP; |
5936 | 446 | VectorType *Ty = cast<VectorType>(CI->getType()); |
5937 | 446 | uint8_t vals[] = {0, 1, 2, 3}; |
5938 | 446 | Constant *src = ConstantDataVector::get(CI->getContext(), vals); |
5939 | 446 | Value *retVal = TrivialDxilOperation(opcode, {nullptr, src}, Ty, CI, hlslOP); |
5940 | 446 | return retVal; |
5941 | 446 | } |
5942 | | |
5943 | | template <typename ColElemTy> |
5944 | | static void GetMatrixIndices(Constant *&Rows, Constant *&Cols, bool Is3x4, |
5945 | 72 | LLVMContext &Ctx) { |
5946 | 72 | if (Is3x4) { |
5947 | 48 | uint32_t RVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2}; |
5948 | 48 | Rows = ConstantDataVector::get(Ctx, RVals); |
5949 | 48 | ColElemTy CVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}; |
5950 | 48 | Cols = ConstantDataVector::get(Ctx, CVals); |
5951 | 48 | return; |
5952 | 48 | } |
5953 | 24 | uint32_t RVals[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2}; |
5954 | 24 | Rows = ConstantDataVector::get(Ctx, RVals); |
5955 | 24 | ColElemTy CVals[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; |
5956 | 24 | Cols = ConstantDataVector::get(Ctx, CVals); |
5957 | 24 | } HLOperationLower.cpp:void (anonymous namespace)::GetMatrixIndices<unsigned char>(llvm::Constant*&, llvm::Constant*&, bool, llvm::LLVMContext&) Line | Count | Source | 5945 | 56 | LLVMContext &Ctx) { | 5946 | 56 | if (Is3x4) { | 5947 | 40 | uint32_t RVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2}; | 5948 | 40 | Rows = ConstantDataVector::get(Ctx, RVals); | 5949 | 40 | ColElemTy CVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}; | 5950 | 40 | Cols = ConstantDataVector::get(Ctx, CVals); | 5951 | 40 | return; | 5952 | 40 | } | 5953 | 16 | uint32_t RVals[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2}; | 5954 | 16 | Rows = ConstantDataVector::get(Ctx, RVals); | 5955 | 16 | ColElemTy CVals[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; | 5956 | 16 | Cols = ConstantDataVector::get(Ctx, CVals); | 5957 | 16 | } |
HLOperationLower.cpp:void (anonymous namespace)::GetMatrixIndices<unsigned int>(llvm::Constant*&, llvm::Constant*&, bool, llvm::LLVMContext&) Line | Count | Source | 5945 | 16 | LLVMContext &Ctx) { | 5946 | 16 | if (Is3x4) { | 5947 | 8 | uint32_t RVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2}; | 5948 | 8 | Rows = ConstantDataVector::get(Ctx, RVals); | 5949 | 8 | ColElemTy CVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}; | 5950 | 8 | Cols = ConstantDataVector::get(Ctx, CVals); | 5951 | 8 | return; | 5952 | 8 | } | 5953 | 8 | uint32_t RVals[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2}; | 5954 | 8 | Rows = ConstantDataVector::get(Ctx, RVals); | 5955 | 8 | ColElemTy CVals[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; | 5956 | 8 | Cols = ConstantDataVector::get(Ctx, CVals); | 5957 | 8 | } |
|
5958 | | |
5959 | | Value *TranslateNoArgMatrix3x4Operation( |
5960 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
5961 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
5962 | 40 | bool &Translated) { |
5963 | 40 | hlsl::OP *hlslOP = &helper.hlslOP; |
5964 | 40 | VectorType *Ty = cast<VectorType>(CI->getType()); |
5965 | 40 | Constant *Rows, *Cols; |
5966 | 40 | GetMatrixIndices<uint8_t>(Rows, Cols, true, CI->getContext()); |
5967 | 40 | return TrivialDxilOperation(opcode, {nullptr, Rows, Cols}, Ty, CI, hlslOP); |
5968 | 40 | } |
5969 | | |
5970 | | Value *TranslateNoArgTransposedMatrix3x4Operation( |
5971 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
5972 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
5973 | 16 | bool &Translated) { |
5974 | 16 | hlsl::OP *hlslOP = &helper.hlslOP; |
5975 | 16 | VectorType *Ty = cast<VectorType>(CI->getType()); |
5976 | 16 | Constant *Rows, *Cols; |
5977 | 16 | GetMatrixIndices<uint8_t>(Rows, Cols, false, CI->getContext()); |
5978 | 16 | return TrivialDxilOperation(opcode, {nullptr, Rows, Cols}, Ty, CI, hlslOP); |
5979 | 16 | } |
5980 | | |
5981 | | /* |
5982 | | HLSL: |
5983 | | void ThreadNodeOutputRecords<recordType>::OutputComplete(); |
5984 | | void GroupNodeOutputRecords<recordType>::OutputComplete(); |
5985 | | DXIL: |
5986 | | void @dx.op.outputComplete(i32 %Opcode, %dx.types.NodeRecordHandle |
5987 | | %RecordHandle) |
5988 | | */ |
5989 | | Value *TranslateNodeOutputComplete(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, |
5990 | | HLOperationLowerHelper &helper, |
5991 | | HLObjectOperationLowerHelper *pObjHelper, |
5992 | 142 | bool &Translated) { |
5993 | 142 | hlsl::OP *OP = &helper.hlslOP; |
5994 | | |
5995 | 142 | Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
5996 | 142 | DXASSERT_NOMSG(handle->getType() == OP->GetNodeRecordHandleType()); |
5997 | 142 | Function *dxilFunc = OP->GetOpFunc(op, CI->getType()); |
5998 | 142 | Value *opArg = OP->GetU32Const((unsigned)op); |
5999 | | |
6000 | 142 | IRBuilder<> Builder(CI); |
6001 | 142 | return Builder.CreateCall(dxilFunc, {opArg, handle}); |
6002 | 142 | } |
6003 | | |
6004 | | Value *TranslateNoArgNoReturnPreserveOutput( |
6005 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6006 | | HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, |
6007 | 140 | bool &Translated) { |
6008 | 140 | Instruction *pResult = cast<Instruction>( |
6009 | 140 | TrivialNoArgOperation(CI, IOP, opcode, helper, pObjHelper, Translated)); |
6010 | | // HL intrinsic must have had a return injected just after the call. |
6011 | | // SROA_Parameter_HLSL will copy from alloca to output just before each |
6012 | | // return. Now move call after the copy and just before the return. |
6013 | 140 | if (isa<ReturnInst>(pResult->getNextNode())) |
6014 | 0 | return pResult; |
6015 | 140 | ReturnInst *RetI = cast<ReturnInst>(pResult->getParent()->getTerminator()); |
6016 | 140 | pResult->removeFromParent(); |
6017 | 140 | pResult->insertBefore(RetI); |
6018 | 140 | return pResult; |
6019 | 140 | } |
6020 | | |
6021 | | // Special half dot2 with accumulate to float |
6022 | | Value *TranslateDot2Add(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6023 | | HLOperationLowerHelper &helper, |
6024 | | HLObjectOperationLowerHelper *pObjHelper, |
6025 | 16 | bool &Translated) { |
6026 | 16 | hlsl::OP *hlslOP = &helper.hlslOP; |
6027 | 16 | Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); |
6028 | 16 | const unsigned vecSize = 2; |
6029 | 16 | DXASSERT(src0->getType()->isVectorTy() && |
6030 | 16 | vecSize == src0->getType()->getVectorNumElements() && |
6031 | 16 | src0->getType()->getScalarType()->isHalfTy(), |
6032 | 16 | "otherwise, unexpected input dimension or component type"); |
6033 | | |
6034 | 16 | Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); |
6035 | 16 | DXASSERT(src0->getType() == src1->getType(), |
6036 | 16 | "otherwise, mismatched argument types"); |
6037 | 16 | Value *accArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); |
6038 | 16 | Type *accTy = accArg->getType(); |
6039 | 16 | DXASSERT(!accTy->isVectorTy() && accTy->isFloatTy(), |
6040 | 16 | "otherwise, unexpected accumulator type"); |
6041 | 16 | IRBuilder<> Builder(CI); |
6042 | | |
6043 | 16 | Function *dxilFunc = hlslOP->GetOpFunc(opcode, accTy); |
6044 | 16 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
6045 | | |
6046 | 16 | SmallVector<Value *, 6> args; |
6047 | 16 | args.emplace_back(opArg); |
6048 | 16 | args.emplace_back(accArg); |
6049 | 48 | for (unsigned i = 0; i < vecSize; i++32 ) |
6050 | 32 | args.emplace_back(Builder.CreateExtractElement(src0, i)); |
6051 | 48 | for (unsigned i = 0; i < vecSize; i++32 ) |
6052 | 32 | args.emplace_back(Builder.CreateExtractElement(src1, i)); |
6053 | 16 | return Builder.CreateCall(dxilFunc, args); |
6054 | 16 | } |
6055 | | |
6056 | | Value *TranslateDot4AddPacked(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6057 | | HLOperationLowerHelper &helper, |
6058 | | HLObjectOperationLowerHelper *pObjHelper, |
6059 | 32 | bool &Translated) { |
6060 | 32 | hlsl::OP *hlslOP = &helper.hlslOP; |
6061 | 32 | Value *src0 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); |
6062 | 32 | DXASSERT( |
6063 | 32 | !src0->getType()->isVectorTy() && src0->getType()->isIntegerTy(32), |
6064 | 32 | "otherwise, unexpected vector support in high level intrinsic template"); |
6065 | 32 | Value *src1 = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); |
6066 | 32 | DXASSERT(src0->getType() == src1->getType(), |
6067 | 32 | "otherwise, mismatched argument types"); |
6068 | 32 | Value *accArg = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); |
6069 | 32 | Type *accTy = accArg->getType(); |
6070 | 32 | DXASSERT( |
6071 | 32 | !accTy->isVectorTy() && accTy->isIntegerTy(32), |
6072 | 32 | "otherwise, unexpected vector support in high level intrinsic template"); |
6073 | 32 | IRBuilder<> Builder(CI); |
6074 | | |
6075 | 32 | Function *dxilFunc = hlslOP->GetOpFunc(opcode, accTy); |
6076 | 32 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
6077 | 32 | return Builder.CreateCall(dxilFunc, {opArg, accArg, src0, src1}); |
6078 | 32 | } |
6079 | | |
6080 | | Value *TranslatePack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6081 | | HLOperationLowerHelper &helper, |
6082 | | HLObjectOperationLowerHelper *pObjHelper, |
6083 | 72 | bool &Translated) { |
6084 | 72 | hlsl::OP *hlslOP = &helper.hlslOP; |
6085 | | |
6086 | 72 | Value *val = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
6087 | 72 | Type *valTy = val->getType(); |
6088 | 72 | Type *eltTy = valTy->getScalarType(); |
6089 | | |
6090 | 72 | DXASSERT(valTy->isVectorTy() && valTy->getVectorNumElements() == 4 && |
6091 | 72 | eltTy->isIntegerTy() && |
6092 | 72 | (eltTy->getIntegerBitWidth() == 32 || |
6093 | 72 | eltTy->getIntegerBitWidth() == 16), |
6094 | 72 | "otherwise, unexpected input dimension or component type"); |
6095 | | |
6096 | 72 | DXIL::PackMode packMode = DXIL::PackMode::Trunc; |
6097 | 72 | switch (IOP) { |
6098 | 18 | case hlsl::IntrinsicOp::IOP_pack_clamp_s8: |
6099 | 18 | packMode = DXIL::PackMode::SClamp; |
6100 | 18 | break; |
6101 | 18 | case hlsl::IntrinsicOp::IOP_pack_clamp_u8: |
6102 | 18 | packMode = DXIL::PackMode::UClamp; |
6103 | 18 | break; |
6104 | 18 | case hlsl::IntrinsicOp::IOP_pack_s8: |
6105 | 36 | case hlsl::IntrinsicOp::IOP_pack_u8: |
6106 | 36 | packMode = DXIL::PackMode::Trunc; |
6107 | 36 | break; |
6108 | 0 | default: |
6109 | 0 | DXASSERT(false, "unexpected opcode"); |
6110 | 0 | break; |
6111 | 72 | } |
6112 | | |
6113 | 72 | IRBuilder<> Builder(CI); |
6114 | 72 | Function *dxilFunc = hlslOP->GetOpFunc(opcode, eltTy); |
6115 | 72 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
6116 | 72 | Constant *packModeArg = hlslOP->GetU8Const((unsigned)packMode); |
6117 | | |
6118 | 72 | Value *elt0 = Builder.CreateExtractElement(val, (uint64_t)0); |
6119 | 72 | Value *elt1 = Builder.CreateExtractElement(val, (uint64_t)1); |
6120 | 72 | Value *elt2 = Builder.CreateExtractElement(val, (uint64_t)2); |
6121 | 72 | Value *elt3 = Builder.CreateExtractElement(val, (uint64_t)3); |
6122 | 72 | return Builder.CreateCall(dxilFunc, |
6123 | 72 | {opArg, packModeArg, elt0, elt1, elt2, elt3}); |
6124 | 72 | } |
6125 | | |
6126 | | Value *TranslateUnpack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6127 | | HLOperationLowerHelper &helper, |
6128 | | HLObjectOperationLowerHelper *pObjHelper, |
6129 | 88 | bool &Translated) { |
6130 | 88 | hlsl::OP *hlslOP = &helper.hlslOP; |
6131 | | |
6132 | 88 | Value *packedVal = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); |
6133 | 88 | DXASSERT( |
6134 | 88 | !packedVal->getType()->isVectorTy() && |
6135 | 88 | packedVal->getType()->isIntegerTy(32), |
6136 | 88 | "otherwise, unexpected vector support in high level intrinsic template"); |
6137 | | |
6138 | 88 | Type *overloadType = nullptr; |
6139 | 88 | DXIL::UnpackMode unpackMode = DXIL::UnpackMode::Unsigned; |
6140 | 88 | switch (IOP) { |
6141 | 24 | case hlsl::IntrinsicOp::IOP_unpack_s8s32: |
6142 | 24 | unpackMode = DXIL::UnpackMode::Signed; |
6143 | 24 | overloadType = helper.i32Ty; |
6144 | 24 | break; |
6145 | 24 | case hlsl::IntrinsicOp::IOP_unpack_u8u32: |
6146 | 24 | unpackMode = DXIL::UnpackMode::Unsigned; |
6147 | 24 | overloadType = helper.i32Ty; |
6148 | 24 | break; |
6149 | 20 | case hlsl::IntrinsicOp::IOP_unpack_s8s16: |
6150 | 20 | unpackMode = DXIL::UnpackMode::Signed; |
6151 | 20 | overloadType = helper.i16Ty; |
6152 | 20 | break; |
6153 | 20 | case hlsl::IntrinsicOp::IOP_unpack_u8u16: |
6154 | 20 | unpackMode = DXIL::UnpackMode::Unsigned; |
6155 | 20 | overloadType = helper.i16Ty; |
6156 | 20 | break; |
6157 | 0 | default: |
6158 | 0 | DXASSERT(false, "unexpected opcode"); |
6159 | 0 | break; |
6160 | 88 | } |
6161 | | |
6162 | 88 | IRBuilder<> Builder(CI); |
6163 | 88 | Function *dxilFunc = hlslOP->GetOpFunc(opcode, overloadType); |
6164 | 88 | Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); |
6165 | 88 | Constant *unpackModeArg = hlslOP->GetU8Const((unsigned)unpackMode); |
6166 | 88 | Value *Res = Builder.CreateCall(dxilFunc, {opArg, unpackModeArg, packedVal}); |
6167 | | |
6168 | | // Convert the final aggregate into a vector to make the types match |
6169 | 88 | const unsigned vecSize = 4; |
6170 | 88 | Value *ResVec = UndefValue::get(CI->getType()); |
6171 | 440 | for (unsigned i = 0; i < vecSize; ++i352 ) { |
6172 | 352 | Value *Elt = Builder.CreateExtractValue(Res, i); |
6173 | 352 | ResVec = Builder.CreateInsertElement(ResVec, Elt, i); |
6174 | 352 | } |
6175 | 88 | return ResVec; |
6176 | 88 | } |
6177 | | |
6178 | | } // namespace |
6179 | | |
6180 | | // Shader Execution Reordering. |
6181 | | namespace { |
6182 | | Value *TranslateHitObjectMakeNop(CallInst *CI, IntrinsicOp IOP, |
6183 | | OP::OpCode Opcode, |
6184 | | HLOperationLowerHelper &Helper, |
6185 | | HLObjectOperationLowerHelper *ObjHelper, |
6186 | 22 | bool &Translated) { |
6187 | 22 | hlsl::OP *HlslOP = &Helper.hlslOP; |
6188 | 22 | IRBuilder<> Builder(CI); |
6189 | 22 | Value *HitObjectPtr = CI->getArgOperand(1); |
6190 | 22 | Value *HitObject = TrivialDxilOperation( |
6191 | 22 | Opcode, {nullptr}, Type::getVoidTy(CI->getContext()), CI, HlslOP); |
6192 | 22 | Builder.CreateStore(HitObject, HitObjectPtr); |
6193 | 22 | DXASSERT( |
6194 | 22 | CI->use_empty(), |
6195 | 22 | "Default ctor return type is a Clang artifact. Value must not be used"); |
6196 | 22 | return nullptr; |
6197 | 22 | } |
6198 | | |
6199 | | Value *TranslateHitObjectMakeMiss(CallInst *CI, IntrinsicOp IOP, |
6200 | | OP::OpCode Opcode, |
6201 | | HLOperationLowerHelper &Helper, |
6202 | | HLObjectOperationLowerHelper *ObjHelper, |
6203 | 6 | bool &Translated) { |
6204 | 6 | DXASSERT_NOMSG(CI->getNumArgOperands() == |
6205 | 6 | HLOperandIndex::kHitObjectMakeMiss_NumOp); |
6206 | 6 | hlsl::OP *OP = &Helper.hlslOP; |
6207 | 6 | IRBuilder<> Builder(CI); |
6208 | 6 | Value *Args[DXIL::OperandIndex::kHitObjectMakeMiss_NumOp]; |
6209 | 6 | Args[0] = nullptr; // Filled in by TrivialDxilOperation |
6210 | | |
6211 | 6 | unsigned DestIdx = 1, SrcIdx = 1; |
6212 | 6 | Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); |
6213 | 6 | Args[DestIdx++] = CI->getArgOperand(SrcIdx++); // RayFlags |
6214 | 6 | Args[DestIdx++] = CI->getArgOperand(SrcIdx++); // MissShaderIdx |
6215 | | |
6216 | 6 | DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectMakeMiss_RayDescOpIdx); |
6217 | 6 | DXASSERT_NOMSG(DestIdx == |
6218 | 6 | DXIL::OperandIndex::kHitObjectMakeMiss_RayDescOpIdx); |
6219 | 6 | TransferRayDescArgs(Args, OP, Builder, CI, DestIdx, SrcIdx); |
6220 | 6 | DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); |
6221 | 6 | DXASSERT_NOMSG(DestIdx == DXIL::OperandIndex::kHitObjectMakeMiss_NumOp); |
6222 | | |
6223 | 6 | Value *OutHitObject = |
6224 | 6 | TrivialDxilOperation(Opcode, Args, Helper.voidTy, CI, OP); |
6225 | 6 | Builder.CreateStore(OutHitObject, HitObjectPtr); |
6226 | 6 | return nullptr; |
6227 | 6 | } |
6228 | | |
6229 | | Value *TranslateMaybeReorderThread(CallInst *CI, IntrinsicOp IOP, |
6230 | | OP::OpCode OpCode, |
6231 | | HLOperationLowerHelper &Helper, |
6232 | | HLObjectOperationLowerHelper *pObjHelper, |
6233 | 36 | bool &Translated) { |
6234 | 36 | hlsl::OP *OP = &Helper.hlslOP; |
6235 | | |
6236 | | // clang-format off |
6237 | | // Match MaybeReorderThread overload variants: |
6238 | | // void MaybeReorderThread(<Op>, |
6239 | | // HitObject Hit); |
6240 | | // void MaybeReorderThread(<Op>, |
6241 | | // uint CoherenceHint, |
6242 | | // uint NumCoherenceHintBitsFromLSB ); |
6243 | | // void MaybeReorderThread(<Op>, |
6244 | | // HitObject Hit, |
6245 | | // uint CoherenceHint, |
6246 | | // uint NumCoherenceHintBitsFromLSB); |
6247 | | // clang-format on |
6248 | 36 | const unsigned NumHLArgs = CI->getNumArgOperands(); |
6249 | 36 | DXASSERT_NOMSG(NumHLArgs >= 2); |
6250 | | |
6251 | | // Use a NOP HitObject for MaybeReorderThread without HitObject. |
6252 | 36 | Value *HitObject = nullptr; |
6253 | 36 | unsigned HLIndex = 1; |
6254 | 36 | if (3 == NumHLArgs) { |
6255 | 6 | HitObject = TrivialDxilOperation(DXIL::OpCode::HitObject_MakeNop, {nullptr}, |
6256 | 6 | Type::getVoidTy(CI->getContext()), CI, OP); |
6257 | 30 | } else { |
6258 | 30 | Value *FirstParam = CI->getArgOperand(HLIndex); |
6259 | 30 | DXASSERT_NOMSG(isa<PointerType>(FirstParam->getType())); |
6260 | 30 | IRBuilder<> Builder(CI); |
6261 | 30 | HitObject = Builder.CreateLoad(FirstParam); |
6262 | 30 | HLIndex++; |
6263 | 30 | } |
6264 | | |
6265 | | // If there are trailing parameters, these have to be the two coherence bit |
6266 | | // parameters |
6267 | 36 | Value *CoherenceHint = nullptr; |
6268 | 36 | Value *NumCoherenceHintBits = nullptr; |
6269 | 36 | if (2 != NumHLArgs) { |
6270 | 12 | DXASSERT_NOMSG(HLIndex + 2 == NumHLArgs); |
6271 | 12 | CoherenceHint = CI->getArgOperand(HLIndex++); |
6272 | 12 | NumCoherenceHintBits = CI->getArgOperand(HLIndex++); |
6273 | 12 | DXASSERT_NOMSG(Helper.i32Ty == CoherenceHint->getType()); |
6274 | 12 | DXASSERT_NOMSG(Helper.i32Ty == NumCoherenceHintBits->getType()); |
6275 | 24 | } else { |
6276 | 24 | CoherenceHint = UndefValue::get(Helper.i32Ty); |
6277 | 24 | NumCoherenceHintBits = OP->GetU32Const(0); |
6278 | 24 | } |
6279 | | |
6280 | 36 | TrivialDxilOperation( |
6281 | 36 | OpCode, {nullptr, HitObject, CoherenceHint, NumCoherenceHintBits}, |
6282 | 36 | Type::getVoidTy(CI->getContext()), CI, OP); |
6283 | 36 | return nullptr; |
6284 | 36 | } |
6285 | | |
6286 | | Value *TranslateHitObjectFromRayQuery(CallInst *CI, IntrinsicOp IOP, |
6287 | | OP::OpCode OpCode, |
6288 | | HLOperationLowerHelper &Helper, |
6289 | | HLObjectOperationLowerHelper *pObjHelper, |
6290 | 8 | bool &Translated) { |
6291 | 8 | hlsl::OP *OP = &Helper.hlslOP; |
6292 | 8 | IRBuilder<> Builder(CI); |
6293 | | |
6294 | 8 | unsigned SrcIdx = 1; |
6295 | 8 | Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); |
6296 | 8 | Value *RayQuery = CI->getArgOperand(SrcIdx++); |
6297 | | |
6298 | 8 | if (CI->getNumArgOperands() == |
6299 | 8 | HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_NumOp) { |
6300 | 4 | Value *HitKind = CI->getArgOperand(SrcIdx++); |
6301 | 4 | Value *AttribSrc = CI->getArgOperand(SrcIdx++); |
6302 | 4 | DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); |
6303 | 4 | OpCode = DXIL::OpCode::HitObject_FromRayQueryWithAttrs; |
6304 | 4 | Type *AttrTy = AttribSrc->getType(); |
6305 | 4 | Value *OutHitObject = TrivialDxilOperation( |
6306 | 4 | OpCode, {nullptr, RayQuery, HitKind, AttribSrc}, AttrTy, CI, OP); |
6307 | 4 | Builder.CreateStore(OutHitObject, HitObjectPtr); |
6308 | 4 | return nullptr; |
6309 | 4 | } |
6310 | | |
6311 | 4 | DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); |
6312 | 4 | OpCode = DXIL::OpCode::HitObject_FromRayQuery; |
6313 | 4 | Value *OutHitObject = |
6314 | 4 | TrivialDxilOperation(OpCode, {nullptr, RayQuery}, Helper.voidTy, CI, OP); |
6315 | 4 | Builder.CreateStore(OutHitObject, HitObjectPtr); |
6316 | 4 | return nullptr; |
6317 | 8 | } |
6318 | | |
6319 | | Value *TranslateHitObjectTraceRay(CallInst *CI, IntrinsicOp IOP, |
6320 | | OP::OpCode OpCode, |
6321 | | HLOperationLowerHelper &Helper, |
6322 | | HLObjectOperationLowerHelper *pObjHelper, |
6323 | 6 | bool &Translated) { |
6324 | 6 | hlsl::OP *OP = &Helper.hlslOP; |
6325 | 6 | IRBuilder<> Builder(CI); |
6326 | | |
6327 | 6 | DXASSERT_NOMSG(CI->getNumArgOperands() == |
6328 | 6 | HLOperandIndex::kHitObjectTraceRay_NumOp); |
6329 | 6 | Value *Args[DXIL::OperandIndex::kHitObjectTraceRay_NumOp]; |
6330 | 6 | Value *OpArg = OP->GetU32Const(static_cast<unsigned>(OpCode)); |
6331 | 6 | Args[0] = OpArg; |
6332 | | |
6333 | 6 | unsigned DestIdx = 1, SrcIdx = 1; |
6334 | 6 | Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); |
6335 | 6 | Args[DestIdx++] = CI->getArgOperand(SrcIdx++); |
6336 | 36 | for (; SrcIdx < HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx; |
6337 | 30 | ++SrcIdx, ++DestIdx) { |
6338 | 30 | Args[DestIdx] = CI->getArgOperand(SrcIdx); |
6339 | 30 | } |
6340 | | |
6341 | 6 | DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx); |
6342 | 6 | DXASSERT_NOMSG(DestIdx == |
6343 | 6 | DXIL::OperandIndex::kHitObjectTraceRay_RayDescOpIdx); |
6344 | 6 | TransferRayDescArgs(Args, OP, Builder, CI, DestIdx, SrcIdx); |
6345 | 6 | DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands() - 1); |
6346 | 6 | DXASSERT_NOMSG(DestIdx == |
6347 | 6 | DXIL::OperandIndex::kHitObjectTraceRay_PayloadOpIdx); |
6348 | | |
6349 | 6 | Value *Payload = CI->getArgOperand(SrcIdx++); |
6350 | 6 | Args[DestIdx++] = Payload; |
6351 | | |
6352 | 6 | DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); |
6353 | 6 | DXASSERT_NOMSG(DestIdx == DXIL::OperandIndex::kHitObjectTraceRay_NumOp); |
6354 | | |
6355 | 6 | Function *F = OP->GetOpFunc(OpCode, Payload->getType()); |
6356 | | |
6357 | 6 | Value *OutHitObject = Builder.CreateCall(F, Args); |
6358 | 6 | Builder.CreateStore(OutHitObject, HitObjectPtr); |
6359 | 6 | return nullptr; |
6360 | 6 | } |
6361 | | |
6362 | | Value *TranslateHitObjectInvoke(CallInst *CI, IntrinsicOp IOP, |
6363 | | OP::OpCode OpCode, |
6364 | | HLOperationLowerHelper &Helper, |
6365 | | HLObjectOperationLowerHelper *pObjHelper, |
6366 | 4 | bool &Translated) { |
6367 | 4 | unsigned SrcIdx = 1; |
6368 | 4 | Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); |
6369 | 4 | Value *Payload = CI->getArgOperand(SrcIdx++); |
6370 | 4 | DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); |
6371 | | |
6372 | 4 | IRBuilder<> Builder(CI); |
6373 | 4 | Value *HitObject = Builder.CreateLoad(HitObjectPtr); |
6374 | 4 | TrivialDxilOperation(OpCode, {nullptr, HitObject, Payload}, |
6375 | 4 | Payload->getType(), CI, &Helper.hlslOP); |
6376 | 4 | return nullptr; |
6377 | 4 | } |
6378 | | |
6379 | | Value *TranslateHitObjectGetAttributes(CallInst *CI, IntrinsicOp IOP, |
6380 | | OP::OpCode OpCode, |
6381 | | HLOperationLowerHelper &Helper, |
6382 | | HLObjectOperationLowerHelper *pObjHelper, |
6383 | 6 | bool &Translated) { |
6384 | 6 | hlsl::OP *OP = &Helper.hlslOP; |
6385 | 6 | IRBuilder<> Builder(CI); |
6386 | | |
6387 | 6 | Value *HitObjectPtr = CI->getArgOperand(1); |
6388 | 6 | Value *HitObject = Builder.CreateLoad(HitObjectPtr); |
6389 | 6 | Value *AttrOutPtr = |
6390 | 6 | CI->getArgOperand(HLOperandIndex::kHitObjectGetAttributes_AttributeOpIdx); |
6391 | 6 | TrivialDxilOperation(OpCode, {nullptr, HitObject, AttrOutPtr}, |
6392 | 6 | AttrOutPtr->getType(), CI, OP); |
6393 | 6 | return nullptr; |
6394 | 6 | } |
6395 | | |
6396 | | Value *TranslateHitObjectScalarGetter(CallInst *CI, IntrinsicOp IOP, |
6397 | | OP::OpCode OpCode, |
6398 | | HLOperationLowerHelper &Helper, |
6399 | | HLObjectOperationLowerHelper *pObjHelper, |
6400 | 48 | bool &Translated) { |
6401 | 48 | hlsl::OP *OP = &Helper.hlslOP; |
6402 | 48 | Value *HitObjectPtr = CI->getArgOperand(1); |
6403 | 48 | IRBuilder<> Builder(CI); |
6404 | 48 | Value *HitObject = Builder.CreateLoad(HitObjectPtr); |
6405 | 48 | return TrivialDxilOperation(OpCode, {nullptr, HitObject}, CI->getType(), CI, |
6406 | 48 | OP); |
6407 | 48 | } |
6408 | | |
6409 | | Value *TranslateHitObjectVectorGetter(CallInst *CI, IntrinsicOp IOP, |
6410 | | OP::OpCode OpCode, |
6411 | | HLOperationLowerHelper &Helper, |
6412 | | HLObjectOperationLowerHelper *pObjHelper, |
6413 | 16 | bool &Translated) { |
6414 | 16 | hlsl::OP *OP = &Helper.hlslOP; |
6415 | 16 | Value *HitObjectPtr = CI->getArgOperand(1); |
6416 | 16 | IRBuilder<> Builder(CI); |
6417 | 16 | Value *HitObject = Builder.CreateLoad(HitObjectPtr); |
6418 | 16 | VectorType *Ty = cast<VectorType>(CI->getType()); |
6419 | 16 | uint32_t Vals[] = {0, 1, 2, 3}; |
6420 | 16 | Constant *Src = ConstantDataVector::get(CI->getContext(), Vals); |
6421 | 16 | return TrivialDxilOperation(OpCode, {nullptr, HitObject, Src}, Ty, CI, OP); |
6422 | 16 | } |
6423 | | |
6424 | 16 | static bool IsHitObject3x4Getter(IntrinsicOp IOP) { |
6425 | 16 | switch (IOP) { |
6426 | 8 | default: |
6427 | 8 | return false; |
6428 | 4 | case IntrinsicOp::MOP_DxHitObject_GetObjectToWorld3x4: |
6429 | 8 | case IntrinsicOp::MOP_DxHitObject_GetWorldToObject3x4: |
6430 | 8 | return true; |
6431 | 16 | } |
6432 | 16 | } |
6433 | | |
6434 | | Value *TranslateHitObjectMatrixGetter(CallInst *CI, IntrinsicOp IOP, |
6435 | | OP::OpCode OpCode, |
6436 | | HLOperationLowerHelper &Helper, |
6437 | | HLObjectOperationLowerHelper *pObjHelper, |
6438 | 16 | bool &Translated) { |
6439 | 16 | hlsl::OP *OP = &Helper.hlslOP; |
6440 | 16 | Value *HitObjectPtr = CI->getArgOperand(1); |
6441 | 16 | IRBuilder<> Builder(CI); |
6442 | 16 | Value *HitObject = Builder.CreateLoad(HitObjectPtr); |
6443 | | |
6444 | | // Create 3x4 matrix indices |
6445 | 16 | bool Is3x4 = IsHitObject3x4Getter(IOP); |
6446 | 16 | Constant *Rows, *Cols; |
6447 | 16 | GetMatrixIndices<uint32_t>(Rows, Cols, Is3x4, CI->getContext()); |
6448 | | |
6449 | 16 | VectorType *Ty = cast<VectorType>(CI->getType()); |
6450 | 16 | return TrivialDxilOperation(OpCode, {nullptr, HitObject, Rows, Cols}, Ty, CI, |
6451 | 16 | OP); |
6452 | 16 | } |
6453 | | |
6454 | | Value *TranslateHitObjectLoadLocalRootTableConstant( |
6455 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, |
6456 | | HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, |
6457 | 4 | bool &Translated) { |
6458 | 4 | hlsl::OP *OP = &Helper.hlslOP; |
6459 | 4 | IRBuilder<> Builder(CI); |
6460 | | |
6461 | 4 | Value *HitObjectPtr = CI->getArgOperand(1); |
6462 | 4 | Value *Offset = CI->getArgOperand(2); |
6463 | | |
6464 | 4 | Value *HitObject = Builder.CreateLoad(HitObjectPtr); |
6465 | 4 | return TrivialDxilOperation(OpCode, {nullptr, HitObject, Offset}, |
6466 | 4 | Helper.voidTy, CI, OP); |
6467 | 4 | } |
6468 | | |
6469 | | Value *TranslateHitObjectSetShaderTableIndex( |
6470 | | CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, |
6471 | | HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, |
6472 | 4 | bool &Translated) { |
6473 | 4 | hlsl::OP *OP = &Helper.hlslOP; |
6474 | 4 | IRBuilder<> Builder(CI); |
6475 | | |
6476 | 4 | Value *HitObjectPtr = CI->getArgOperand(1); |
6477 | 4 | Value *ShaderTableIndex = CI->getArgOperand(2); |
6478 | | |
6479 | 4 | Value *InHitObject = Builder.CreateLoad(HitObjectPtr); |
6480 | 4 | Value *OutHitObject = TrivialDxilOperation( |
6481 | 4 | OpCode, {nullptr, InHitObject, ShaderTableIndex}, Helper.voidTy, CI, OP); |
6482 | 4 | Builder.CreateStore(OutHitObject, HitObjectPtr); |
6483 | 4 | return nullptr; |
6484 | 4 | } |
6485 | | |
6486 | | } // namespace |
6487 | | |
6488 | | // Resource Handle. |
6489 | | namespace { |
6490 | | Value *TranslateGetHandleFromHeap(CallInst *CI, IntrinsicOp IOP, |
6491 | | DXIL::OpCode opcode, |
6492 | | HLOperationLowerHelper &helper, |
6493 | | HLObjectOperationLowerHelper *pObjHelper, |
6494 | 602 | bool &Translated) { |
6495 | 602 | hlsl::OP &hlslOP = helper.hlslOP; |
6496 | 602 | Function *dxilFunc = hlslOP.GetOpFunc(opcode, helper.voidTy); |
6497 | 602 | IRBuilder<> Builder(CI); |
6498 | 602 | Value *opArg = ConstantInt::get(helper.i32Ty, (unsigned)opcode); |
6499 | 602 | return Builder.CreateCall( |
6500 | 602 | dxilFunc, {opArg, CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx), |
6501 | 602 | CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx), |
6502 | | // TODO: update nonUniformIndex later. |
6503 | 602 | Builder.getInt1(false)}); |
6504 | 602 | } |
6505 | | } // namespace |
6506 | | |
6507 | | // Translate and/or/select intrinsics |
6508 | | namespace { |
6509 | | Value *TranslateAnd(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6510 | | HLOperationLowerHelper &helper, |
6511 | | HLObjectOperationLowerHelper *pObjHelper, |
6512 | 60 | bool &Translated) { |
6513 | 60 | Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
6514 | 60 | Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
6515 | 60 | IRBuilder<> Builder(CI); |
6516 | | |
6517 | 60 | return Builder.CreateAnd(x, y); |
6518 | 60 | } |
6519 | | Value *TranslateOr(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6520 | | HLOperationLowerHelper &helper, |
6521 | 60 | HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { |
6522 | 60 | Value *x = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); |
6523 | 60 | Value *y = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); |
6524 | 60 | IRBuilder<> Builder(CI); |
6525 | | |
6526 | 60 | return Builder.CreateOr(x, y); |
6527 | 60 | } |
6528 | | Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, |
6529 | | HLOperationLowerHelper &helper, |
6530 | | HLObjectOperationLowerHelper *pObjHelper, |
6531 | 30 | bool &Translated) { |
6532 | 30 | Value *cond = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc0Idx); |
6533 | 30 | Value *t = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc1Idx); |
6534 | 30 | Value *f = CI->getArgOperand(HLOperandIndex::kTrinaryOpSrc2Idx); |
6535 | 30 | IRBuilder<> Builder(CI); |
6536 | | |
6537 | 30 | return Builder.CreateSelect(cond, t, f); |
6538 | 30 | } |
6539 | | |
6540 | | Value *TranslateMatVecMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, |
6541 | | HLOperationLowerHelper &Helper, |
6542 | | HLObjectOperationLowerHelper *ObjHelper, |
6543 | 42 | bool &Translated) { |
6544 | | |
6545 | 42 | hlsl::OP *HlslOp = &Helper.hlslOP; |
6546 | 42 | IRBuilder<> Builder(CI); |
6547 | | |
6548 | 42 | Constant *OpArg = HlslOp->GetU32Const(static_cast<unsigned>(OpCode)); |
6549 | | |
6550 | | // Input parameters |
6551 | 42 | Value *InputVector = |
6552 | 42 | CI->getArgOperand(HLOperandIndex::kMatVecMulInputVectorIdx); |
6553 | 42 | Value *InputIsUnsigned = |
6554 | 42 | CI->getArgOperand(HLOperandIndex::kMatVecMulIsInputUnsignedIdx); |
6555 | 42 | Value *InputInterpretation = |
6556 | 42 | CI->getArgOperand(HLOperandIndex::kMatVecMulInputInterpretationIdx); |
6557 | | |
6558 | | // Matrix parameters |
6559 | 42 | Value *MatrixBuffer = |
6560 | 42 | CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixBufferIdx); |
6561 | 42 | Value *MatrixOffset = |
6562 | 42 | CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixOffsetIdx); |
6563 | 42 | Value *MatrixInterpretation = |
6564 | 42 | CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixInterpretationIdx); |
6565 | 42 | Value *MatrixM = CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixMIdx); |
6566 | 42 | Value *MatrixK = CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixKIdx); |
6567 | 42 | Value *MatrixLayout = |
6568 | 42 | CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixLayoutIdx); |
6569 | 42 | Value *MatrixTranspose = |
6570 | 42 | CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixTransposeIdx); |
6571 | 42 | Value *MatrixStride = |
6572 | 42 | CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixStrideIdx); |
6573 | | |
6574 | | // Output parameters |
6575 | 42 | Value *OutputIsUnsigned = |
6576 | 42 | CI->getArgOperand(HLOperandIndex::kMatVecMulIsOutputUnsignedIdx); |
6577 | | |
6578 | | // Get the DXIL function for the operation |
6579 | 42 | Function *DxilFunc = HlslOp->GetOpFunc( |
6580 | 42 | OpCode, {CI->getArgOperand(HLOperandIndex::kMatVecMulOutputVectorIdx) |
6581 | 42 | ->getType() |
6582 | 42 | ->getPointerElementType(), |
6583 | 42 | InputVector->getType()}); |
6584 | | |
6585 | | // Create a call to the DXIL function |
6586 | 42 | Value *NewCI = Builder.CreateCall( |
6587 | 42 | DxilFunc, |
6588 | 42 | {OpArg, InputVector, InputIsUnsigned, InputInterpretation, MatrixBuffer, |
6589 | 42 | MatrixOffset, MatrixInterpretation, MatrixM, MatrixK, MatrixLayout, |
6590 | 42 | MatrixTranspose, MatrixStride, OutputIsUnsigned}); |
6591 | | |
6592 | | // Get the output parameter and store the result |
6593 | 42 | Value *OutParam = |
6594 | 42 | CI->getArgOperand(HLOperandIndex::kMatVecMulOutputVectorIdx); |
6595 | | |
6596 | 42 | Builder.CreateStore(NewCI, OutParam); |
6597 | | |
6598 | 42 | return nullptr; |
6599 | 42 | } |
6600 | | |
6601 | | Value *TranslateMatVecMulAdd(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, |
6602 | | HLOperationLowerHelper &Helper, |
6603 | | HLObjectOperationLowerHelper *ObjHelper, |
6604 | 34 | bool &Translated) { |
6605 | | |
6606 | 34 | hlsl::OP *HlslOp = &Helper.hlslOP; |
6607 | 34 | IRBuilder<> Builder(CI); |
6608 | | |
6609 | 34 | Constant *OpArg = HlslOp->GetU32Const(static_cast<unsigned>(OpCode)); |
6610 | | |
6611 | | // Input vector parameters |
6612 | 34 | Value *InputVector = |
6613 | 34 | CI->getArgOperand(HLOperandIndex::kMatVecMulAddInputVectorIdx); |
6614 | 34 | Value *InputIsUnsigned = |
6615 | 34 | CI->getArgOperand(HLOperandIndex::kMatVecMulAddIsInputUnsignedIdx); |
6616 | 34 | Value *InputInterpretation = |
6617 | 34 | CI->getArgOperand(HLOperandIndex::kMatVecMulAddInputInterpretationIdx); |
6618 | | |
6619 | | // Matrix parameters |
6620 | 34 | Value *MatrixBuffer = |
6621 | 34 | CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixBufferIdx); |
6622 | 34 | Value *MatrixOffset = |
6623 | 34 | CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixOffsetIdx); |
6624 | 34 | Value *MatrixInterpretation = |
6625 | 34 | CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixInterpretationIdx); |
6626 | 34 | Value *MatrixM = CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixMIdx); |
6627 | 34 | Value *MatrixK = CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixKIdx); |
6628 | 34 | Value *MatrixLayout = |
6629 | 34 | CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixLayoutIdx); |
6630 | 34 | Value *MatrixTranspose = |
6631 | 34 | CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixTransposeIdx); |
6632 | 34 | Value *MatrixStride = |
6633 | 34 | CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixStrideIdx); |
6634 | | |
6635 | | // Bias parameters |
6636 | 34 | Value *BiasBuffer = |
6637 | 34 | CI->getArgOperand(HLOperandIndex::kMatVecMulAddBiasBufferIdx); |
6638 | 34 | Value *BiasOffset = |
6639 | 34 | CI->getArgOperand(HLOperandIndex::kMatVecMulAddBiasOffsetIdx); |
6640 | 34 | Value *BiasInterpretation = |
6641 | 34 | CI->getArgOperand(HLOperandIndex::kMatVecMulAddBiasInterpretationIdx); |
6642 | | |
6643 | | // Output parameters |
6644 | 34 | Value *OutputIsUnsigned = |
6645 | 34 | CI->getArgOperand(HLOperandIndex::kMatVecMulAddIsOutputUnsignedIdx); |
6646 | | |
6647 | | // Get the DXIL function for the operation |
6648 | 34 | Function *DxilFunc = HlslOp->GetOpFunc( |
6649 | 34 | OpCode, {CI->getArgOperand(HLOperandIndex::kMatVecMulAddOutputVectorIdx) |
6650 | 34 | ->getType() |
6651 | 34 | ->getPointerElementType(), |
6652 | 34 | InputVector->getType()}); |
6653 | | |
6654 | | // Create a call to the DXIL function |
6655 | 34 | Value *NewCI = Builder.CreateCall( |
6656 | 34 | DxilFunc, {OpArg, InputVector, InputIsUnsigned, InputInterpretation, |
6657 | 34 | MatrixBuffer, MatrixOffset, MatrixInterpretation, MatrixM, |
6658 | 34 | MatrixK, MatrixLayout, MatrixTranspose, MatrixStride, |
6659 | 34 | BiasBuffer, BiasOffset, BiasInterpretation, OutputIsUnsigned}); |
6660 | | |
6661 | | // Store the result in the output parameter |
6662 | 34 | Value *OutParam = |
6663 | 34 | CI->getArgOperand(HLOperandIndex::kMatVecMulAddOutputVectorIdx); |
6664 | 34 | Builder.CreateStore(NewCI, OutParam); |
6665 | | |
6666 | 34 | return nullptr; |
6667 | 34 | } |
6668 | | /* Lowers the HL outer-product-accumulate call CI to a call of the DXIL function for OpCode and returns that new call. */ |
6669 | | Value *TranslateOuterProductAccumulate(CallInst *CI, IntrinsicOp IOP, |
6670 | | OP::OpCode OpCode, |
6671 | | HLOperationLowerHelper &Helper, |
6672 | | HLObjectOperationLowerHelper *ObjHelper, |
6673 | 22 | bool &Translated) { |
6674 | | /* IOP, ObjHelper and Translated are neither read nor written in this function. */ |
6675 | 22 | hlsl::OP *HlslOp = &Helper.hlslOP; |
6676 | 22 | IRBuilder<> Builder(CI); |
6677 | | /* The opcode is turned into a u32 constant; it is passed as the first argument of the new call. */ |
6678 | 22 | Constant *OpArg = HlslOp->GetU32Const(static_cast<unsigned>(OpCode)); |
6679 | | |
6680 | | // Input vector parameters: both are forwarded unchanged from the operands of CI |
6681 | 22 | Value *InputVector1 = |
6682 | 22 | CI->getArgOperand(HLOperandIndex::kOuterProdAccInputVec1Idx); |
6683 | 22 | Value *InputVector2 = |
6684 | 22 | CI->getArgOperand(HLOperandIndex::kOuterProdAccInputVec2Idx); |
6685 | | |
6686 | | // Matrix parameters: buffer, offset, interpretation, layout and stride operands of CI |
6687 | 22 | Value *MatrixBuffer = |
6688 | 22 | CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixIdx); |
6689 | 22 | Value *MatrixOffset = |
6690 | 22 | CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixOffsetIdx); |
6691 | 22 | Value *MatrixInterpretation = |
6692 | 22 | CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixInterpretationIdx); |
6693 | 22 | Value *MatrixLayout = |
6694 | 22 | CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixLayoutIdx); |
6695 | 22 | Value *MatrixStride = |
6696 | 22 | CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixStrideIdx); |
6697 | | |
6698 | | // Get the DXIL function for the operation, passing the types of both input vectors |
6699 | 22 | Function *DxilFunc = HlslOp->GetOpFunc( |
6700 | 22 | OpCode, {InputVector1->getType(), InputVector2->getType()}); |
6701 | | /* The call is created through Builder, which was constructed from CI; CI itself is not modified or erased here. */ |
6702 | 22 | return Builder.CreateCall( |
6703 | 22 | DxilFunc, {OpArg, InputVector1, InputVector2, MatrixBuffer, MatrixOffset, |
6704 | 22 | MatrixInterpretation, MatrixLayout, MatrixStride}); |
6705 | 22 | } |
6706 | | /* Lowers the HL vector-accumulate call CI to a call of the DXIL function for OpCode and returns that new call. */ |
6707 | | Value *TranslateVectorAccumulate(CallInst *CI, IntrinsicOp IOP, |
6708 | | OP::OpCode OpCode, |
6709 | | HLOperationLowerHelper &Helper, |
6710 | | HLObjectOperationLowerHelper *ObjHelper, |
6711 | 18 | bool &Translated) { |
6712 | | /* IOP, ObjHelper and Translated are neither read nor written in this function. */ |
6713 | 18 | hlsl::OP *HlslOp = &Helper.hlslOP; |
6714 | 18 | IRBuilder<> Builder(CI); |
6715 | | /* The opcode is turned into a u32 constant; it is passed as the first argument of the new call. */ |
6716 | 18 | Constant *OpArg = HlslOp->GetU32Const(static_cast<unsigned>(OpCode)); |
6717 | | |
6718 | | // Input vector parameter: forwarded unchanged from the operands of CI |
6719 | 18 | Value *InputVector = CI->getArgOperand(HLOperandIndex::kVectorAccInputVecIdx); |
6720 | | |
6721 | | // Matrix parameters: the buffer and offset operands of CI |
6722 | 18 | Value *MatrixBuffer = CI->getArgOperand(HLOperandIndex::kVectorAccMatrixIdx); |
6723 | 18 | Value *MatrixOffset = |
6724 | 18 | CI->getArgOperand(HLOperandIndex::kVectorAccMatrixOffsetIdx); |
6725 | | |
6726 | | // Get the DXIL function for the operation, passing the type of the input vector |
6727 | 18 | Function *DxilFunc = HlslOp->GetOpFunc(OpCode, InputVector->getType()); |
6728 | | /* The call is created through Builder, which was constructed from CI; CI itself is not modified or erased here. */ |
6729 | 18 | return Builder.CreateCall(DxilFunc, |
6730 | 18 | {OpArg, InputVector, MatrixBuffer, MatrixOffset}); |
6731 | 18 | } |
6732 | | |
6733 | | } // namespace |
6734 | | |
6735 | | // Lower table. |
6736 | | namespace { |
6737 | | /* Lowering-table handler for intrinsics with no lowering here: emits an error on CI, clears Translated, returns nullptr. */ |
6738 | | Value *EmptyLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, |
6739 | | HLOperationLowerHelper &helper, |
6740 | 6 | HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { /* IOP, opcode, helper and pObjHelper are unused */ |
6741 | 6 | Translated = false; /* mark the call as not translated */ |
6742 | 6 | dxilutil::EmitErrorOnInstruction(CI, "Unsupported intrinsic."); |
6743 | 6 | return nullptr; /* no replacement value is produced */ |
6744 | 6 | } |
6745 | | |
6746 | | // SPIRV change starts |
6747 | | Value *UnsupportedVulkanIntrinsic(CallInst *CI, IntrinsicOp IOP, |
6748 | | DXIL::OpCode opcode, |
6749 | | HLOperationLowerHelper &helper, |
6750 | | HLObjectOperationLowerHelper *pObjHelper, |
6751 | 0 | bool &Translated) { /* Lowering-table handler: emits an error on CI, clears Translated, returns nullptr; the other parameters are unused. */ |
6752 | 0 | Translated = false; /* mark the call as not translated */ |
6753 | 0 | dxilutil::EmitErrorOnInstruction(CI, "Unsupported Vulkan intrinsic."); |
6754 | 0 | return nullptr; /* no replacement value is produced */ |
6755 | 0 | } |
6756 | | // SPIRV change ends |
6757 | | /* Lowering-table handler that deliberately does nothing: it only clears Translated and returns nullptr; no error is emitted. */ |
6758 | | Value *StreamOutputLower(CallInst *CI, IntrinsicOp IOP, DXIL::OpCode opcode, |
6759 | | HLOperationLowerHelper &helper, |
6760 | | HLObjectOperationLowerHelper *pObjHelper, |
6761 | 0 | bool &Translated) { |
6762 | | // These calls are translated in DxilGenerationPass::GenerateStreamOutputOperation. |
6763 | | // Do nothing here; CI and all other parameters are left untouched. |
6764 | | // Mark not translated and return nullptr. |
6765 | 0 | Translated = false; |
6766 | 0 | return nullptr; |
6767 | 0 | } |
6768 | | |
6769 | | // The entries of this table must appear in the same order as IntrinsicOp. |
6770 | | IntrinsicLower gLowerTable[] = { |
6771 | | {IntrinsicOp::IOP_AcceptHitAndEndSearch, |
6772 | | TranslateNoArgNoReturnPreserveOutput, DXIL::OpCode::AcceptHitAndEndSearch}, |
6773 | | {IntrinsicOp::IOP_AddUint64, TranslateAddUint64, DXIL::OpCode::UAddc}, |
6774 | | {IntrinsicOp::IOP_AllMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier}, |
6775 | | {IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync, TrivialBarrier, |
6776 | | DXIL::OpCode::Barrier}, |
6777 | | {IntrinsicOp::IOP_AllocateRayQuery, TranslateAllocateRayQuery, |
6778 | | DXIL::OpCode::AllocateRayQuery}, |
6779 | | {IntrinsicOp::IOP_Barrier, TranslateBarrier, DXIL::OpCode::NumOpCodes}, |
6780 | | {IntrinsicOp::IOP_CallShader, TranslateCallShader, |
6781 | | DXIL::OpCode::CallShader}, |
6782 | | {IntrinsicOp::IOP_CheckAccessFullyMapped, TranslateCheckAccess, |
6783 | | DXIL::OpCode::CheckAccessFullyMapped}, |
6784 | | {IntrinsicOp::IOP_CreateResourceFromHeap, TranslateGetHandleFromHeap, |
6785 | | DXIL::OpCode::CreateHandleFromHeap}, |
6786 | | {IntrinsicOp::IOP_D3DCOLORtoUBYTE4, TranslateD3DColorToUByte4, |
6787 | | DXIL::OpCode::NumOpCodes}, |
6788 | | {IntrinsicOp::IOP_DeviceMemoryBarrier, TrivialBarrier, |
6789 | | DXIL::OpCode::Barrier}, |
6790 | | {IntrinsicOp::IOP_DeviceMemoryBarrierWithGroupSync, TrivialBarrier, |
6791 | | DXIL::OpCode::Barrier}, |
6792 | | {IntrinsicOp::IOP_DispatchMesh, TrivialDispatchMesh, |
6793 | | DXIL::OpCode::DispatchMesh}, |
6794 | | {IntrinsicOp::IOP_DispatchRaysDimensions, TranslateNoArgVectorOperation, |
6795 | | DXIL::OpCode::DispatchRaysDimensions}, |
6796 | | {IntrinsicOp::IOP_DispatchRaysIndex, TranslateNoArgVectorOperation, |
6797 | | DXIL::OpCode::DispatchRaysIndex}, |
6798 | | {IntrinsicOp::IOP_EvaluateAttributeAtSample, TranslateEvalSample, |
6799 | | DXIL::OpCode::NumOpCodes}, |
6800 | | {IntrinsicOp::IOP_EvaluateAttributeCentroid, TranslateEvalCentroid, |
6801 | | DXIL::OpCode::EvalCentroid}, |
6802 | | {IntrinsicOp::IOP_EvaluateAttributeSnapped, TranslateEvalSnapped, |
6803 | | DXIL::OpCode::NumOpCodes}, |
6804 | | {IntrinsicOp::IOP_GeometryIndex, TrivialNoArgWithRetOperation, |
6805 | | DXIL::OpCode::GeometryIndex}, |
6806 | | {IntrinsicOp::IOP_GetAttributeAtVertex, TranslateGetAttributeAtVertex, |
6807 | | DXIL::OpCode::AttributeAtVertex}, |
6808 | | {IntrinsicOp::IOP_GetRemainingRecursionLevels, TrivialNoArgOperation, |
6809 | | DXIL::OpCode::GetRemainingRecursionLevels}, |
6810 | | {IntrinsicOp::IOP_GetRenderTargetSampleCount, TrivialNoArgOperation, |
6811 | | DXIL::OpCode::RenderTargetGetSampleCount}, |
6812 | | {IntrinsicOp::IOP_GetRenderTargetSamplePosition, TranslateGetRTSamplePos, |
6813 | | DXIL::OpCode::NumOpCodes}, |
6814 | | {IntrinsicOp::IOP_GroupMemoryBarrier, TrivialBarrier, |
6815 | | DXIL::OpCode::Barrier}, |
6816 | | {IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync, TrivialBarrier, |
6817 | | DXIL::OpCode::Barrier}, |
6818 | | {IntrinsicOp::IOP_HitKind, TrivialNoArgWithRetOperation, |
6819 | | DXIL::OpCode::HitKind}, |
6820 | | {IntrinsicOp::IOP_IgnoreHit, TranslateNoArgNoReturnPreserveOutput, |
6821 | | DXIL::OpCode::IgnoreHit}, |
6822 | | {IntrinsicOp::IOP_InstanceID, TrivialNoArgWithRetOperation, |
6823 | | DXIL::OpCode::InstanceID}, |
6824 | | {IntrinsicOp::IOP_InstanceIndex, TrivialNoArgWithRetOperation, |
6825 | | DXIL::OpCode::InstanceIndex}, |
6826 | | {IntrinsicOp::IOP_InterlockedAdd, TranslateIopAtomicBinaryOperation, |
6827 | | DXIL::OpCode::NumOpCodes}, |
6828 | | {IntrinsicOp::IOP_InterlockedAnd, TranslateIopAtomicBinaryOperation, |
6829 | | DXIL::OpCode::NumOpCodes}, |
6830 | | {IntrinsicOp::IOP_InterlockedCompareExchange, TranslateIopAtomicCmpXChg, |
6831 | | DXIL::OpCode::NumOpCodes}, |
6832 | | {IntrinsicOp::IOP_InterlockedCompareExchangeFloatBitwise, |
6833 | | TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes}, |
6834 | | {IntrinsicOp::IOP_InterlockedCompareStore, TranslateIopAtomicCmpXChg, |
6835 | | DXIL::OpCode::NumOpCodes}, |
6836 | | {IntrinsicOp::IOP_InterlockedCompareStoreFloatBitwise, |
6837 | | TranslateIopAtomicCmpXChg, DXIL::OpCode::NumOpCodes}, |
6838 | | {IntrinsicOp::IOP_InterlockedExchange, TranslateIopAtomicBinaryOperation, |
6839 | | DXIL::OpCode::NumOpCodes}, |
6840 | | {IntrinsicOp::IOP_InterlockedMax, TranslateIopAtomicBinaryOperation, |
6841 | | DXIL::OpCode::NumOpCodes}, |
6842 | | {IntrinsicOp::IOP_InterlockedMin, TranslateIopAtomicBinaryOperation, |
6843 | | DXIL::OpCode::NumOpCodes}, |
6844 | | {IntrinsicOp::IOP_InterlockedOr, TranslateIopAtomicBinaryOperation, |
6845 | | DXIL::OpCode::NumOpCodes}, |
6846 | | {IntrinsicOp::IOP_InterlockedXor, TranslateIopAtomicBinaryOperation, |
6847 | | DXIL::OpCode::NumOpCodes}, |
6848 | | {IntrinsicOp::IOP_IsHelperLane, TrivialNoArgWithRetOperation, |
6849 | | DXIL::OpCode::IsHelperLane}, |
6850 | | {IntrinsicOp::IOP_NonUniformResourceIndex, TranslateNonUniformResourceIndex, |
6851 | | DXIL::OpCode::NumOpCodes}, |
6852 | | {IntrinsicOp::IOP_ObjectRayDirection, TranslateNoArgVectorOperation, |
6853 | | DXIL::OpCode::ObjectRayDirection}, |
6854 | | {IntrinsicOp::IOP_ObjectRayOrigin, TranslateNoArgVectorOperation, |
6855 | | DXIL::OpCode::ObjectRayOrigin}, |
6856 | | {IntrinsicOp::IOP_ObjectToWorld, TranslateNoArgMatrix3x4Operation, |
6857 | | DXIL::OpCode::ObjectToWorld}, |
6858 | | {IntrinsicOp::IOP_ObjectToWorld3x4, TranslateNoArgMatrix3x4Operation, |
6859 | | DXIL::OpCode::ObjectToWorld}, |
6860 | | {IntrinsicOp::IOP_ObjectToWorld4x3, |
6861 | | TranslateNoArgTransposedMatrix3x4Operation, DXIL::OpCode::ObjectToWorld}, |
6862 | | {IntrinsicOp::IOP_PrimitiveIndex, TrivialNoArgWithRetOperation, |
6863 | | DXIL::OpCode::PrimitiveIndex}, |
6864 | | {IntrinsicOp::IOP_Process2DQuadTessFactorsAvg, TranslateProcessTessFactors, |
6865 | | DXIL::OpCode::NumOpCodes}, |
6866 | | {IntrinsicOp::IOP_Process2DQuadTessFactorsMax, TranslateProcessTessFactors, |
6867 | | DXIL::OpCode::NumOpCodes}, |
6868 | | {IntrinsicOp::IOP_Process2DQuadTessFactorsMin, TranslateProcessTessFactors, |
6869 | | DXIL::OpCode::NumOpCodes}, |
6870 | | {IntrinsicOp::IOP_ProcessIsolineTessFactors, |
6871 | | TranslateProcessIsolineTessFactors, DXIL::OpCode::NumOpCodes}, |
6872 | | {IntrinsicOp::IOP_ProcessQuadTessFactorsAvg, TranslateProcessTessFactors, |
6873 | | DXIL::OpCode::NumOpCodes}, |
6874 | | {IntrinsicOp::IOP_ProcessQuadTessFactorsMax, TranslateProcessTessFactors, |
6875 | | DXIL::OpCode::NumOpCodes}, |
6876 | | {IntrinsicOp::IOP_ProcessQuadTessFactorsMin, TranslateProcessTessFactors, |
6877 | | DXIL::OpCode::NumOpCodes}, |
6878 | | {IntrinsicOp::IOP_ProcessTriTessFactorsAvg, TranslateProcessTessFactors, |
6879 | | DXIL::OpCode::NumOpCodes}, |
6880 | | {IntrinsicOp::IOP_ProcessTriTessFactorsMax, TranslateProcessTessFactors, |
6881 | | DXIL::OpCode::NumOpCodes}, |
6882 | | {IntrinsicOp::IOP_ProcessTriTessFactorsMin, TranslateProcessTessFactors, |
6883 | | DXIL::OpCode::NumOpCodes}, |
6884 | | {IntrinsicOp::IOP_QuadAll, TranslateQuadAnyAll, DXIL::OpCode::QuadVote}, |
6885 | | {IntrinsicOp::IOP_QuadAny, TranslateQuadAnyAll, DXIL::OpCode::QuadVote}, |
6886 | | {IntrinsicOp::IOP_QuadReadAcrossDiagonal, TranslateQuadReadAcross, |
6887 | | DXIL::OpCode::QuadOp}, |
6888 | | {IntrinsicOp::IOP_QuadReadAcrossX, TranslateQuadReadAcross, |
6889 | | DXIL::OpCode::QuadOp}, |
6890 | | {IntrinsicOp::IOP_QuadReadAcrossY, TranslateQuadReadAcross, |
6891 | | DXIL::OpCode::QuadOp}, |
6892 | | {IntrinsicOp::IOP_QuadReadLaneAt, TranslateQuadReadLaneAt, |
6893 | | DXIL::OpCode::NumOpCodes}, |
6894 | | {IntrinsicOp::IOP_RayFlags, TrivialNoArgWithRetOperation, |
6895 | | DXIL::OpCode::RayFlags}, |
6896 | | {IntrinsicOp::IOP_RayTCurrent, TrivialNoArgWithRetOperation, |
6897 | | DXIL::OpCode::RayTCurrent}, |
6898 | | {IntrinsicOp::IOP_RayTMin, TrivialNoArgWithRetOperation, |
6899 | | DXIL::OpCode::RayTMin}, |
6900 | | {IntrinsicOp::IOP_ReportHit, TranslateReportIntersection, |
6901 | | DXIL::OpCode::ReportHit}, |
6902 | | {IntrinsicOp::IOP_SetMeshOutputCounts, TrivialSetMeshOutputCounts, |
6903 | | DXIL::OpCode::SetMeshOutputCounts}, |
6904 | | {IntrinsicOp::IOP_TraceRay, TranslateTraceRay, DXIL::OpCode::TraceRay}, |
6905 | | {IntrinsicOp::IOP_WaveActiveAllEqual, TranslateWaveAllEqual, |
6906 | | DXIL::OpCode::WaveActiveAllEqual}, |
6907 | | {IntrinsicOp::IOP_WaveActiveAllTrue, TranslateWaveA2B, |
6908 | | DXIL::OpCode::WaveAllTrue}, |
6909 | | {IntrinsicOp::IOP_WaveActiveAnyTrue, TranslateWaveA2B, |
6910 | | DXIL::OpCode::WaveAnyTrue}, |
6911 | | {IntrinsicOp::IOP_WaveActiveBallot, TranslateWaveBallot, |
6912 | | DXIL::OpCode::WaveActiveBallot}, |
6913 | | {IntrinsicOp::IOP_WaveActiveBitAnd, TranslateWaveA2A, |
6914 | | DXIL::OpCode::WaveActiveBit}, |
6915 | | {IntrinsicOp::IOP_WaveActiveBitOr, TranslateWaveA2A, |
6916 | | DXIL::OpCode::WaveActiveBit}, |
6917 | | {IntrinsicOp::IOP_WaveActiveBitXor, TranslateWaveA2A, |
6918 | | DXIL::OpCode::WaveActiveBit}, |
6919 | | {IntrinsicOp::IOP_WaveActiveCountBits, TranslateWaveA2B, |
6920 | | DXIL::OpCode::WaveAllBitCount}, |
6921 | | {IntrinsicOp::IOP_WaveActiveMax, TranslateWaveA2A, |
6922 | | DXIL::OpCode::WaveActiveOp}, |
6923 | | {IntrinsicOp::IOP_WaveActiveMin, TranslateWaveA2A, |
6924 | | DXIL::OpCode::WaveActiveOp}, |
6925 | | {IntrinsicOp::IOP_WaveActiveProduct, TranslateWaveA2A, |
6926 | | DXIL::OpCode::WaveActiveOp}, |
6927 | | {IntrinsicOp::IOP_WaveActiveSum, TranslateWaveA2A, |
6928 | | DXIL::OpCode::WaveActiveOp}, |
6929 | | {IntrinsicOp::IOP_WaveGetLaneCount, TranslateWaveToVal, |
6930 | | DXIL::OpCode::WaveGetLaneCount}, |
6931 | | {IntrinsicOp::IOP_WaveGetLaneIndex, TranslateWaveToVal, |
6932 | | DXIL::OpCode::WaveGetLaneIndex}, |
6933 | | {IntrinsicOp::IOP_WaveIsFirstLane, TranslateWaveToVal, |
6934 | | DXIL::OpCode::WaveIsFirstLane}, |
6935 | | {IntrinsicOp::IOP_WaveMatch, TranslateWaveMatch, DXIL::OpCode::WaveMatch}, |
6936 | | {IntrinsicOp::IOP_WaveMultiPrefixBitAnd, TranslateWaveMultiPrefix, |
6937 | | DXIL::OpCode::WaveMultiPrefixOp}, |
6938 | | {IntrinsicOp::IOP_WaveMultiPrefixBitOr, TranslateWaveMultiPrefix, |
6939 | | DXIL::OpCode::WaveMultiPrefixOp}, |
6940 | | {IntrinsicOp::IOP_WaveMultiPrefixBitXor, TranslateWaveMultiPrefix, |
6941 | | DXIL::OpCode::WaveMultiPrefixOp}, |
6942 | | {IntrinsicOp::IOP_WaveMultiPrefixCountBits, |
6943 | | TranslateWaveMultiPrefixBitCount, DXIL::OpCode::WaveMultiPrefixBitCount}, |
6944 | | {IntrinsicOp::IOP_WaveMultiPrefixProduct, TranslateWaveMultiPrefix, |
6945 | | DXIL::OpCode::WaveMultiPrefixOp}, |
6946 | | {IntrinsicOp::IOP_WaveMultiPrefixSum, TranslateWaveMultiPrefix, |
6947 | | DXIL::OpCode::WaveMultiPrefixOp}, |
6948 | | {IntrinsicOp::IOP_WavePrefixCountBits, TranslateWaveA2B, |
6949 | | DXIL::OpCode::WavePrefixBitCount}, |
6950 | | {IntrinsicOp::IOP_WavePrefixProduct, TranslateWaveA2A, |
6951 | | DXIL::OpCode::WavePrefixOp}, |
6952 | | {IntrinsicOp::IOP_WavePrefixSum, TranslateWaveA2A, |
6953 | | DXIL::OpCode::WavePrefixOp}, |
6954 | | {IntrinsicOp::IOP_WaveReadLaneAt, TranslateWaveReadLaneAt, |
6955 | | DXIL::OpCode::WaveReadLaneAt}, |
6956 | | {IntrinsicOp::IOP_WaveReadLaneFirst, TranslateWaveReadLaneFirst, |
6957 | | DXIL::OpCode::WaveReadLaneFirst}, |
6958 | | {IntrinsicOp::IOP_WorldRayDirection, TranslateNoArgVectorOperation, |
6959 | | DXIL::OpCode::WorldRayDirection}, |
6960 | | {IntrinsicOp::IOP_WorldRayOrigin, TranslateNoArgVectorOperation, |
6961 | | DXIL::OpCode::WorldRayOrigin}, |
6962 | | {IntrinsicOp::IOP_WorldToObject, TranslateNoArgMatrix3x4Operation, |
6963 | | DXIL::OpCode::WorldToObject}, |
6964 | | {IntrinsicOp::IOP_WorldToObject3x4, TranslateNoArgMatrix3x4Operation, |
6965 | | DXIL::OpCode::WorldToObject}, |
6966 | | {IntrinsicOp::IOP_WorldToObject4x3, |
6967 | | TranslateNoArgTransposedMatrix3x4Operation, DXIL::OpCode::WorldToObject}, |
6968 | | {IntrinsicOp::IOP_abort, EmptyLower, DXIL::OpCode::NumOpCodes}, |
6969 | | {IntrinsicOp::IOP_abs, TranslateAbs, DXIL::OpCode::NumOpCodes}, |
6970 | | {IntrinsicOp::IOP_acos, TrivialUnaryOperation, DXIL::OpCode::Acos}, |
6971 | | {IntrinsicOp::IOP_all, TranslateAll, DXIL::OpCode::NumOpCodes}, |
6972 | | {IntrinsicOp::IOP_and, TranslateAnd, DXIL::OpCode::NumOpCodes}, |
6973 | | {IntrinsicOp::IOP_any, TranslateAny, DXIL::OpCode::NumOpCodes}, |
6974 | | {IntrinsicOp::IOP_asdouble, TranslateAsDouble, DXIL::OpCode::MakeDouble}, |
6975 | | {IntrinsicOp::IOP_asfloat, TranslateBitcast, DXIL::OpCode::NumOpCodes}, |
6976 | | {IntrinsicOp::IOP_asfloat16, TranslateBitcast, DXIL::OpCode::NumOpCodes}, |
6977 | | {IntrinsicOp::IOP_asin, TrivialUnaryOperation, DXIL::OpCode::Asin}, |
6978 | | {IntrinsicOp::IOP_asint, TranslateBitcast, DXIL::OpCode::NumOpCodes}, |
6979 | | {IntrinsicOp::IOP_asint16, TranslateBitcast, DXIL::OpCode::NumOpCodes}, |
6980 | | {IntrinsicOp::IOP_asuint, TranslateAsUint, DXIL::OpCode::SplitDouble}, |
6981 | | {IntrinsicOp::IOP_asuint16, TranslateAsUint, DXIL::OpCode::NumOpCodes}, |
6982 | | {IntrinsicOp::IOP_atan, TrivialUnaryOperation, DXIL::OpCode::Atan}, |
6983 | | {IntrinsicOp::IOP_atan2, TranslateAtan2, DXIL::OpCode::NumOpCodes}, |
6984 | | {IntrinsicOp::IOP_ceil, TrivialUnaryOperation, DXIL::OpCode::Round_pi}, |
6985 | | {IntrinsicOp::IOP_clamp, TranslateClamp, DXIL::OpCode::NumOpCodes}, |
6986 | | {IntrinsicOp::IOP_clip, TranslateClip, DXIL::OpCode::NumOpCodes}, |
6987 | | {IntrinsicOp::IOP_cos, TrivialUnaryOperation, DXIL::OpCode::Cos}, |
6988 | | {IntrinsicOp::IOP_cosh, TrivialUnaryOperation, DXIL::OpCode::Hcos}, |
6989 | | {IntrinsicOp::IOP_countbits, TrivialUnaryOperationRet, |
6990 | | DXIL::OpCode::Countbits}, |
6991 | | {IntrinsicOp::IOP_cross, TranslateCross, DXIL::OpCode::NumOpCodes}, |
6992 | | {IntrinsicOp::IOP_ddx, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseX}, |
6993 | | {IntrinsicOp::IOP_ddx_coarse, TrivialUnaryOperation, |
6994 | | DXIL::OpCode::DerivCoarseX}, |
6995 | | {IntrinsicOp::IOP_ddx_fine, TrivialUnaryOperation, |
6996 | | DXIL::OpCode::DerivFineX}, |
6997 | | {IntrinsicOp::IOP_ddy, TrivialUnaryOperation, DXIL::OpCode::DerivCoarseY}, |
6998 | | {IntrinsicOp::IOP_ddy_coarse, TrivialUnaryOperation, |
6999 | | DXIL::OpCode::DerivCoarseY}, |
7000 | | {IntrinsicOp::IOP_ddy_fine, TrivialUnaryOperation, |
7001 | | DXIL::OpCode::DerivFineY}, |
7002 | | {IntrinsicOp::IOP_degrees, TranslateDegrees, DXIL::OpCode::NumOpCodes}, |
7003 | | {IntrinsicOp::IOP_determinant, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7004 | | {IntrinsicOp::IOP_distance, TranslateDistance, DXIL::OpCode::NumOpCodes}, |
7005 | | {IntrinsicOp::IOP_dot, TranslateDot, DXIL::OpCode::NumOpCodes}, |
7006 | | {IntrinsicOp::IOP_dot2add, TranslateDot2Add, DXIL::OpCode::Dot2AddHalf}, |
7007 | | {IntrinsicOp::IOP_dot4add_i8packed, TranslateDot4AddPacked, |
7008 | | DXIL::OpCode::Dot4AddI8Packed}, |
7009 | | {IntrinsicOp::IOP_dot4add_u8packed, TranslateDot4AddPacked, |
7010 | | DXIL::OpCode::Dot4AddU8Packed}, |
7011 | | {IntrinsicOp::IOP_dst, TranslateDst, DXIL::OpCode::NumOpCodes}, |
7012 | | {IntrinsicOp::IOP_exp, TranslateExp, DXIL::OpCode::NumOpCodes}, |
7013 | | {IntrinsicOp::IOP_exp2, TrivialUnaryOperation, DXIL::OpCode::Exp}, |
7014 | | {IntrinsicOp::IOP_f16tof32, TranslateF16ToF32, |
7015 | | DXIL::OpCode::LegacyF16ToF32}, |
7016 | | {IntrinsicOp::IOP_f32tof16, TranslateF32ToF16, |
7017 | | DXIL::OpCode::LegacyF32ToF16}, |
7018 | | {IntrinsicOp::IOP_faceforward, TranslateFaceforward, |
7019 | | DXIL::OpCode::NumOpCodes}, |
7020 | | {IntrinsicOp::IOP_firstbithigh, TranslateFirstbitHi, |
7021 | | DXIL::OpCode::FirstbitSHi}, |
7022 | | {IntrinsicOp::IOP_firstbitlow, TranslateFirstbitLo, |
7023 | | DXIL::OpCode::FirstbitLo}, |
7024 | | {IntrinsicOp::IOP_floor, TrivialUnaryOperation, DXIL::OpCode::Round_ni}, |
7025 | | {IntrinsicOp::IOP_fma, TrivialTrinaryOperation, DXIL::OpCode::Fma}, |
7026 | | {IntrinsicOp::IOP_fmod, TranslateFMod, DXIL::OpCode::NumOpCodes}, |
7027 | | {IntrinsicOp::IOP_frac, TrivialUnaryOperation, DXIL::OpCode::Frc}, |
7028 | | {IntrinsicOp::IOP_frexp, TranslateFrexp, DXIL::OpCode::NumOpCodes}, |
7029 | | {IntrinsicOp::IOP_fwidth, TranslateFWidth, DXIL::OpCode::NumOpCodes}, |
7030 | | {IntrinsicOp::IOP_isfinite, TrivialIsSpecialFloat, DXIL::OpCode::IsFinite}, |
7031 | | {IntrinsicOp::IOP_isinf, TrivialIsSpecialFloat, DXIL::OpCode::IsInf}, |
7032 | | {IntrinsicOp::IOP_isnan, TrivialIsSpecialFloat, DXIL::OpCode::IsNaN}, |
7033 | | {IntrinsicOp::IOP_ldexp, TranslateLdExp, DXIL::OpCode::NumOpCodes}, |
7034 | | {IntrinsicOp::IOP_length, TranslateLength, DXIL::OpCode::NumOpCodes}, |
7035 | | {IntrinsicOp::IOP_lerp, TranslateLerp, DXIL::OpCode::NumOpCodes}, |
7036 | | {IntrinsicOp::IOP_lit, TranslateLit, DXIL::OpCode::NumOpCodes}, |
7037 | | {IntrinsicOp::IOP_log, TranslateLog, DXIL::OpCode::NumOpCodes}, |
7038 | | {IntrinsicOp::IOP_log10, TranslateLog10, DXIL::OpCode::NumOpCodes}, |
7039 | | {IntrinsicOp::IOP_log2, TrivialUnaryOperation, DXIL::OpCode::Log}, |
7040 | | {IntrinsicOp::IOP_mad, TranslateFUITrinary, DXIL::OpCode::IMad}, |
7041 | | {IntrinsicOp::IOP_max, TranslateFUIBinary, DXIL::OpCode::IMax}, |
7042 | | {IntrinsicOp::IOP_min, TranslateFUIBinary, DXIL::OpCode::IMin}, |
7043 | | {IntrinsicOp::IOP_modf, TranslateModF, DXIL::OpCode::NumOpCodes}, |
7044 | | {IntrinsicOp::IOP_msad4, TranslateMSad4, DXIL::OpCode::NumOpCodes}, |
7045 | | {IntrinsicOp::IOP_mul, TranslateMul, DXIL::OpCode::NumOpCodes}, |
7046 | | {IntrinsicOp::IOP_normalize, TranslateNormalize, DXIL::OpCode::NumOpCodes}, |
7047 | | {IntrinsicOp::IOP_or, TranslateOr, DXIL::OpCode::NumOpCodes}, |
7048 | | {IntrinsicOp::IOP_pack_clamp_s8, TranslatePack, DXIL::OpCode::Pack4x8}, |
7049 | | {IntrinsicOp::IOP_pack_clamp_u8, TranslatePack, DXIL::OpCode::Pack4x8}, |
7050 | | {IntrinsicOp::IOP_pack_s8, TranslatePack, DXIL::OpCode::Pack4x8}, |
7051 | | {IntrinsicOp::IOP_pack_u8, TranslatePack, DXIL::OpCode::Pack4x8}, |
7052 | | {IntrinsicOp::IOP_pow, TranslatePow, DXIL::OpCode::NumOpCodes}, |
7053 | | {IntrinsicOp::IOP_printf, TranslatePrintf, DXIL::OpCode::NumOpCodes}, |
7054 | | {IntrinsicOp::IOP_radians, TranslateRadians, DXIL::OpCode::NumOpCodes}, |
7055 | | {IntrinsicOp::IOP_rcp, TranslateRCP, DXIL::OpCode::NumOpCodes}, |
7056 | | {IntrinsicOp::IOP_reflect, TranslateReflect, DXIL::OpCode::NumOpCodes}, |
7057 | | {IntrinsicOp::IOP_refract, TranslateRefract, DXIL::OpCode::NumOpCodes}, |
7058 | | {IntrinsicOp::IOP_reversebits, TrivialUnaryOperation, DXIL::OpCode::Bfrev}, |
7059 | | {IntrinsicOp::IOP_round, TrivialUnaryOperation, DXIL::OpCode::Round_ne}, |
7060 | | {IntrinsicOp::IOP_rsqrt, TrivialUnaryOperation, DXIL::OpCode::Rsqrt}, |
7061 | | {IntrinsicOp::IOP_saturate, TrivialUnaryOperation, DXIL::OpCode::Saturate}, |
7062 | | {IntrinsicOp::IOP_select, TranslateSelect, DXIL::OpCode::NumOpCodes}, |
7063 | | {IntrinsicOp::IOP_sign, TranslateSign, DXIL::OpCode::NumOpCodes}, |
7064 | | {IntrinsicOp::IOP_sin, TrivialUnaryOperation, DXIL::OpCode::Sin}, |
7065 | | {IntrinsicOp::IOP_sincos, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7066 | | {IntrinsicOp::IOP_sinh, TrivialUnaryOperation, DXIL::OpCode::Hsin}, |
7067 | | {IntrinsicOp::IOP_smoothstep, TranslateSmoothStep, |
7068 | | DXIL::OpCode::NumOpCodes}, |
7069 | | {IntrinsicOp::IOP_source_mark, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7070 | | {IntrinsicOp::IOP_sqrt, TrivialUnaryOperation, DXIL::OpCode::Sqrt}, |
7071 | | {IntrinsicOp::IOP_step, TranslateStep, DXIL::OpCode::NumOpCodes}, |
7072 | | {IntrinsicOp::IOP_tan, TrivialUnaryOperation, DXIL::OpCode::Tan}, |
7073 | | {IntrinsicOp::IOP_tanh, TrivialUnaryOperation, DXIL::OpCode::Htan}, |
7074 | | {IntrinsicOp::IOP_tex1D, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7075 | | {IntrinsicOp::IOP_tex1Dbias, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7076 | | {IntrinsicOp::IOP_tex1Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7077 | | {IntrinsicOp::IOP_tex1Dlod, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7078 | | {IntrinsicOp::IOP_tex1Dproj, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7079 | | {IntrinsicOp::IOP_tex2D, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7080 | | {IntrinsicOp::IOP_tex2Dbias, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7081 | | {IntrinsicOp::IOP_tex2Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7082 | | {IntrinsicOp::IOP_tex2Dlod, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7083 | | {IntrinsicOp::IOP_tex2Dproj, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7084 | | {IntrinsicOp::IOP_tex3D, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7085 | | {IntrinsicOp::IOP_tex3Dbias, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7086 | | {IntrinsicOp::IOP_tex3Dgrad, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7087 | | {IntrinsicOp::IOP_tex3Dlod, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7088 | | {IntrinsicOp::IOP_tex3Dproj, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7089 | | {IntrinsicOp::IOP_texCUBE, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7090 | | {IntrinsicOp::IOP_texCUBEbias, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7091 | | {IntrinsicOp::IOP_texCUBEgrad, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7092 | | {IntrinsicOp::IOP_texCUBElod, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7093 | | {IntrinsicOp::IOP_texCUBEproj, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7094 | | {IntrinsicOp::IOP_transpose, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7095 | | {IntrinsicOp::IOP_trunc, TrivialUnaryOperation, DXIL::OpCode::Round_z}, |
7096 | | {IntrinsicOp::IOP_unpack_s8s16, TranslateUnpack, DXIL::OpCode::Unpack4x8}, |
7097 | | {IntrinsicOp::IOP_unpack_s8s32, TranslateUnpack, DXIL::OpCode::Unpack4x8}, |
7098 | | {IntrinsicOp::IOP_unpack_u8u16, TranslateUnpack, DXIL::OpCode::Unpack4x8}, |
7099 | | {IntrinsicOp::IOP_unpack_u8u32, TranslateUnpack, DXIL::OpCode::Unpack4x8}, |
7100 | | {IntrinsicOp::IOP_VkRawBufferLoad, UnsupportedVulkanIntrinsic, |
7101 | | DXIL::OpCode::NumOpCodes}, |
7102 | | {IntrinsicOp::IOP_VkRawBufferStore, UnsupportedVulkanIntrinsic, |
7103 | | DXIL::OpCode::NumOpCodes}, |
7104 | | {IntrinsicOp::IOP_VkReadClock, UnsupportedVulkanIntrinsic, |
7105 | | DXIL::OpCode::NumOpCodes}, |
7106 | | {IntrinsicOp::IOP_Vkext_execution_mode, UnsupportedVulkanIntrinsic, |
7107 | | DXIL::OpCode::NumOpCodes}, |
7108 | | {IntrinsicOp::IOP_Vkext_execution_mode_id, UnsupportedVulkanIntrinsic, |
7109 | | DXIL::OpCode::NumOpCodes}, |
7110 | | {IntrinsicOp::MOP_Append, StreamOutputLower, DXIL::OpCode::EmitStream}, |
7111 | | {IntrinsicOp::MOP_RestartStrip, StreamOutputLower, DXIL::OpCode::CutStream}, |
7112 | | {IntrinsicOp::MOP_CalculateLevelOfDetail, TranslateCalculateLOD, |
7113 | | DXIL::OpCode::NumOpCodes}, |
7114 | | {IntrinsicOp::MOP_CalculateLevelOfDetailUnclamped, TranslateCalculateLOD, |
7115 | | DXIL::OpCode::NumOpCodes}, |
7116 | | {IntrinsicOp::MOP_GetDimensions, TranslateGetDimensions, |
7117 | | DXIL::OpCode::NumOpCodes}, |
7118 | | {IntrinsicOp::MOP_Load, TranslateResourceLoad, DXIL::OpCode::NumOpCodes}, |
7119 | | {IntrinsicOp::MOP_Sample, TranslateSample, DXIL::OpCode::Sample}, |
7120 | | {IntrinsicOp::MOP_SampleBias, TranslateSample, DXIL::OpCode::SampleBias}, |
7121 | | {IntrinsicOp::MOP_SampleCmp, TranslateSample, DXIL::OpCode::SampleCmp}, |
7122 | | {IntrinsicOp::MOP_SampleCmpBias, TranslateSample, |
7123 | | DXIL::OpCode::SampleCmpBias}, |
7124 | | {IntrinsicOp::MOP_SampleCmpGrad, TranslateSample, |
7125 | | DXIL::OpCode::SampleCmpGrad}, |
7126 | | {IntrinsicOp::MOP_SampleCmpLevel, TranslateSample, |
7127 | | DXIL::OpCode::SampleCmpLevel}, |
7128 | | {IntrinsicOp::MOP_SampleCmpLevelZero, TranslateSample, |
7129 | | DXIL::OpCode::SampleCmpLevelZero}, |
7130 | | {IntrinsicOp::MOP_SampleGrad, TranslateSample, DXIL::OpCode::SampleGrad}, |
7131 | | {IntrinsicOp::MOP_SampleLevel, TranslateSample, DXIL::OpCode::SampleLevel}, |
7132 | | {IntrinsicOp::MOP_Gather, TranslateGather, DXIL::OpCode::TextureGather}, |
7133 | | {IntrinsicOp::MOP_GatherAlpha, TranslateGather, |
7134 | | DXIL::OpCode::TextureGather}, |
7135 | | {IntrinsicOp::MOP_GatherBlue, TranslateGather, DXIL::OpCode::TextureGather}, |
7136 | | {IntrinsicOp::MOP_GatherCmp, TranslateGather, |
7137 | | DXIL::OpCode::TextureGatherCmp}, |
7138 | | {IntrinsicOp::MOP_GatherCmpAlpha, TranslateGather, |
7139 | | DXIL::OpCode::TextureGatherCmp}, |
7140 | | {IntrinsicOp::MOP_GatherCmpBlue, TranslateGather, |
7141 | | DXIL::OpCode::TextureGatherCmp}, |
7142 | | {IntrinsicOp::MOP_GatherCmpGreen, TranslateGather, |
7143 | | DXIL::OpCode::TextureGatherCmp}, |
7144 | | {IntrinsicOp::MOP_GatherCmpRed, TranslateGather, |
7145 | | DXIL::OpCode::TextureGatherCmp}, |
7146 | | {IntrinsicOp::MOP_GatherGreen, TranslateGather, |
7147 | | DXIL::OpCode::TextureGather}, |
7148 | | {IntrinsicOp::MOP_GatherRaw, TranslateGather, |
7149 | | DXIL::OpCode::TextureGatherRaw}, |
7150 | | {IntrinsicOp::MOP_GatherRed, TranslateGather, DXIL::OpCode::TextureGather}, |
7151 | | {IntrinsicOp::MOP_GetSamplePosition, TranslateGetSamplePosition, |
7152 | | DXIL::OpCode::NumOpCodes}, |
7153 | | {IntrinsicOp::MOP_Load2, TranslateResourceLoad, DXIL::OpCode::NumOpCodes}, |
7154 | | {IntrinsicOp::MOP_Load3, TranslateResourceLoad, DXIL::OpCode::NumOpCodes}, |
7155 | | {IntrinsicOp::MOP_Load4, TranslateResourceLoad, DXIL::OpCode::NumOpCodes}, |
7156 | | {IntrinsicOp::MOP_InterlockedAdd, TranslateMopAtomicBinaryOperation, |
7157 | | DXIL::OpCode::NumOpCodes}, |
7158 | | {IntrinsicOp::MOP_InterlockedAdd64, TranslateMopAtomicBinaryOperation, |
7159 | | DXIL::OpCode::NumOpCodes}, |
7160 | | {IntrinsicOp::MOP_InterlockedAnd, TranslateMopAtomicBinaryOperation, |
7161 | | DXIL::OpCode::NumOpCodes}, |
7162 | | {IntrinsicOp::MOP_InterlockedAnd64, TranslateMopAtomicBinaryOperation, |
7163 | | DXIL::OpCode::NumOpCodes}, |
7164 | | {IntrinsicOp::MOP_InterlockedCompareExchange, TranslateMopAtomicCmpXChg, |
7165 | | DXIL::OpCode::NumOpCodes}, |
7166 | | {IntrinsicOp::MOP_InterlockedCompareExchange64, TranslateMopAtomicCmpXChg, |
7167 | | DXIL::OpCode::NumOpCodes}, |
7168 | | {IntrinsicOp::MOP_InterlockedCompareExchangeFloatBitwise, |
7169 | | TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes}, |
7170 | | {IntrinsicOp::MOP_InterlockedCompareStore, TranslateMopAtomicCmpXChg, |
7171 | | DXIL::OpCode::NumOpCodes}, |
7172 | | {IntrinsicOp::MOP_InterlockedCompareStore64, TranslateMopAtomicCmpXChg, |
7173 | | DXIL::OpCode::NumOpCodes}, |
7174 | | {IntrinsicOp::MOP_InterlockedCompareStoreFloatBitwise, |
7175 | | TranslateMopAtomicCmpXChg, DXIL::OpCode::NumOpCodes}, |
7176 | | {IntrinsicOp::MOP_InterlockedExchange, TranslateMopAtomicBinaryOperation, |
7177 | | DXIL::OpCode::NumOpCodes}, |
7178 | | {IntrinsicOp::MOP_InterlockedExchange64, TranslateMopAtomicBinaryOperation, |
7179 | | DXIL::OpCode::NumOpCodes}, |
7180 | | {IntrinsicOp::MOP_InterlockedExchangeFloat, |
7181 | | TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes}, |
7182 | | {IntrinsicOp::MOP_InterlockedMax, TranslateMopAtomicBinaryOperation, |
7183 | | DXIL::OpCode::NumOpCodes}, |
7184 | | {IntrinsicOp::MOP_InterlockedMax64, TranslateMopAtomicBinaryOperation, |
7185 | | DXIL::OpCode::NumOpCodes}, |
7186 | | {IntrinsicOp::MOP_InterlockedMin, TranslateMopAtomicBinaryOperation, |
7187 | | DXIL::OpCode::NumOpCodes}, |
7188 | | {IntrinsicOp::MOP_InterlockedMin64, TranslateMopAtomicBinaryOperation, |
7189 | | DXIL::OpCode::NumOpCodes}, |
7190 | | {IntrinsicOp::MOP_InterlockedOr, TranslateMopAtomicBinaryOperation, |
7191 | | DXIL::OpCode::NumOpCodes}, |
7192 | | {IntrinsicOp::MOP_InterlockedOr64, TranslateMopAtomicBinaryOperation, |
7193 | | DXIL::OpCode::NumOpCodes}, |
7194 | | {IntrinsicOp::MOP_InterlockedXor, TranslateMopAtomicBinaryOperation, |
7195 | | DXIL::OpCode::NumOpCodes}, |
7196 | | {IntrinsicOp::MOP_InterlockedXor64, TranslateMopAtomicBinaryOperation, |
7197 | | DXIL::OpCode::NumOpCodes}, |
7198 | | {IntrinsicOp::MOP_Store, TranslateResourceStore, DXIL::OpCode::NumOpCodes}, |
7199 | | {IntrinsicOp::MOP_Store2, TranslateResourceStore, DXIL::OpCode::NumOpCodes}, |
7200 | | {IntrinsicOp::MOP_Store3, TranslateResourceStore, DXIL::OpCode::NumOpCodes}, |
7201 | | {IntrinsicOp::MOP_Store4, TranslateResourceStore, DXIL::OpCode::NumOpCodes}, |
7202 | | {IntrinsicOp::MOP_DecrementCounter, GenerateUpdateCounter, |
7203 | | DXIL::OpCode::NumOpCodes}, |
7204 | | {IntrinsicOp::MOP_IncrementCounter, GenerateUpdateCounter, |
7205 | | DXIL::OpCode::NumOpCodes}, |
7206 | | {IntrinsicOp::MOP_Consume, EmptyLower, DXIL::OpCode::NumOpCodes}, |
7207 | | {IntrinsicOp::MOP_WriteSamplerFeedback, TranslateWriteSamplerFeedback, |
7208 | | DXIL::OpCode::WriteSamplerFeedback}, |
7209 | | {IntrinsicOp::MOP_WriteSamplerFeedbackBias, TranslateWriteSamplerFeedback, |
7210 | | DXIL::OpCode::WriteSamplerFeedbackBias}, |
7211 | | {IntrinsicOp::MOP_WriteSamplerFeedbackGrad, TranslateWriteSamplerFeedback, |
7212 | | DXIL::OpCode::WriteSamplerFeedbackGrad}, |
7213 | | {IntrinsicOp::MOP_WriteSamplerFeedbackLevel, TranslateWriteSamplerFeedback, |
7214 | | DXIL::OpCode::WriteSamplerFeedbackLevel}, |
7215 | | |
7216 | | {IntrinsicOp::MOP_Abort, TranslateGenericRayQueryMethod, |
7217 | | DXIL::OpCode::RayQuery_Abort}, |
7218 | | {IntrinsicOp::MOP_CandidateGeometryIndex, TranslateGenericRayQueryMethod, |
7219 | | DXIL::OpCode::RayQuery_CandidateGeometryIndex}, |
7220 | | {IntrinsicOp::MOP_CandidateInstanceContributionToHitGroupIndex, |
7221 | | TranslateGenericRayQueryMethod, |
7222 | | DXIL::OpCode::RayQuery_CandidateInstanceContributionToHitGroupIndex}, |
7223 | | {IntrinsicOp::MOP_CandidateInstanceID, TranslateGenericRayQueryMethod, |
7224 | | DXIL::OpCode::RayQuery_CandidateInstanceID}, |
7225 | | {IntrinsicOp::MOP_CandidateInstanceIndex, TranslateGenericRayQueryMethod, |
7226 | | DXIL::OpCode::RayQuery_CandidateInstanceIndex}, |
7227 | | {IntrinsicOp::MOP_CandidateObjectRayDirection, |
7228 | | TranslateRayQueryFloat3Getter, |
7229 | | DXIL::OpCode::RayQuery_CandidateObjectRayDirection}, |
7230 | | {IntrinsicOp::MOP_CandidateObjectRayOrigin, TranslateRayQueryFloat3Getter, |
7231 | | DXIL::OpCode::RayQuery_CandidateObjectRayOrigin}, |
7232 | | {IntrinsicOp::MOP_CandidateObjectToWorld3x4, |
7233 | | TranslateRayQueryMatrix3x4Operation, |
7234 | | DXIL::OpCode::RayQuery_CandidateObjectToWorld3x4}, |
7235 | | {IntrinsicOp::MOP_CandidateObjectToWorld4x3, |
7236 | | TranslateRayQueryTransposedMatrix3x4Operation, |
7237 | | DXIL::OpCode::RayQuery_CandidateObjectToWorld3x4}, |
7238 | | {IntrinsicOp::MOP_CandidatePrimitiveIndex, TranslateGenericRayQueryMethod, |
7239 | | DXIL::OpCode::RayQuery_CandidatePrimitiveIndex}, |
7240 | | {IntrinsicOp::MOP_CandidateProceduralPrimitiveNonOpaque, |
7241 | | TranslateGenericRayQueryMethod, |
7242 | | DXIL::OpCode::RayQuery_CandidateProceduralPrimitiveNonOpaque}, |
7243 | | {IntrinsicOp::MOP_CandidateTriangleBarycentrics, |
7244 | | TranslateRayQueryFloat2Getter, |
7245 | | DXIL::OpCode::RayQuery_CandidateTriangleBarycentrics}, |
7246 | | {IntrinsicOp::MOP_CandidateTriangleFrontFace, |
7247 | | TranslateGenericRayQueryMethod, |
7248 | | DXIL::OpCode::RayQuery_CandidateTriangleFrontFace}, |
7249 | | {IntrinsicOp::MOP_CandidateTriangleRayT, TranslateGenericRayQueryMethod, |
7250 | | DXIL::OpCode::RayQuery_CandidateTriangleRayT}, |
7251 | | {IntrinsicOp::MOP_CandidateType, TranslateGenericRayQueryMethod, |
7252 | | DXIL::OpCode::RayQuery_CandidateType}, |
7253 | | {IntrinsicOp::MOP_CandidateWorldToObject3x4, |
7254 | | TranslateRayQueryMatrix3x4Operation, |
7255 | | DXIL::OpCode::RayQuery_CandidateWorldToObject3x4}, |
7256 | | {IntrinsicOp::MOP_CandidateWorldToObject4x3, |
7257 | | TranslateRayQueryTransposedMatrix3x4Operation, |
7258 | | DXIL::OpCode::RayQuery_CandidateWorldToObject3x4}, |
7259 | | {IntrinsicOp::MOP_CommitNonOpaqueTriangleHit, |
7260 | | TranslateGenericRayQueryMethod, |
7261 | | DXIL::OpCode::RayQuery_CommitNonOpaqueTriangleHit}, |
7262 | | {IntrinsicOp::MOP_CommitProceduralPrimitiveHit, |
7263 | | TranslateCommitProceduralPrimitiveHit, |
7264 | | DXIL::OpCode::RayQuery_CommitProceduralPrimitiveHit}, |
7265 | | {IntrinsicOp::MOP_CommittedGeometryIndex, TranslateGenericRayQueryMethod, |
7266 | | DXIL::OpCode::RayQuery_CommittedGeometryIndex}, |
7267 | | {IntrinsicOp::MOP_CommittedInstanceContributionToHitGroupIndex, |
7268 | | TranslateGenericRayQueryMethod, |
7269 | | DXIL::OpCode::RayQuery_CommittedInstanceContributionToHitGroupIndex}, |
7270 | | {IntrinsicOp::MOP_CommittedInstanceID, TranslateGenericRayQueryMethod, |
7271 | | DXIL::OpCode::RayQuery_CommittedInstanceID}, |
7272 | | {IntrinsicOp::MOP_CommittedInstanceIndex, TranslateGenericRayQueryMethod, |
7273 | | DXIL::OpCode::RayQuery_CommittedInstanceIndex}, |
7274 | | {IntrinsicOp::MOP_CommittedObjectRayDirection, |
7275 | | TranslateRayQueryFloat3Getter, |
7276 | | DXIL::OpCode::RayQuery_CommittedObjectRayDirection}, |
7277 | | {IntrinsicOp::MOP_CommittedObjectRayOrigin, TranslateRayQueryFloat3Getter, |
7278 | | DXIL::OpCode::RayQuery_CommittedObjectRayOrigin}, |
7279 | | {IntrinsicOp::MOP_CommittedObjectToWorld3x4, |
7280 | | TranslateRayQueryMatrix3x4Operation, |
7281 | | DXIL::OpCode::RayQuery_CommittedObjectToWorld3x4}, |
7282 | | {IntrinsicOp::MOP_CommittedObjectToWorld4x3, |
7283 | | TranslateRayQueryTransposedMatrix3x4Operation, |
7284 | | DXIL::OpCode::RayQuery_CommittedObjectToWorld3x4}, |
7285 | | {IntrinsicOp::MOP_CommittedPrimitiveIndex, TranslateGenericRayQueryMethod, |
7286 | | DXIL::OpCode::RayQuery_CommittedPrimitiveIndex}, |
7287 | | {IntrinsicOp::MOP_CommittedRayT, TranslateGenericRayQueryMethod, |
7288 | | DXIL::OpCode::RayQuery_CommittedRayT}, |
7289 | | {IntrinsicOp::MOP_CommittedStatus, TranslateGenericRayQueryMethod, |
7290 | | DXIL::OpCode::RayQuery_CommittedStatus}, |
7291 | | {IntrinsicOp::MOP_CommittedTriangleBarycentrics, |
7292 | | TranslateRayQueryFloat2Getter, |
7293 | | DXIL::OpCode::RayQuery_CommittedTriangleBarycentrics}, |
7294 | | {IntrinsicOp::MOP_CommittedTriangleFrontFace, |
7295 | | TranslateGenericRayQueryMethod, |
7296 | | DXIL::OpCode::RayQuery_CommittedTriangleFrontFace}, |
7297 | | {IntrinsicOp::MOP_CommittedWorldToObject3x4, |
7298 | | TranslateRayQueryMatrix3x4Operation, |
7299 | | DXIL::OpCode::RayQuery_CommittedWorldToObject3x4}, |
7300 | | {IntrinsicOp::MOP_CommittedWorldToObject4x3, |
7301 | | TranslateRayQueryTransposedMatrix3x4Operation, |
7302 | | DXIL::OpCode::RayQuery_CommittedWorldToObject3x4}, |
7303 | | {IntrinsicOp::MOP_Proceed, TranslateGenericRayQueryMethod, |
7304 | | DXIL::OpCode::RayQuery_Proceed}, |
7305 | | {IntrinsicOp::MOP_RayFlags, TranslateGenericRayQueryMethod, |
7306 | | DXIL::OpCode::RayQuery_RayFlags}, |
7307 | | {IntrinsicOp::MOP_RayTMin, TranslateGenericRayQueryMethod, |
7308 | | DXIL::OpCode::RayQuery_RayTMin}, |
7309 | | {IntrinsicOp::MOP_TraceRayInline, TranslateTraceRayInline, |
7310 | | DXIL::OpCode::RayQuery_TraceRayInline}, |
7311 | | {IntrinsicOp::MOP_WorldRayDirection, TranslateRayQueryFloat3Getter, |
7312 | | DXIL::OpCode::RayQuery_WorldRayDirection}, |
7313 | | {IntrinsicOp::MOP_WorldRayOrigin, TranslateRayQueryFloat3Getter, |
7314 | | DXIL::OpCode::RayQuery_WorldRayOrigin}, |
7315 | | {IntrinsicOp::MOP_Count, TranslateNodeGetInputRecordCount, |
7316 | | DXIL::OpCode::GetInputRecordCount}, |
7317 | | {IntrinsicOp::MOP_FinishedCrossGroupSharing, |
7318 | | TranslateNodeFinishedCrossGroupSharing, |
7319 | | DXIL::OpCode::FinishedCrossGroupSharing}, |
7320 | | {IntrinsicOp::MOP_GetGroupNodeOutputRecords, |
7321 | | TranslateGetGroupNodeOutputRecords, |
7322 | | DXIL::OpCode::AllocateNodeOutputRecords}, |
7323 | | {IntrinsicOp::MOP_GetThreadNodeOutputRecords, |
7324 | | TranslateGetThreadNodeOutputRecords, |
7325 | | DXIL::OpCode::AllocateNodeOutputRecords}, |
7326 | | {IntrinsicOp::MOP_IsValid, TranslateNodeOutputIsValid, |
7327 | | DXIL::OpCode::NodeOutputIsValid}, |
7328 | | {IntrinsicOp::MOP_GroupIncrementOutputCount, |
7329 | | TranslateNodeGroupIncrementOutputCount, |
7330 | | DXIL::OpCode::IncrementOutputCount}, |
7331 | | {IntrinsicOp::MOP_ThreadIncrementOutputCount, |
7332 | | TranslateNodeThreadIncrementOutputCount, |
7333 | | DXIL::OpCode::IncrementOutputCount}, |
7334 | | {IntrinsicOp::MOP_OutputComplete, TranslateNodeOutputComplete, |
7335 | | DXIL::OpCode::OutputComplete}, |
7336 | | |
7337 | | // SPIRV change starts |
7338 | | {IntrinsicOp::MOP_SubpassLoad, UnsupportedVulkanIntrinsic, |
7339 | | DXIL::OpCode::NumOpCodes}, |
7340 | | // SPIRV change ends |
7341 | | |
7342 | | // Manually added part. |
7343 | | {IntrinsicOp::IOP_InterlockedUMax, TranslateIopAtomicBinaryOperation, |
7344 | | DXIL::OpCode::NumOpCodes}, |
7345 | | {IntrinsicOp::IOP_InterlockedUMin, TranslateIopAtomicBinaryOperation, |
7346 | | DXIL::OpCode::NumOpCodes}, |
7347 | | {IntrinsicOp::IOP_WaveActiveUMax, TranslateWaveA2A, |
7348 | | DXIL::OpCode::WaveActiveOp}, |
7349 | | {IntrinsicOp::IOP_WaveActiveUMin, TranslateWaveA2A, |
7350 | | DXIL::OpCode::WaveActiveOp}, |
7351 | | {IntrinsicOp::IOP_WaveActiveUProduct, TranslateWaveA2A, |
7352 | | DXIL::OpCode::WaveActiveOp}, |
7353 | | {IntrinsicOp::IOP_WaveActiveUSum, TranslateWaveA2A, |
7354 | | DXIL::OpCode::WaveActiveOp}, |
7355 | | {IntrinsicOp::IOP_WaveMultiPrefixUProduct, TranslateWaveMultiPrefix, |
7356 | | DXIL::OpCode::WaveMultiPrefixOp}, |
7357 | | {IntrinsicOp::IOP_WaveMultiPrefixUSum, TranslateWaveMultiPrefix, |
7358 | | DXIL::OpCode::WaveMultiPrefixOp}, |
7359 | | {IntrinsicOp::IOP_WavePrefixUProduct, TranslateWaveA2A, |
7360 | | DXIL::OpCode::WavePrefixOp}, |
7361 | | {IntrinsicOp::IOP_WavePrefixUSum, TranslateWaveA2A, |
7362 | | DXIL::OpCode::WavePrefixOp}, |
7363 | | {IntrinsicOp::IOP_uabs, TranslateUAbs, DXIL::OpCode::NumOpCodes}, |
7364 | | {IntrinsicOp::IOP_uclamp, TranslateClamp, DXIL::OpCode::NumOpCodes}, |
7365 | | {IntrinsicOp::IOP_udot, TranslateDot, DXIL::OpCode::NumOpCodes}, |
7366 | | {IntrinsicOp::IOP_ufirstbithigh, TranslateFirstbitHi, |
7367 | | DXIL::OpCode::FirstbitHi}, |
7368 | | {IntrinsicOp::IOP_umad, TranslateFUITrinary, DXIL::OpCode::UMad}, |
7369 | | {IntrinsicOp::IOP_umax, TranslateFUIBinary, DXIL::OpCode::UMax}, |
7370 | | {IntrinsicOp::IOP_umin, TranslateFUIBinary, DXIL::OpCode::UMin}, |
7371 | | {IntrinsicOp::IOP_umul, TranslateMul, DXIL::OpCode::UMul}, |
7372 | | {IntrinsicOp::IOP_usign, TranslateUSign, DXIL::OpCode::UMax}, |
7373 | | {IntrinsicOp::MOP_InterlockedUMax, TranslateMopAtomicBinaryOperation, |
7374 | | DXIL::OpCode::NumOpCodes}, |
7375 | | {IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, |
7376 | | DXIL::OpCode::NumOpCodes}, |
7377 | | {IntrinsicOp::MOP_DxHitObject_MakeNop, TranslateHitObjectMakeNop, |
7378 | | DXIL::OpCode::HitObject_MakeNop}, |
7379 | | {IntrinsicOp::IOP_DxMaybeReorderThread, TranslateMaybeReorderThread, |
7380 | | DXIL::OpCode::MaybeReorderThread}, |
7381 | | {IntrinsicOp::IOP_Vkstatic_pointer_cast, UnsupportedVulkanIntrinsic, |
7382 | | DXIL::OpCode::NumOpCodes}, |
7383 | | {IntrinsicOp::IOP_Vkreinterpret_pointer_cast, UnsupportedVulkanIntrinsic, |
7384 | | DXIL::OpCode::NumOpCodes}, |
7385 | | {IntrinsicOp::MOP_GetBufferContents, UnsupportedVulkanIntrinsic, |
7386 | | DXIL::OpCode::NumOpCodes}, |
7387 | | {IntrinsicOp::MOP_DxHitObject_FromRayQuery, TranslateHitObjectFromRayQuery, |
7388 | | DXIL::OpCode::HitObject_FromRayQuery}, |
7389 | | {IntrinsicOp::MOP_DxHitObject_GetAttributes, |
7390 | | TranslateHitObjectGetAttributes, DXIL::OpCode::HitObject_Attributes}, |
7391 | | {IntrinsicOp::MOP_DxHitObject_GetGeometryIndex, |
7392 | | TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_GeometryIndex}, |
7393 | | {IntrinsicOp::MOP_DxHitObject_GetHitKind, TranslateHitObjectScalarGetter, |
7394 | | DXIL::OpCode::HitObject_HitKind}, |
7395 | | {IntrinsicOp::MOP_DxHitObject_GetInstanceID, TranslateHitObjectScalarGetter, |
7396 | | DXIL::OpCode::HitObject_InstanceID}, |
7397 | | {IntrinsicOp::MOP_DxHitObject_GetInstanceIndex, |
7398 | | TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_InstanceIndex}, |
7399 | | {IntrinsicOp::MOP_DxHitObject_GetObjectRayDirection, |
7400 | | TranslateHitObjectVectorGetter, |
7401 | | DXIL::OpCode::HitObject_ObjectRayDirection}, |
7402 | | {IntrinsicOp::MOP_DxHitObject_GetObjectRayOrigin, |
7403 | | TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_ObjectRayOrigin}, |
7404 | | {IntrinsicOp::MOP_DxHitObject_GetObjectToWorld3x4, |
7405 | | TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_ObjectToWorld3x4}, |
7406 | | {IntrinsicOp::MOP_DxHitObject_GetObjectToWorld4x3, |
7407 | | TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_ObjectToWorld3x4}, |
7408 | | {IntrinsicOp::MOP_DxHitObject_GetPrimitiveIndex, |
7409 | | TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_PrimitiveIndex}, |
7410 | | {IntrinsicOp::MOP_DxHitObject_GetRayFlags, TranslateHitObjectScalarGetter, |
7411 | | DXIL::OpCode::HitObject_RayFlags}, |
7412 | | {IntrinsicOp::MOP_DxHitObject_GetRayTCurrent, |
7413 | | TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_RayTCurrent}, |
7414 | | {IntrinsicOp::MOP_DxHitObject_GetRayTMin, TranslateHitObjectScalarGetter, |
7415 | | DXIL::OpCode::HitObject_RayTMin}, |
7416 | | {IntrinsicOp::MOP_DxHitObject_GetShaderTableIndex, |
7417 | | TranslateHitObjectScalarGetter, DXIL::OpCode::HitObject_ShaderTableIndex}, |
7418 | | {IntrinsicOp::MOP_DxHitObject_GetWorldRayDirection, |
7419 | | TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_WorldRayDirection}, |
7420 | | {IntrinsicOp::MOP_DxHitObject_GetWorldRayOrigin, |
7421 | | TranslateHitObjectVectorGetter, DXIL::OpCode::HitObject_WorldRayOrigin}, |
7422 | | {IntrinsicOp::MOP_DxHitObject_GetWorldToObject3x4, |
7423 | | TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_WorldToObject3x4}, |
7424 | | {IntrinsicOp::MOP_DxHitObject_GetWorldToObject4x3, |
7425 | | TranslateHitObjectMatrixGetter, DXIL::OpCode::HitObject_WorldToObject3x4}, |
7426 | | {IntrinsicOp::MOP_DxHitObject_Invoke, TranslateHitObjectInvoke, |
7427 | | DXIL::OpCode::HitObject_Invoke}, |
7428 | | {IntrinsicOp::MOP_DxHitObject_IsHit, TranslateHitObjectScalarGetter, |
7429 | | DXIL::OpCode::HitObject_IsHit}, |
7430 | | {IntrinsicOp::MOP_DxHitObject_IsMiss, TranslateHitObjectScalarGetter, |
7431 | | DXIL::OpCode::HitObject_IsMiss}, |
7432 | | {IntrinsicOp::MOP_DxHitObject_IsNop, TranslateHitObjectScalarGetter, |
7433 | | DXIL::OpCode::HitObject_IsNop}, |
7434 | | {IntrinsicOp::MOP_DxHitObject_LoadLocalRootTableConstant, |
7435 | | TranslateHitObjectLoadLocalRootTableConstant, |
7436 | | DXIL::OpCode::HitObject_LoadLocalRootTableConstant}, |
7437 | | {IntrinsicOp::MOP_DxHitObject_MakeMiss, TranslateHitObjectMakeMiss, |
7438 | | DXIL::OpCode::HitObject_MakeMiss}, |
7439 | | {IntrinsicOp::MOP_DxHitObject_SetShaderTableIndex, |
7440 | | TranslateHitObjectSetShaderTableIndex, |
7441 | | DXIL::OpCode::HitObject_SetShaderTableIndex}, |
7442 | | {IntrinsicOp::MOP_DxHitObject_TraceRay, TranslateHitObjectTraceRay, |
7443 | | DXIL::OpCode::HitObject_TraceRay}, |
7444 | | |
7445 | | {IntrinsicOp::IOP___builtin_MatVecMul, TranslateMatVecMul, |
7446 | | DXIL::OpCode::MatVecMul}, |
7447 | | {IntrinsicOp::IOP___builtin_MatVecMulAdd, TranslateMatVecMulAdd, |
7448 | | DXIL::OpCode::MatVecMulAdd}, |
7449 | | {IntrinsicOp::IOP___builtin_OuterProductAccumulate, |
7450 | | TranslateOuterProductAccumulate, DXIL::OpCode::OuterProductAccumulate}, |
7451 | | {IntrinsicOp::IOP___builtin_VectorAccumulate, TranslateVectorAccumulate, |
7452 | | DXIL::OpCode::VectorAccumulate}, |
7453 | | }; |
7454 | | } // namespace |
7455 | | static_assert( |
7456 | | sizeof(gLowerTable) / sizeof(gLowerTable[0]) == |
7457 | | static_cast<size_t>(IntrinsicOp::Num_Intrinsics), |
7458 | | "Intrinsic lowering table must be updated to account for new intrinsics."); |
7459 | | |
7460 | | static void TranslateBuiltinIntrinsic(CallInst *CI, |
7461 | | HLOperationLowerHelper &helper, |
7462 | | HLObjectOperationLowerHelper *pObjHelper, |
7463 | 65.2k | bool &Translated) { |
7464 | 65.2k | unsigned opcode = hlsl::GetHLOpcode(CI); |
7465 | 65.2k | const IntrinsicLower &lower = gLowerTable[opcode]; |
7466 | 65.2k | Value *Result = lower.LowerFunc(CI, lower.IntriOpcode, lower.DxilOpcode, |
7467 | 65.2k | helper, pObjHelper, Translated); |
7468 | 65.2k | if (Result) |
7469 | 40.3k | CI->replaceAllUsesWith(Result); |
7470 | 65.2k | } |
7471 | | |
7472 | | // SharedMem. |
7473 | | namespace { |
7474 | | |
7475 | 496 | bool IsSharedMemPtr(Value *Ptr) { |
7476 | 496 | return Ptr->getType()->getPointerAddressSpace() == DXIL::kTGSMAddrSpace; |
7477 | 496 | } |
7478 | | |
7479 | 496 | bool IsLocalVariablePtr(Value *Ptr) { |
7480 | 1.10k | while (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { |
7481 | 608 | Ptr = GEP->getPointerOperand(); |
7482 | 608 | } |
7483 | 496 | bool isAlloca = isa<AllocaInst>(Ptr); |
7484 | 496 | if (isAlloca) |
7485 | 0 | return true; |
7486 | | |
7487 | 496 | GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr); |
7488 | 496 | if (!GV) |
7489 | 496 | return false; |
7490 | | |
7491 | 0 | return GV->getLinkage() == GlobalValue::LinkageTypes::InternalLinkage; |
7492 | 496 | } |
7493 | | |
7494 | | } // namespace |
7495 | | |
7496 | | // Constant buffer. |
7497 | | namespace { |
7498 | 2.31k | unsigned GetEltTypeByteSizeForConstBuf(Type *EltType, const DataLayout &DL) { |
7499 | 2.31k | DXASSERT(EltType->isIntegerTy() || EltType->isFloatingPointTy(), |
7500 | 2.31k | "not an element type"); |
7501 | | // TODO: Use real size after change constant buffer into linear layout. |
7502 | 2.31k | if (DL.getTypeSizeInBits(EltType) <= 32) { |
7503 | | // Constant buffer is 4 bytes align. |
7504 | 2.26k | return 4; |
7505 | 2.26k | } |
7506 | | |
7507 | 48 | return 8; |
7508 | 2.31k | } |
7509 | | |
7510 | | Value *GenerateCBLoad(Value *handle, Value *offset, Type *EltTy, OP *hlslOP, |
7511 | 0 | IRBuilder<> &Builder) { |
7512 | 0 | Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoad); |
7513 | 0 |
|
7514 | 0 | DXASSERT(!EltTy->isIntegerTy(1), |
7515 | 0 | "Bools should not be loaded as their register representation."); |
7516 | 0 |
|
7517 | 0 | // Align to 8 bytes for now. |
7518 | 0 | Constant *align = hlslOP->GetU32Const(8); |
7519 | 0 | Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoad, EltTy); |
7520 | 0 | return Builder.CreateCall(CBLoad, {OpArg, handle, offset, align}); |
7521 | 0 | } |
7522 | | |
7523 | | Value *TranslateConstBufMatLd(Type *matType, Value *handle, Value *offset, |
7524 | | bool colMajor, OP *OP, const DataLayout &DL, |
7525 | 0 | IRBuilder<> &Builder) { |
7526 | 0 | HLMatrixType MatTy = HLMatrixType::cast(matType); |
7527 | 0 | Type *EltTy = MatTy.getElementTypeForMem(); |
7528 | 0 | unsigned matSize = MatTy.getNumElements(); |
7529 | 0 | std::vector<Value *> elts(matSize); |
7530 | 0 | Value *EltByteSize = ConstantInt::get( |
7531 | 0 | offset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL)); |
7532 | 0 |
|
7533 | 0 | // TODO: use real size after change constant buffer into linear layout. |
7534 | 0 | Value *baseOffset = offset; |
7535 | 0 | for (unsigned i = 0; i < matSize; i++) { |
7536 | 0 | elts[i] = GenerateCBLoad(handle, baseOffset, EltTy, OP, Builder); |
7537 | 0 | baseOffset = Builder.CreateAdd(baseOffset, EltByteSize); |
7538 | 0 | } |
7539 | 0 |
|
7540 | 0 | Value *Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder); |
7541 | 0 | Vec = MatTy.emitLoweredMemToReg(Vec, Builder); |
7542 | 0 | return Vec; |
7543 | 0 | } |
7544 | | |
7545 | | void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset, |
7546 | | hlsl::OP *hlslOP, IRBuilder<> &Builder, |
7547 | | DxilFieldAnnotation *prevFieldAnnotation, |
7548 | | const DataLayout &DL, DxilTypeSystem &dxilTypeSys, |
7549 | | HLObjectOperationLowerHelper *pObjHelper); |
7550 | | |
7551 | | Value *GenerateVecEltFromGEP(Value *ldData, GetElementPtrInst *GEP, |
7552 | 104 | IRBuilder<> &Builder, bool bInsertLdNextToGEP) { |
7553 | 104 | DXASSERT(GEP->getNumIndices() == 2, "must have 2 level"); |
7554 | 104 | Value *baseIdx = (GEP->idx_begin())->get(); |
7555 | 104 | Value *zeroIdx = Builder.getInt32(0); |
7556 | 104 | DXASSERT_LOCALVAR(baseIdx && zeroIdx, baseIdx == zeroIdx, |
7557 | 104 | "base index must be 0"); |
7558 | 104 | Value *idx = (GEP->idx_begin() + 1)->get(); |
7559 | 104 | if (dyn_cast<ConstantInt>(idx)) { |
7560 | 56 | return Builder.CreateExtractElement(ldData, idx); |
7561 | 56 | } |
7562 | | |
7563 | | // Dynamic indexing. |
7564 | | // Copy vec to array. |
7565 | 48 | Type *Ty = ldData->getType(); |
7566 | 48 | Type *EltTy = Ty->getVectorElementType(); |
7567 | 48 | unsigned vecSize = Ty->getVectorNumElements(); |
7568 | 48 | ArrayType *AT = ArrayType::get(EltTy, vecSize); |
7569 | 48 | IRBuilder<> AllocaBuilder( |
7570 | 48 | GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt()); |
7571 | 48 | Value *tempArray = AllocaBuilder.CreateAlloca(AT); |
7572 | 48 | Value *zero = Builder.getInt32(0); |
7573 | 240 | for (unsigned int i = 0; i < vecSize; i++192 ) { |
7574 | 192 | Value *Elt = Builder.CreateExtractElement(ldData, Builder.getInt32(i)); |
7575 | 192 | Value *Ptr = |
7576 | 192 | Builder.CreateInBoundsGEP(tempArray, {zero, Builder.getInt32(i)}); |
7577 | 192 | Builder.CreateStore(Elt, Ptr); |
7578 | 192 | } |
7579 | | // Load from temp array. |
7580 | 48 | if (bInsertLdNextToGEP) { |
7581 | | // Insert the new GEP just before the old and to-be-deleted GEP |
7582 | 32 | Builder.SetInsertPoint(GEP); |
7583 | 32 | } |
7584 | 48 | Value *EltGEP = Builder.CreateInBoundsGEP(tempArray, {zero, idx}); |
7585 | 48 | return Builder.CreateLoad(EltGEP); |
7586 | 104 | } |
7587 | | |
7588 | | void TranslateResourceInCB(LoadInst *LI, |
7589 | | HLObjectOperationLowerHelper *pObjHelper, |
7590 | 314 | GlobalVariable *CbGV) { |
7591 | 314 | if (LI->user_empty()) { |
7592 | 0 | LI->eraseFromParent(); |
7593 | 0 | return; |
7594 | 0 | } |
7595 | | |
7596 | 314 | GetElementPtrInst *Ptr = cast<GetElementPtrInst>(LI->getPointerOperand()); |
7597 | 314 | CallInst *CI = cast<CallInst>(LI->user_back()); |
7598 | 314 | CallInst *Anno = cast<CallInst>(CI->user_back()); |
7599 | 314 | DxilResourceProperties RP = pObjHelper->GetResPropsFromAnnotateHandle(Anno); |
7600 | 314 | Value *ResPtr = pObjHelper->GetOrCreateResourceForCbPtr(Ptr, CbGV, RP); |
7601 | | |
7602 | | // Lower Ptr to GV base Ptr. |
7603 | 314 | Value *GvPtr = pObjHelper->LowerCbResourcePtr(Ptr, ResPtr); |
7604 | 314 | IRBuilder<> Builder(LI); |
7605 | 314 | Value *GvLd = Builder.CreateLoad(GvPtr); |
7606 | 314 | LI->replaceAllUsesWith(GvLd); |
7607 | 314 | LI->eraseFromParent(); |
7608 | 314 | } |
7609 | | |
7610 | | void TranslateCBAddressUser(Instruction *user, Value *handle, Value *baseOffset, |
7611 | | hlsl::OP *hlslOP, |
7612 | | DxilFieldAnnotation *prevFieldAnnotation, |
7613 | | DxilTypeSystem &dxilTypeSys, const DataLayout &DL, |
7614 | 0 | HLObjectOperationLowerHelper *pObjHelper) { |
7615 | 0 | IRBuilder<> Builder(user); |
7616 | 0 | if (CallInst *CI = dyn_cast<CallInst>(user)) { |
7617 | 0 | HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction()); |
7618 | 0 | unsigned opcode = GetHLOpcode(CI); |
7619 | 0 | if (group == HLOpcodeGroup::HLMatLoadStore) { |
7620 | 0 | HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode); |
7621 | 0 | bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad; |
7622 | 0 | DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad || |
7623 | 0 | matOp == HLMatLoadStoreOpcode::RowMatLoad, |
7624 | 0 | "No store on cbuffer"); |
7625 | 0 | Type *matType = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx) |
7626 | 0 | ->getType() |
7627 | 0 | ->getPointerElementType(); |
7628 | 0 | Value *newLd = TranslateConstBufMatLd(matType, handle, baseOffset, |
7629 | 0 | colMajor, hlslOP, DL, Builder); |
7630 | 0 | CI->replaceAllUsesWith(newLd); |
7631 | 0 | CI->eraseFromParent(); |
7632 | 0 | } else if (group == HLOpcodeGroup::HLSubscript) { |
7633 | 0 | HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode); |
7634 | 0 | Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx); |
7635 | 0 | HLMatrixType MatTy = |
7636 | 0 | HLMatrixType::cast(basePtr->getType()->getPointerElementType()); |
7637 | 0 | Type *EltTy = MatTy.getElementTypeForReg(); |
7638 | 0 |
|
7639 | 0 | Value *EltByteSize = ConstantInt::get( |
7640 | 0 | baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL)); |
7641 | 0 |
|
7642 | 0 | Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx); |
7643 | 0 |
|
7644 | 0 | Type *resultType = CI->getType()->getPointerElementType(); |
7645 | 0 | unsigned resultSize = 1; |
7646 | 0 | if (resultType->isVectorTy()) |
7647 | 0 | resultSize = resultType->getVectorNumElements(); |
7648 | 0 | DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix"); |
7649 | 0 | assert(resultSize <= 16); |
7650 | 0 | Value *idxList[16]; |
7651 | 0 |
|
7652 | 0 | switch (subOp) { |
7653 | 0 | case HLSubscriptOpcode::ColMatSubscript: |
7654 | 0 | case HLSubscriptOpcode::RowMatSubscript: { |
7655 | 0 | for (unsigned i = 0; i < resultSize; i++) { |
7656 | 0 | Value *idx = |
7657 | 0 | CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i); |
7658 | 0 | Value *offset = Builder.CreateMul(idx, EltByteSize); |
7659 | 0 | idxList[i] = Builder.CreateAdd(baseOffset, offset); |
7660 | 0 | } |
7661 | 0 |
|
7662 | 0 | } break; |
7663 | 0 | case HLSubscriptOpcode::RowMatElement: |
7664 | 0 | case HLSubscriptOpcode::ColMatElement: { |
7665 | 0 | Constant *EltIdxs = cast<Constant>(idx); |
7666 | 0 | for (unsigned i = 0; i < resultSize; i++) { |
7667 | 0 | Value *offset = |
7668 | 0 | Builder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize); |
7669 | 0 | idxList[i] = Builder.CreateAdd(baseOffset, offset); |
7670 | 0 | } |
7671 | 0 | } break; |
7672 | 0 | default: |
7673 | 0 | DXASSERT(0, "invalid operation on const buffer"); |
7674 | 0 | break; |
7675 | 0 | } |
7676 | 0 |
|
7677 | 0 | Value *ldData = UndefValue::get(resultType); |
7678 | 0 | if (resultType->isVectorTy()) { |
7679 | 0 | for (unsigned i = 0; i < resultSize; i++) { |
7680 | 0 | Value *eltData = |
7681 | 0 | GenerateCBLoad(handle, idxList[i], EltTy, hlslOP, Builder); |
7682 | 0 | ldData = Builder.CreateInsertElement(ldData, eltData, i); |
7683 | 0 | } |
7684 | 0 | } else { |
7685 | 0 | ldData = GenerateCBLoad(handle, idxList[0], EltTy, hlslOP, Builder); |
7686 | 0 | } |
7687 | 0 |
|
7688 | 0 | for (auto U = CI->user_begin(); U != CI->user_end();) { |
7689 | 0 | Value *subsUser = *(U++); |
7690 | 0 | if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) { |
7691 | 0 | Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder, |
7692 | 0 | /*bInsertLdNextToGEP*/ true); |
7693 | 0 |
|
7694 | 0 | for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) { |
7695 | 0 | Value *gepUser = *(gepU++); |
7696 | 0 | // Must be load here; |
7697 | 0 | LoadInst *ldUser = cast<LoadInst>(gepUser); |
7698 | 0 | ldUser->replaceAllUsesWith(subData); |
7699 | 0 | ldUser->eraseFromParent(); |
7700 | 0 | } |
7701 | 0 | GEP->eraseFromParent(); |
7702 | 0 | } else { |
7703 | 0 | // Must be load here. |
7704 | 0 | LoadInst *ldUser = cast<LoadInst>(subsUser); |
7705 | 0 | ldUser->replaceAllUsesWith(ldData); |
7706 | 0 | ldUser->eraseFromParent(); |
7707 | 0 | } |
7708 | 0 | } |
7709 | 0 |
|
7710 | 0 | CI->eraseFromParent(); |
7711 | 0 | } else { |
7712 | 0 | DXASSERT(0, "not implemented yet"); |
7713 | 0 | } |
7714 | 0 | } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) { |
7715 | 0 | Type *Ty = ldInst->getType(); |
7716 | 0 | Type *EltTy = Ty->getScalarType(); |
7717 | 0 | // Resource inside cbuffer is lowered after GenerateDxilOperations. |
7718 | 0 | if (dxilutil::IsHLSLObjectType(Ty)) { |
7719 | 0 | CallInst *CI = cast<CallInst>(handle); |
7720 | 0 | // CI should be annotate handle. |
7721 | 0 | // Need createHandle here. |
7722 | 0 | if (GetHLOpcodeGroup(CI->getCalledFunction()) == |
7723 | 0 | HLOpcodeGroup::HLAnnotateHandle) |
7724 | 0 | CI = cast<CallInst>(CI->getArgOperand(HLOperandIndex::kHandleOpIdx)); |
7725 | 0 | GlobalVariable *CbGV = cast<GlobalVariable>( |
7726 | 0 | CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx)); |
7727 | 0 | TranslateResourceInCB(ldInst, pObjHelper, CbGV); |
7728 | 0 | return; |
7729 | 0 | } |
7730 | 0 | DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass"); |
7731 | 0 |
|
7732 | 0 | unsigned EltByteSize = GetEltTypeByteSizeForConstBuf(EltTy, DL); |
7733 | 0 |
|
7734 | 0 | Value *newLd = GenerateCBLoad(handle, baseOffset, EltTy, hlslOP, Builder); |
7735 | 0 | if (Ty->isVectorTy()) { |
7736 | 0 | Value *result = UndefValue::get(Ty); |
7737 | 0 | result = Builder.CreateInsertElement(result, newLd, (uint64_t)0); |
7738 | 0 | // Update offset by 4 bytes. |
7739 | 0 | Value *offset = |
7740 | 0 | Builder.CreateAdd(baseOffset, hlslOP->GetU32Const(EltByteSize)); |
7741 | 0 | for (unsigned i = 1; i < Ty->getVectorNumElements(); i++) { |
7742 | 0 | Value *elt = GenerateCBLoad(handle, offset, EltTy, hlslOP, Builder); |
7743 | 0 | result = Builder.CreateInsertElement(result, elt, i); |
7744 | 0 | // Update offset by 4 bytes. |
7745 | 0 | offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(EltByteSize)); |
7746 | 0 | } |
7747 | 0 | newLd = result; |
7748 | 0 | } |
7749 | 0 |
|
7750 | 0 | ldInst->replaceAllUsesWith(newLd); |
7751 | 0 | ldInst->eraseFromParent(); |
7752 | 0 | } else { |
7753 | 0 | // Must be GEP here |
7754 | 0 | GetElementPtrInst *GEP = cast<GetElementPtrInst>(user); |
7755 | 0 | TranslateCBGep(GEP, handle, baseOffset, hlslOP, Builder, |
7756 | 0 | prevFieldAnnotation, DL, dxilTypeSys, pObjHelper); |
7757 | 0 | GEP->eraseFromParent(); |
7758 | 0 | } |
7759 | 0 | } |
7760 | | |
7761 | | void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset, |
7762 | | hlsl::OP *hlslOP, IRBuilder<> &Builder, |
7763 | | DxilFieldAnnotation *prevFieldAnnotation, |
7764 | | const DataLayout &DL, DxilTypeSystem &dxilTypeSys, |
7765 | 0 | HLObjectOperationLowerHelper *pObjHelper) { |
7766 | 0 | SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end()); |
7767 | 0 |
|
7768 | 0 | Value *offset = baseOffset; |
7769 | 0 | // update offset |
7770 | 0 | DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation; |
7771 | 0 |
|
7772 | 0 | gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP); |
7773 | 0 |
|
7774 | 0 | for (; GEPIt != E; GEPIt++) { |
7775 | 0 | Value *idx = GEPIt.getOperand(); |
7776 | 0 | unsigned immIdx = 0; |
7777 | 0 | bool bImmIdx = false; |
7778 | 0 | if (Constant *constIdx = dyn_cast<Constant>(idx)) { |
7779 | 0 | immIdx = constIdx->getUniqueInteger().getLimitedValue(); |
7780 | 0 | bImmIdx = true; |
7781 | 0 | } |
7782 | 0 |
|
7783 | 0 | if (GEPIt->isPointerTy()) { |
7784 | 0 | Type *EltTy = GEPIt->getPointerElementType(); |
7785 | 0 | unsigned size = 0; |
7786 | 0 | if (StructType *ST = dyn_cast<StructType>(EltTy)) { |
7787 | 0 | DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST); |
7788 | 0 | size = annotation->GetCBufferSize(); |
7789 | 0 | } else { |
7790 | 0 | DXASSERT(fieldAnnotation, "must be a field"); |
7791 | 0 | if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) { |
7792 | 0 | unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize( |
7793 | 0 | *fieldAnnotation, EltTy, dxilTypeSys); |
7794 | 0 |
|
7795 | 0 | // Decide the nested array size. |
7796 | 0 | unsigned nestedArraySize = 1; |
7797 | 0 |
|
7798 | 0 | Type *EltTy = AT->getArrayElementType(); |
7799 | 0 | // support multi level of array |
7800 | 0 | while (EltTy->isArrayTy()) { |
7801 | 0 | ArrayType *EltAT = cast<ArrayType>(EltTy); |
7802 | 0 | nestedArraySize *= EltAT->getNumElements(); |
7803 | 0 | EltTy = EltAT->getElementType(); |
7804 | 0 | } |
7805 | 0 | // Align to 4 * 4 bytes. |
7806 | 0 | unsigned alignedSize = (EltSize + 15) & 0xfffffff0; |
7807 | 0 | size = nestedArraySize * alignedSize; |
7808 | 0 | } else { |
7809 | 0 | size = DL.getTypeAllocSize(EltTy); |
7810 | 0 | } |
7811 | 0 | } |
7812 | 0 | // Align to 4 * 4 bytes. |
7813 | 0 | size = (size + 15) & 0xfffffff0; |
7814 | 0 | if (bImmIdx) { |
7815 | 0 | unsigned tempOffset = size * immIdx; |
7816 | 0 | offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset)); |
7817 | 0 | } else { |
7818 | 0 | Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size)); |
7819 | 0 | offset = Builder.CreateAdd(offset, tempOffset); |
7820 | 0 | } |
7821 | 0 | } else if (GEPIt->isStructTy()) { |
7822 | 0 | StructType *ST = cast<StructType>(*GEPIt); |
7823 | 0 | DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST); |
7824 | 0 | fieldAnnotation = &annotation->GetFieldAnnotation(immIdx); |
7825 | 0 | unsigned structOffset = fieldAnnotation->GetCBufferOffset(); |
7826 | 0 | offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(structOffset)); |
7827 | 0 | } else if (GEPIt->isArrayTy()) { |
7828 | 0 | DXASSERT(fieldAnnotation != nullptr, "must a field"); |
7829 | 0 | unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize( |
7830 | 0 | *fieldAnnotation, *GEPIt, dxilTypeSys); |
7831 | 0 | // Decide the nested array size. |
7832 | 0 | unsigned nestedArraySize = 1; |
7833 | 0 |
|
7834 | 0 | Type *EltTy = GEPIt->getArrayElementType(); |
7835 | 0 | // support multi level of array |
7836 | 0 | while (EltTy->isArrayTy()) { |
7837 | 0 | ArrayType *EltAT = cast<ArrayType>(EltTy); |
7838 | 0 | nestedArraySize *= EltAT->getNumElements(); |
7839 | 0 | EltTy = EltAT->getElementType(); |
7840 | 0 | } |
7841 | 0 | // Align to 4 * 4 bytes. |
7842 | 0 | unsigned alignedSize = (EltSize + 15) & 0xfffffff0; |
7843 | 0 | unsigned size = nestedArraySize * alignedSize; |
7844 | 0 | if (bImmIdx) { |
7845 | 0 | unsigned tempOffset = size * immIdx; |
7846 | 0 | offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset)); |
7847 | 0 | } else { |
7848 | 0 | Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size)); |
7849 | 0 | offset = Builder.CreateAdd(offset, tempOffset); |
7850 | 0 | } |
7851 | 0 | } else if (GEPIt->isVectorTy()) { |
7852 | 0 | unsigned size = DL.getTypeAllocSize(GEPIt->getVectorElementType()); |
7853 | 0 | if (bImmIdx) { |
7854 | 0 | unsigned tempOffset = size * immIdx; |
7855 | 0 | offset = Builder.CreateAdd(offset, hlslOP->GetU32Const(tempOffset)); |
7856 | 0 | } else { |
7857 | 0 | Value *tempOffset = Builder.CreateMul(idx, hlslOP->GetU32Const(size)); |
7858 | 0 | offset = Builder.CreateAdd(offset, tempOffset); |
7859 | 0 | } |
7860 | 0 | } else { |
7861 | 0 | gep_type_iterator temp = GEPIt; |
7862 | 0 | temp++; |
7863 | 0 | DXASSERT(temp == E, "scalar type must be the last"); |
7864 | 0 | } |
7865 | 0 | } |
7866 | 0 |
|
7867 | 0 | for (auto U = GEP->user_begin(); U != GEP->user_end();) { |
7868 | 0 | Instruction *user = cast<Instruction>(*(U++)); |
7869 | 0 |
|
7870 | 0 | TranslateCBAddressUser(user, handle, offset, hlslOP, fieldAnnotation, |
7871 | 0 | dxilTypeSys, DL, pObjHelper); |
7872 | 0 | } |
7873 | 0 | } |
7874 | | |
7875 | | Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx, |
7876 | | unsigned channelOffset, Type *EltTy, OP *hlslOP, |
7877 | 15.0k | IRBuilder<> &Builder) { |
7878 | 15.0k | Constant *OpArg = |
7879 | 15.0k | hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy); |
7880 | | |
7881 | 15.0k | DXASSERT(!EltTy->isIntegerTy(1), |
7882 | 15.0k | "Bools should not be loaded as their register representation."); |
7883 | | |
7884 | 15.0k | Type *doubleTy = Type::getDoubleTy(EltTy->getContext()); |
7885 | 15.0k | Type *halfTy = Type::getHalfTy(EltTy->getContext()); |
7886 | 15.0k | Type *i64Ty = Type::getInt64Ty(EltTy->getContext()); |
7887 | 15.0k | Type *i16Ty = Type::getInt16Ty(EltTy->getContext()); |
7888 | | |
7889 | 15.0k | bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty); |
7890 | 15.0k | bool is16 = (EltTy == halfTy || EltTy == i16Ty14.5k ) && !hlslOP->UseMinPrecision()762 ; |
7891 | 15.0k | DXASSERT_LOCALVAR(is16, (is16 && channelOffset < 8) || channelOffset < 4, |
7892 | 15.0k | "legacy cbuffer don't across 16 bytes register."); |
7893 | 15.0k | if (is64) { |
7894 | 428 | Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy); |
7895 | 428 | Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx}); |
7896 | 428 | DXASSERT((channelOffset & 1) == 0, |
7897 | 428 | "channel offset must be even for double"); |
7898 | 428 | unsigned eltIdx = channelOffset >> 1; |
7899 | 428 | Value *Result = Builder.CreateExtractValue(loadLegacy, eltIdx); |
7900 | 428 | return Result; |
7901 | 428 | } |
7902 | | |
7903 | 14.6k | Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy); |
7904 | 14.6k | Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx}); |
7905 | 14.6k | return Builder.CreateExtractValue(loadLegacy, channelOffset); |
7906 | 15.0k | } |
7907 | | |
7908 | | Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx, |
7909 | | unsigned channelOffset, Type *EltTy, |
7910 | | unsigned vecSize, OP *hlslOP, |
7911 | 14.7k | IRBuilder<> &Builder) { |
7912 | 14.7k | Constant *OpArg = |
7913 | 14.7k | hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy); |
7914 | | |
7915 | 14.7k | DXASSERT(!EltTy->isIntegerTy(1), |
7916 | 14.7k | "Bools should not be loaded as their register representation."); |
7917 | | |
7918 | 14.7k | Type *doubleTy = Type::getDoubleTy(EltTy->getContext()); |
7919 | 14.7k | Type *i64Ty = Type::getInt64Ty(EltTy->getContext()); |
7920 | 14.7k | Type *halfTy = Type::getHalfTy(EltTy->getContext()); |
7921 | 14.7k | Type *shortTy = Type::getInt16Ty(EltTy->getContext()); |
7922 | | |
7923 | 14.7k | bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty); |
7924 | 14.7k | bool is16 = |
7925 | 14.7k | (EltTy == shortTy || EltTy == halfTy14.5k ) && !hlslOP->UseMinPrecision()898 ; |
7926 | 14.7k | DXASSERT((is16 && channelOffset + vecSize <= 8) || |
7927 | 14.7k | (channelOffset + vecSize) <= 4, |
7928 | 14.7k | "legacy cbuffer don't across 16 bytes register."); |
7929 | 14.7k | if (is16) { |
7930 | 536 | Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy); |
7931 | 536 | Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx}); |
7932 | 536 | Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize)); |
7933 | 2.06k | for (unsigned i = 0; i < vecSize; ++i1.53k ) { |
7934 | 1.53k | Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i); |
7935 | 1.53k | Result = Builder.CreateInsertElement(Result, NewElt, i); |
7936 | 1.53k | } |
7937 | 536 | return Result; |
7938 | 536 | } |
7939 | | |
7940 | 14.2k | if (is64) { |
7941 | 76 | Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy); |
7942 | 76 | Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx}); |
7943 | 76 | Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize)); |
7944 | 76 | unsigned smallVecSize = 2; |
7945 | 76 | if (vecSize < smallVecSize) |
7946 | 0 | smallVecSize = vecSize; |
7947 | 228 | for (unsigned i = 0; i < smallVecSize; ++i152 ) { |
7948 | 152 | Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i); |
7949 | 152 | Result = Builder.CreateInsertElement(Result, NewElt, i); |
7950 | 152 | } |
7951 | 76 | if (vecSize > 2) { |
7952 | | // Got to next cb register. |
7953 | 68 | legacyIdx = Builder.CreateAdd(legacyIdx, hlslOP->GetU32Const(1)); |
7954 | 68 | Value *loadLegacy = |
7955 | 68 | Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx}); |
7956 | 204 | for (unsigned i = 2; i < vecSize; ++i136 ) { |
7957 | 136 | Value *NewElt = Builder.CreateExtractValue(loadLegacy, i - 2); |
7958 | 136 | Result = Builder.CreateInsertElement(Result, NewElt, i); |
7959 | 136 | } |
7960 | 68 | } |
7961 | 76 | return Result; |
7962 | 76 | } |
7963 | | |
7964 | 14.1k | Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, EltTy); |
7965 | 14.1k | Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx}); |
7966 | 14.1k | Value *Result = UndefValue::get(VectorType::get(EltTy, vecSize)); |
7967 | 62.4k | for (unsigned i = 0; i < vecSize; ++i48.3k ) { |
7968 | 48.3k | Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset + i); |
7969 | 48.3k | Result = Builder.CreateInsertElement(Result, NewElt, i); |
7970 | 48.3k | } |
7971 | 14.1k | return Result; |
7972 | 14.2k | } |
7973 | | |
7974 | | Value *TranslateConstBufMatLdLegacy(HLMatrixType MatTy, Value *handle, |
7975 | | Value *legacyIdx, bool colMajor, OP *OP, |
7976 | | bool memElemRepr, const DataLayout &DL, |
7977 | 2.17k | IRBuilder<> &Builder) { |
7978 | 2.17k | Type *EltTy = MatTy.getElementTypeForMem(); |
7979 | | |
7980 | 2.17k | unsigned matSize = MatTy.getNumElements(); |
7981 | 2.17k | std::vector<Value *> elts(matSize); |
7982 | 2.17k | unsigned EltByteSize = GetEltTypeByteSizeForConstBuf(EltTy, DL); |
7983 | 2.17k | if (colMajor) { |
7984 | 1.72k | unsigned colByteSize = 4 * EltByteSize; |
7985 | 1.72k | unsigned colRegSize = (colByteSize + 15) >> 4; |
7986 | 7.72k | for (unsigned c = 0; c < MatTy.getNumColumns(); c++6.00k ) { |
7987 | 6.00k | Value *col = GenerateCBLoadLegacy(handle, legacyIdx, /*channelOffset*/ 0, |
7988 | 6.00k | EltTy, MatTy.getNumRows(), OP, Builder); |
7989 | | |
7990 | 27.6k | for (unsigned r = 0; r < MatTy.getNumRows(); r++21.6k ) { |
7991 | 21.6k | unsigned matIdx = MatTy.getColumnMajorIndex(r, c); |
7992 | 21.6k | elts[matIdx] = Builder.CreateExtractElement(col, r); |
7993 | 21.6k | } |
7994 | | // Update offset for a column. |
7995 | 6.00k | legacyIdx = Builder.CreateAdd(legacyIdx, OP->GetU32Const(colRegSize)); |
7996 | 6.00k | } |
7997 | 1.72k | } else { |
7998 | 448 | unsigned rowByteSize = 4 * EltByteSize; |
7999 | 448 | unsigned rowRegSize = (rowByteSize + 15) >> 4; |
8000 | 1.73k | for (unsigned r = 0; r < MatTy.getNumRows(); r++1.28k ) { |
8001 | 1.28k | Value *row = |
8002 | 1.28k | GenerateCBLoadLegacy(handle, legacyIdx, /*channelOffset*/ 0, EltTy, |
8003 | 1.28k | MatTy.getNumColumns(), OP, Builder); |
8004 | 5.32k | for (unsigned c = 0; c < MatTy.getNumColumns(); c++4.03k ) { |
8005 | 4.03k | unsigned matIdx = MatTy.getRowMajorIndex(r, c); |
8006 | 4.03k | elts[matIdx] = Builder.CreateExtractElement(row, c); |
8007 | 4.03k | } |
8008 | | // Update offset for a row. |
8009 | 1.28k | legacyIdx = Builder.CreateAdd(legacyIdx, OP->GetU32Const(rowRegSize)); |
8010 | 1.28k | } |
8011 | 448 | } |
8012 | | |
8013 | 2.17k | Value *Vec = HLMatrixLower::BuildVector(EltTy, elts, Builder); |
8014 | 2.17k | if (!memElemRepr) |
8015 | 1.86k | Vec = MatTy.emitLoweredMemToReg(Vec, Builder); |
8016 | 2.17k | return Vec; |
8017 | 2.17k | } |
8018 | | |
8019 | | void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle, |
8020 | | Value *legacyIdx, unsigned channelOffset, |
8021 | | hlsl::OP *hlslOP, IRBuilder<> &Builder, |
8022 | | DxilFieldAnnotation *prevFieldAnnotation, |
8023 | | const DataLayout &DL, DxilTypeSystem &dxilTypeSys, |
8024 | | HLObjectOperationLowerHelper *pObjHelper); |
8025 | | |
8026 | | void TranslateCBAddressUserLegacy(Instruction *user, Value *handle, |
8027 | | Value *legacyIdx, unsigned channelOffset, |
8028 | | hlsl::OP *hlslOP, |
8029 | | DxilFieldAnnotation *prevFieldAnnotation, |
8030 | | DxilTypeSystem &dxilTypeSys, |
8031 | | const DataLayout &DL, |
8032 | 43.0k | HLObjectOperationLowerHelper *pObjHelper) { |
8033 | 43.0k | IRBuilder<> Builder(user); |
8034 | 43.0k | if (CallInst *CI = dyn_cast<CallInst>(user)) { |
8035 | 2.23k | HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction()); |
8036 | 2.23k | if (group == HLOpcodeGroup::HLMatLoadStore) { |
8037 | 1.86k | unsigned opcode = GetHLOpcode(CI); |
8038 | 1.86k | HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode); |
8039 | 1.86k | bool colMajor = matOp == HLMatLoadStoreOpcode::ColMatLoad; |
8040 | 1.86k | DXASSERT(matOp == HLMatLoadStoreOpcode::ColMatLoad || |
8041 | 1.86k | matOp == HLMatLoadStoreOpcode::RowMatLoad, |
8042 | 1.86k | "No store on cbuffer"); |
8043 | 1.86k | HLMatrixType MatTy = |
8044 | 1.86k | HLMatrixType::cast(CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx) |
8045 | 1.86k | ->getType() |
8046 | 1.86k | ->getPointerElementType()); |
8047 | | // This will replace a call, so we should use the register representation |
8048 | | // of elements |
8049 | 1.86k | Value *newLd = TranslateConstBufMatLdLegacy( |
8050 | 1.86k | MatTy, handle, legacyIdx, colMajor, hlslOP, /*memElemRepr*/ false, DL, |
8051 | 1.86k | Builder); |
8052 | 1.86k | CI->replaceAllUsesWith(newLd); |
8053 | 1.86k | dxilutil::TryScatterDebugValueToVectorElements(newLd); |
8054 | 1.86k | CI->eraseFromParent(); |
8055 | 1.86k | } else if (370 group == HLOpcodeGroup::HLSubscript370 ) { |
8056 | 350 | unsigned opcode = GetHLOpcode(CI); |
8057 | 350 | HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode); |
8058 | 350 | Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx); |
8059 | 350 | HLMatrixType MatTy = |
8060 | 350 | HLMatrixType::cast(basePtr->getType()->getPointerElementType()); |
8061 | 350 | Type *EltTy = MatTy.getElementTypeForReg(); |
8062 | | |
8063 | 350 | Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx); |
8064 | | |
8065 | 350 | Type *resultType = CI->getType()->getPointerElementType(); |
8066 | 350 | unsigned resultSize = 1; |
8067 | 350 | if (resultType->isVectorTy()) |
8068 | 254 | resultSize = resultType->getVectorNumElements(); |
8069 | 350 | DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix"); |
8070 | 350 | assert(resultSize <= 16); |
8071 | 350 | Value *idxList[16]; |
8072 | 350 | bool colMajor = subOp == HLSubscriptOpcode::ColMatSubscript || |
8073 | 350 | subOp == HLSubscriptOpcode::ColMatElement178 ; |
8074 | 350 | bool dynamicIndexing = !isa<ConstantInt>(idx) && |
8075 | 350 | !isa<ConstantAggregateZero>(idx)160 && |
8076 | 350 | !isa<ConstantDataSequential>(idx)136 ; |
8077 | | |
8078 | 350 | Value *ldData = UndefValue::get(resultType); |
8079 | 350 | if (!dynamicIndexing) { |
8080 | | // This will replace a load or GEP, so we should use the memory |
8081 | | // representation of elements |
8082 | 302 | Value *matLd = TranslateConstBufMatLdLegacy( |
8083 | 302 | MatTy, handle, legacyIdx, colMajor, hlslOP, /*memElemRepr*/ true, |
8084 | 302 | DL, Builder); |
8085 | | // The matLd is keep original layout, just use the idx calc in |
8086 | | // EmitHLSLMatrixElement and EmitHLSLMatrixSubscript. |
8087 | 302 | switch (subOp) { |
8088 | 50 | case HLSubscriptOpcode::RowMatSubscript: |
8089 | 190 | case HLSubscriptOpcode::ColMatSubscript: { |
8090 | 830 | for (unsigned i = 0; i < resultSize; i++640 ) { |
8091 | 640 | idxList[i] = |
8092 | 640 | CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i); |
8093 | 640 | } |
8094 | 190 | } break; |
8095 | 32 | case HLSubscriptOpcode::RowMatElement: |
8096 | 112 | case HLSubscriptOpcode::ColMatElement: { |
8097 | 112 | Constant *EltIdxs = cast<Constant>(idx); |
8098 | 264 | for (unsigned i = 0; i < resultSize; i++152 ) { |
8099 | 152 | idxList[i] = EltIdxs->getAggregateElement(i); |
8100 | 152 | } |
8101 | 112 | } break; |
8102 | 0 | default: |
8103 | 0 | DXASSERT(0, "invalid operation on const buffer"); |
8104 | 0 | break; |
8105 | 302 | } |
8106 | | |
8107 | 302 | if (resultType->isVectorTy()) { |
8108 | 902 | for (unsigned i = 0; i < resultSize; i++696 ) { |
8109 | 696 | Value *eltData = Builder.CreateExtractElement(matLd, idxList[i]); |
8110 | 696 | ldData = Builder.CreateInsertElement(ldData, eltData, i); |
8111 | 696 | } |
8112 | 206 | } else { |
8113 | 96 | Value *eltData = Builder.CreateExtractElement(matLd, idxList[0]); |
8114 | 96 | ldData = eltData; |
8115 | 96 | } |
8116 | 302 | } else { |
8117 | | // Must be matSub here. |
8118 | 48 | Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx); |
8119 | | |
8120 | 48 | if (colMajor) { |
8121 | | // idx is c * row + r. |
8122 | | // For first col, c is 0, so idx is r. |
8123 | 32 | Value *one = Builder.getInt32(1); |
8124 | | // row.x = c[0].[idx] |
8125 | | // row.y = c[1].[idx] |
8126 | | // row.z = c[2].[idx] |
8127 | | // row.w = c[3].[idx] |
8128 | 32 | Value *Elts[4]; |
8129 | 32 | ArrayType *AT = ArrayType::get(EltTy, MatTy.getNumColumns()); |
8130 | | |
8131 | 32 | IRBuilder<> AllocaBuilder(user->getParent() |
8132 | 32 | ->getParent() |
8133 | 32 | ->getEntryBlock() |
8134 | 32 | .getFirstInsertionPt()); |
8135 | | |
8136 | 32 | Value *tempArray = AllocaBuilder.CreateAlloca(AT); |
8137 | 32 | Value *zero = AllocaBuilder.getInt32(0); |
8138 | 32 | Value *cbufIdx = legacyIdx; |
8139 | 152 | for (unsigned int c = 0; c < MatTy.getNumColumns(); c++120 ) { |
8140 | 120 | Value *ColVal = GenerateCBLoadLegacy( |
8141 | 120 | handle, cbufIdx, /*channelOffset*/ 0, EltTy, MatTy.getNumRows(), |
8142 | 120 | hlslOP, Builder); |
8143 | | // Convert ColVal to array for indexing. |
8144 | 576 | for (unsigned int r = 0; r < MatTy.getNumRows(); r++456 ) { |
8145 | 456 | Value *Elt = |
8146 | 456 | Builder.CreateExtractElement(ColVal, Builder.getInt32(r)); |
8147 | 456 | Value *Ptr = Builder.CreateInBoundsGEP( |
8148 | 456 | tempArray, {zero, Builder.getInt32(r)}); |
8149 | 456 | Builder.CreateStore(Elt, Ptr); |
8150 | 456 | } |
8151 | | |
8152 | 120 | Value *Ptr = Builder.CreateInBoundsGEP(tempArray, {zero, idx}); |
8153 | 120 | Elts[c] = Builder.CreateLoad(Ptr); |
8154 | | // Update cbufIdx. |
8155 | 120 | cbufIdx = Builder.CreateAdd(cbufIdx, one); |
8156 | 120 | } |
8157 | 32 | if (resultType->isVectorTy()) { |
8158 | 152 | for (unsigned int c = 0; c < MatTy.getNumColumns(); c++120 ) { |
8159 | 120 | ldData = Builder.CreateInsertElement(ldData, Elts[c], c); |
8160 | 120 | } |
8161 | 32 | } else { |
8162 | 0 | ldData = Elts[0]; |
8163 | 0 | } |
8164 | 32 | } else { |
8165 | | // idx is r * col + c; |
8166 | | // r = idx / col; |
8167 | 16 | Value *cCol = ConstantInt::get(idx->getType(), MatTy.getNumColumns()); |
8168 | 16 | idx = Builder.CreateUDiv(idx, cCol); |
8169 | 16 | idx = Builder.CreateAdd(idx, legacyIdx); |
8170 | | // Just return a row; 'col' is the number of columns in the row. |
8171 | 16 | ldData = GenerateCBLoadLegacy(handle, idx, /*channelOffset*/ 0, EltTy, |
8172 | 16 | MatTy.getNumColumns(), hlslOP, Builder); |
8173 | 16 | } |
8174 | 48 | if (!resultType->isVectorTy()) { |
8175 | 0 | ldData = Builder.CreateExtractElement(ldData, Builder.getInt32(0)); |
8176 | 0 | } |
8177 | 48 | } |
8178 | | |
8179 | 700 | for (auto U = CI->user_begin(); 350 U != CI->user_end();) { |
8180 | 350 | Value *subsUser = *(U++); |
8181 | 350 | if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) { |
8182 | 80 | Value *subData = GenerateVecEltFromGEP(ldData, GEP, Builder, |
8183 | 80 | /*bInsertLdNextToGEP*/ true); |
8184 | 160 | for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) { |
8185 | 80 | Value *gepUser = *(gepU++); |
8186 | | // Must be load here; |
8187 | 80 | LoadInst *ldUser = cast<LoadInst>(gepUser); |
8188 | 80 | ldUser->replaceAllUsesWith(subData); |
8189 | 80 | ldUser->eraseFromParent(); |
8190 | 80 | } |
8191 | 80 | GEP->eraseFromParent(); |
8192 | 270 | } else { |
8193 | | // Must be load here. |
8194 | 270 | LoadInst *ldUser = cast<LoadInst>(subsUser); |
8195 | 270 | ldUser->replaceAllUsesWith(ldData); |
8196 | 270 | ldUser->eraseFromParent(); |
8197 | 270 | } |
8198 | 350 | } |
8199 | | |
8200 | 350 | CI->eraseFromParent(); |
8201 | 350 | } else if (IntrinsicInst *20 II20 = dyn_cast<IntrinsicInst>(user)) { |
8202 | 20 | if (II->getIntrinsicID() == Intrinsic::lifetime_start || |
8203 | 20 | II->getIntrinsicID() == Intrinsic::lifetime_end10 ) { |
8204 | 20 | DXASSERT(II->use_empty(), "lifetime intrinsic can't have uses"); |
8205 | 20 | II->eraseFromParent(); |
8206 | 20 | } else { |
8207 | 0 | DXASSERT(0, "not implemented yet"); |
8208 | 0 | } |
8209 | 20 | } else { |
8210 | 0 | DXASSERT(0, "not implemented yet"); |
8211 | 0 | } |
8212 | 40.8k | } else if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) { |
8213 | 22.7k | Type *Ty = ldInst->getType(); |
8214 | 22.7k | Type *EltTy = Ty->getScalarType(); |
8215 | | // Resource inside cbuffer is lowered after GenerateDxilOperations. |
8216 | 22.7k | if (dxilutil::IsHLSLObjectType(Ty)) { |
8217 | 314 | CallInst *CI = cast<CallInst>(handle); |
8218 | | // CI should be annotate handle. |
8219 | | // Need createHandle here. |
8220 | 314 | if (GetHLOpcodeGroup(CI->getCalledFunction()) == |
8221 | 314 | HLOpcodeGroup::HLAnnotateHandle) |
8222 | 314 | CI = cast<CallInst>(CI->getArgOperand(HLOperandIndex::kHandleOpIdx)); |
8223 | | |
8224 | 314 | GlobalVariable *CbGV = cast<GlobalVariable>( |
8225 | 314 | CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx)); |
8226 | 314 | TranslateResourceInCB(ldInst, pObjHelper, CbGV); |
8227 | 314 | return; |
8228 | 314 | } |
8229 | 22.4k | DXASSERT(!Ty->isAggregateType(), "should be flat in previous pass"); |
8230 | | |
8231 | 22.4k | Value *newLd = nullptr; |
8232 | | |
8233 | 22.4k | if (Ty->isVectorTy()) |
8234 | 7.31k | newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy, |
8235 | 7.31k | Ty->getVectorNumElements(), hlslOP, Builder); |
8236 | 15.0k | else |
8237 | 15.0k | newLd = GenerateCBLoadLegacy(handle, legacyIdx, channelOffset, EltTy, |
8238 | 15.0k | hlslOP, Builder); |
8239 | | |
8240 | 22.4k | ldInst->replaceAllUsesWith(newLd); |
8241 | 22.4k | dxilutil::TryScatterDebugValueToVectorElements(newLd); |
8242 | 22.4k | ldInst->eraseFromParent(); |
8243 | 22.4k | } else if (BitCastInst *18.1k BCI18.1k = dyn_cast<BitCastInst>(user)) { |
8244 | 64 | for (auto it = BCI->user_begin(); it != BCI->user_end();) { |
8245 | 36 | Instruction *I = cast<Instruction>(*it++); |
8246 | 36 | TranslateCBAddressUserLegacy(I, handle, legacyIdx, channelOffset, hlslOP, |
8247 | 36 | prevFieldAnnotation, dxilTypeSys, DL, |
8248 | 36 | pObjHelper); |
8249 | 36 | } |
8250 | 28 | BCI->eraseFromParent(); |
8251 | 18.0k | } else { |
8252 | | // Must be GEP here |
8253 | 18.0k | GetElementPtrInst *GEP = cast<GetElementPtrInst>(user); |
8254 | 18.0k | TranslateCBGepLegacy(GEP, handle, legacyIdx, channelOffset, hlslOP, Builder, |
8255 | 18.0k | prevFieldAnnotation, DL, dxilTypeSys, pObjHelper); |
8256 | 18.0k | GEP->eraseFromParent(); |
8257 | 18.0k | } |
8258 | 43.0k | } |
8259 | | |
8260 | | void TranslateCBGepLegacy(GetElementPtrInst *GEP, Value *handle, |
8261 | | Value *legacyIndex, unsigned channel, |
8262 | | hlsl::OP *hlslOP, IRBuilder<> &Builder, |
8263 | | DxilFieldAnnotation *prevFieldAnnotation, |
8264 | | const DataLayout &DL, DxilTypeSystem &dxilTypeSys, |
8265 | 18.0k | HLObjectOperationLowerHelper *pObjHelper) { |
8266 | 18.0k | SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end()); |
8267 | | |
8268 | | // update offset |
8269 | 18.0k | DxilFieldAnnotation *fieldAnnotation = prevFieldAnnotation; |
8270 | | |
8271 | 18.0k | gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP); |
8272 | | |
8273 | 62.2k | for (; GEPIt != E; GEPIt++44.1k ) { |
8274 | 44.2k | Value *idx = GEPIt.getOperand(); |
8275 | 44.2k | unsigned immIdx = 0; |
8276 | 44.2k | bool bImmIdx = false; |
8277 | 44.2k | if (Constant *constIdx = dyn_cast<Constant>(idx)) { |
8278 | 41.4k | immIdx = constIdx->getUniqueInteger().getLimitedValue(); |
8279 | 41.4k | bImmIdx = true; |
8280 | 41.4k | } |
8281 | | |
8282 | 44.2k | if (GEPIt->isPointerTy()) { |
8283 | 18.0k | Type *EltTy = GEPIt->getPointerElementType(); |
8284 | 18.0k | unsigned size = 0; |
8285 | 18.0k | if (StructType *ST = dyn_cast<StructType>(EltTy)) { |
8286 | 18.0k | DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST); |
8287 | 18.0k | size = annotation->GetCBufferSize(); |
8288 | 18.0k | } else { |
8289 | 32 | DXASSERT(fieldAnnotation, "must be a field"); |
8290 | 32 | if (ArrayType *AT = dyn_cast<ArrayType>(EltTy)) { |
8291 | 32 | unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize( |
8292 | 32 | *fieldAnnotation, EltTy, dxilTypeSys); |
8293 | | |
8294 | | // Decide the nested array size. |
8295 | 32 | unsigned nestedArraySize = 1; |
8296 | | |
8297 | 32 | Type *EltTy = AT->getArrayElementType(); |
8298 | | // support multi level of array |
8299 | 40 | while (EltTy->isArrayTy()) { |
8300 | 8 | ArrayType *EltAT = cast<ArrayType>(EltTy); |
8301 | 8 | nestedArraySize *= EltAT->getNumElements(); |
8302 | 8 | EltTy = EltAT->getElementType(); |
8303 | 8 | } |
8304 | | // Align to 4 * 4 bytes. |
8305 | 32 | unsigned alignedSize = (EltSize + 15) & 0xfffffff0; |
8306 | 32 | size = nestedArraySize * alignedSize; |
8307 | 32 | } else { |
8308 | 0 | size = DL.getTypeAllocSize(EltTy); |
8309 | 0 | } |
8310 | 32 | } |
8311 | | // Skip 0 idx. |
8312 | 18.0k | if (bImmIdx && immIdx == 0) |
8313 | 18.0k | continue; |
8314 | | // Align to 4 * 4 bytes. |
8315 | 0 | size = (size + 15) & 0xfffffff0; |
8316 | | |
8317 | | // Take this as array idxing. |
8318 | 0 | if (bImmIdx) { |
8319 | 0 | unsigned tempOffset = size * immIdx; |
8320 | 0 | unsigned idxInc = tempOffset >> 4; |
8321 | 0 | legacyIndex = |
8322 | 0 | Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc)); |
8323 | 0 | } else { |
8324 | 0 | Value *idxInc = Builder.CreateMul(idx, hlslOP->GetU32Const(size >> 4)); |
8325 | 0 | legacyIndex = Builder.CreateAdd(legacyIndex, idxInc); |
8326 | 0 | } |
8327 | | |
8328 | | // Array always start from x channel. |
8329 | 0 | channel = 0; |
8330 | 26.1k | } else if (GEPIt->isStructTy()) { |
8331 | 21.4k | StructType *ST = cast<StructType>(*GEPIt); |
8332 | 21.4k | DxilStructAnnotation *annotation = dxilTypeSys.GetStructAnnotation(ST); |
8333 | 21.4k | fieldAnnotation = &annotation->GetFieldAnnotation(immIdx); |
8334 | | |
8335 | 21.4k | unsigned idxInc = 0; |
8336 | 21.4k | unsigned structOffset = 0; |
8337 | 21.4k | if (fieldAnnotation->GetCompType().Is16Bit() && |
8338 | 21.4k | !hlslOP->UseMinPrecision()1.10k ) { |
8339 | 764 | structOffset = fieldAnnotation->GetCBufferOffset() >> 1; |
8340 | 764 | channel += structOffset; |
8341 | 764 | idxInc = channel >> 3; |
8342 | 764 | channel = channel & 0x7; |
8343 | 20.7k | } else { |
8344 | 20.7k | structOffset = fieldAnnotation->GetCBufferOffset() >> 2; |
8345 | 20.7k | channel += structOffset; |
8346 | 20.7k | idxInc = channel >> 2; |
8347 | 20.7k | channel = channel & 0x3; |
8348 | 20.7k | } |
8349 | 21.4k | if (idxInc) |
8350 | 8.27k | legacyIndex = |
8351 | 8.27k | Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc)); |
8352 | 21.4k | } else if (4.64k GEPIt->isArrayTy()4.64k ) { |
8353 | 4.17k | DXASSERT(fieldAnnotation != nullptr, "must a field"); |
8354 | 4.17k | unsigned EltSize = dxilutil::GetLegacyCBufferFieldElementSize( |
8355 | 4.17k | *fieldAnnotation, *GEPIt, dxilTypeSys); |
8356 | | // Decide the nested array size. |
8357 | 4.17k | unsigned nestedArraySize = 1; |
8358 | | |
8359 | 4.17k | Type *EltTy = GEPIt->getArrayElementType(); |
8360 | | // support multi level of array |
8361 | 4.78k | while (EltTy->isArrayTy()) { |
8362 | 606 | ArrayType *EltAT = cast<ArrayType>(EltTy); |
8363 | 606 | nestedArraySize *= EltAT->getNumElements(); |
8364 | 606 | EltTy = EltAT->getElementType(); |
8365 | 606 | } |
8366 | | // Align to 4 * 4 bytes. |
8367 | 4.17k | unsigned alignedSize = (EltSize + 15) & 0xfffffff0; |
8368 | 4.17k | unsigned size = nestedArraySize * alignedSize; |
8369 | 4.17k | if (bImmIdx) { |
8370 | 1.41k | unsigned tempOffset = size * immIdx; |
8371 | 1.41k | unsigned idxInc = tempOffset >> 4; |
8372 | 1.41k | legacyIndex = |
8373 | 1.41k | Builder.CreateAdd(legacyIndex, hlslOP->GetU32Const(idxInc)); |
8374 | 2.76k | } else { |
8375 | 2.76k | Value *idxInc = Builder.CreateMul(idx, hlslOP->GetU32Const(size >> 4)); |
8376 | 2.76k | legacyIndex = Builder.CreateAdd(legacyIndex, idxInc); |
8377 | 2.76k | } |
8378 | | |
8379 | | // Array always start from x channel. |
8380 | 4.17k | channel = 0; |
8381 | 4.17k | } else if (470 GEPIt->isVectorTy()470 ) { |
8382 | | // Indexing on vector. |
8383 | 470 | if (bImmIdx) { |
8384 | 422 | if (immIdx < GEPIt->getVectorNumElements()) { |
8385 | 394 | const unsigned vectorElmSize = |
8386 | 394 | DL.getTypeAllocSize(GEPIt->getVectorElementType()); |
8387 | 394 | const bool bIs16bitType = vectorElmSize == 2; |
8388 | 394 | const unsigned tempOffset = vectorElmSize * immIdx; |
8389 | 394 | const unsigned numChannelsPerRow = bIs16bitType ? 832 : 4362 ; |
8390 | 394 | const unsigned channelInc = |
8391 | 394 | bIs16bitType ? tempOffset >> 132 : tempOffset >> 2362 ; |
8392 | | |
8393 | 394 | DXASSERT((channel + channelInc) < numChannelsPerRow, |
8394 | 394 | "vector should not cross cb register"); |
8395 | 394 | channel += channelInc; |
8396 | 394 | if (channel == numChannelsPerRow) { |
8397 | | // Get to another row. |
8398 | | // Update index and channel. |
8399 | 0 | channel = 0; |
8400 | 0 | legacyIndex = Builder.CreateAdd(legacyIndex, Builder.getInt32(1)); |
8401 | 0 | } |
8402 | 394 | } else { |
8403 | 28 | StringRef resName = "(unknown)"; |
8404 | 28 | if (DxilResourceBase *Res = |
8405 | 28 | pObjHelper->FindCBufferResourceFromHandle(handle)) { |
8406 | 28 | resName = Res->GetGlobalName(); |
8407 | 28 | } |
8408 | 28 | legacyIndex = hlsl::CreatePoisonValue( |
8409 | 28 | legacyIndex->getType(), |
8410 | 28 | Twine("Out of bounds index (") + Twine(immIdx) + |
8411 | 28 | Twine(") in CBuffer '") + Twine(resName) + ("'"), |
8412 | 28 | GEP->getDebugLoc(), GEP); |
8413 | 28 | channel = 0; |
8414 | 28 | } |
8415 | 422 | } else { |
8416 | 48 | Type *EltTy = GEPIt->getVectorElementType(); |
8417 | 48 | unsigned vecSize = GEPIt->getVectorNumElements(); |
8418 | | |
8419 | | // Load the whole register. |
8420 | 48 | Value *newLd = |
8421 | 48 | GenerateCBLoadLegacy(handle, legacyIndex, |
8422 | 48 | /*channelOffset*/ channel, EltTy, |
8423 | 48 | /*vecSize*/ vecSize, hlslOP, Builder); |
8424 | | // Copy to array. |
8425 | 48 | IRBuilder<> AllocaBuilder(GEP->getParent() |
8426 | 48 | ->getParent() |
8427 | 48 | ->getEntryBlock() |
8428 | 48 | .getFirstInsertionPt()); |
8429 | 48 | Value *tempArray = |
8430 | 48 | AllocaBuilder.CreateAlloca(ArrayType::get(EltTy, vecSize)); |
8431 | 48 | Value *zeroIdx = hlslOP->GetU32Const(0); |
8432 | 216 | for (unsigned i = 0; i < vecSize; i++168 ) { |
8433 | 168 | Value *Elt = Builder.CreateExtractElement(newLd, i); |
8434 | 168 | Value *EltGEP = Builder.CreateInBoundsGEP( |
8435 | 168 | tempArray, {zeroIdx, hlslOP->GetU32Const(i)}); |
8436 | 168 | Builder.CreateStore(Elt, EltGEP); |
8437 | 168 | } |
8438 | | // Make sure this is the end of GEP. |
8439 | 48 | gep_type_iterator temp = GEPIt; |
8440 | 48 | temp++; |
8441 | 48 | DXASSERT(temp == E, "scalar type must be the last"); |
8442 | | |
8443 | | // Replace the GEP with array GEP. |
8444 | 48 | Value *ArrayGEP = Builder.CreateInBoundsGEP(tempArray, {zeroIdx, idx}); |
8445 | 48 | GEP->replaceAllUsesWith(ArrayGEP); |
8446 | 48 | return; |
8447 | 48 | } |
8448 | 470 | } else { |
8449 | 0 | gep_type_iterator temp = GEPIt; |
8450 | 0 | temp++; |
8451 | 0 | DXASSERT(temp == E, "scalar type must be the last"); |
8452 | 0 | } |
8453 | 44.2k | } |
8454 | | |
8455 | 43.0k | for (auto U = GEP->user_begin(); 18.0k U != GEP->user_end();) { |
8456 | 24.9k | Instruction *user = cast<Instruction>(*(U++)); |
8457 | | |
8458 | 24.9k | TranslateCBAddressUserLegacy(user, handle, legacyIndex, channel, hlslOP, |
8459 | 24.9k | fieldAnnotation, dxilTypeSys, DL, pObjHelper); |
8460 | 24.9k | } |
8461 | 18.0k | } |
8462 | | |
8463 | | void TranslateCBOperationsLegacy(Value *handle, Value *ptr, OP *hlslOP, |
8464 | | DxilTypeSystem &dxilTypeSys, |
8465 | | const DataLayout &DL, |
8466 | 8.72k | HLObjectOperationLowerHelper *pObjHelper) { |
8467 | 8.72k | auto User = ptr->user_begin(); |
8468 | 8.72k | auto UserE = ptr->user_end(); |
8469 | 8.72k | Value *zeroIdx = hlslOP->GetU32Const(0); |
8470 | 26.7k | for (; User != UserE;) { |
8471 | | // Must be Instruction. |
8472 | 18.0k | Instruction *I = cast<Instruction>(*(User++)); |
8473 | 18.0k | TranslateCBAddressUserLegacy( |
8474 | 18.0k | I, handle, zeroIdx, /*channelOffset*/ 0, hlslOP, |
8475 | 18.0k | /*prevFieldAnnotation*/ nullptr, dxilTypeSys, DL, pObjHelper); |
8476 | 18.0k | } |
8477 | 8.72k | } |
8478 | | |
8479 | | } // namespace |
8480 | | |
8481 | | // Structured buffer. |
8482 | | namespace { |
8483 | | |
8484 | | Value *GenerateRawBufLd(Value *handle, Value *bufIdx, Value *offset, |
8485 | | Value *status, Type *EltTy, |
8486 | | MutableArrayRef<Value *> resultElts, hlsl::OP *OP, |
8487 | | IRBuilder<> &Builder, unsigned NumComponents, |
8488 | 28 | Constant *alignment) { |
8489 | 28 | OP::OpCode opcode = OP::OpCode::RawBufferLoad; |
8490 | | |
8491 | 28 | DXASSERT(resultElts.size() <= 4, |
8492 | 28 | "buffer load cannot load more than 4 values"); |
8493 | | |
8494 | 28 | if (bufIdx == nullptr) { |
8495 | | // This is actually a byte address buffer load with a struct template type. |
8496 | | // The call takes only one coordinates for the offset. |
8497 | 0 | bufIdx = offset; |
8498 | 0 | offset = UndefValue::get(offset->getType()); |
8499 | 0 | } |
8500 | | |
8501 | 28 | Function *dxilF = OP->GetOpFunc(opcode, EltTy); |
8502 | 28 | Constant *mask = GetRawBufferMaskForETy(EltTy, NumComponents, OP); |
8503 | 28 | Value *Args[] = {OP->GetU32Const((unsigned)opcode), |
8504 | 28 | handle, |
8505 | 28 | bufIdx, |
8506 | 28 | offset, |
8507 | 28 | mask, |
8508 | 28 | alignment}; |
8509 | 28 | Value *Ld = Builder.CreateCall(dxilF, Args, OP::GetOpCodeName(opcode)); |
8510 | | |
8511 | 56 | for (unsigned i = 0; i < resultElts.size(); i++28 ) { |
8512 | 28 | resultElts[i] = Builder.CreateExtractValue(Ld, i); |
8513 | 28 | } |
8514 | | |
8515 | | // status |
8516 | 28 | UpdateStatus(Ld, status, Builder, OP); |
8517 | 28 | return Ld; |
8518 | 28 | } |
8519 | | |
8520 | | void GenerateStructBufSt(Value *handle, Value *bufIdx, Value *offset, |
8521 | | Type *EltTy, hlsl::OP *OP, IRBuilder<> &Builder, |
8522 | | ArrayRef<Value *> vals, uint8_t mask, |
8523 | 60 | Constant *alignment) { |
8524 | 60 | OP::OpCode opcode = OP::OpCode::RawBufferStore; |
8525 | 60 | DXASSERT(vals.size() == 4, "buffer store need 4 values"); |
8526 | | |
8527 | 60 | Value *Args[] = {OP->GetU32Const((unsigned)opcode), |
8528 | 60 | handle, |
8529 | 60 | bufIdx, |
8530 | 60 | offset, |
8531 | 60 | vals[0], |
8532 | 60 | vals[1], |
8533 | 60 | vals[2], |
8534 | 60 | vals[3], |
8535 | 60 | OP->GetU8Const(mask), |
8536 | 60 | alignment}; |
8537 | 60 | Function *dxilF = OP->GetOpFunc(opcode, EltTy); |
8538 | 60 | Builder.CreateCall(dxilF, Args); |
8539 | 60 | } |
8540 | | |
8541 | | Value *TranslateStructBufMatLd(CallInst *CI, IRBuilder<> &Builder, |
8542 | | Value *handle, HLResource::Kind RK, hlsl::OP *OP, |
8543 | | Value *status, Value *bufIdx, Value *baseOffset, |
8544 | 814 | const DataLayout &DL) { |
8545 | | |
8546 | 814 | ResLoadHelper helper(CI, RK, handle, bufIdx, baseOffset, status); |
8547 | | #ifndef NDEBUG |
8548 | | Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx); |
8549 | | Type *matType = ptr->getType()->getPointerElementType(); |
8550 | | HLMatrixType MatTy = HLMatrixType::cast(matType); |
8551 | | DXASSERT(MatTy.getLoweredVectorType(false /*MemRepr*/) == |
8552 | | helper.retVal->getType(), |
8553 | | "helper type should match vectorized matrix"); |
8554 | | #endif |
8555 | 814 | return TranslateBufLoad(helper, RK, Builder, OP, DL); |
8556 | 814 | } |
8557 | | |
8558 | | void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle, |
8559 | | hlsl::OP *OP, Value *bufIdx, Value *baseOffset, |
8560 | 1.18k | Value *val, const DataLayout &DL) { |
8561 | 1.18k | [[maybe_unused]] HLMatrixType MatTy = HLMatrixType::cast(matType); |
8562 | 1.18k | DXASSERT(MatTy.getLoweredVectorType(false /*MemRepr*/) == val->getType(), |
8563 | 1.18k | "helper type should match vectorized matrix"); |
8564 | 1.18k | TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx, |
8565 | 1.18k | baseOffset, Builder, OP); |
8566 | 1.18k | } |
8567 | | |
8568 | | void TranslateStructBufMatLdSt(CallInst *CI, Value *handle, HLResource::Kind RK, |
8569 | | hlsl::OP *OP, Value *status, Value *bufIdx, |
8570 | 2.00k | Value *baseOffset, const DataLayout &DL) { |
8571 | 2.00k | IRBuilder<> Builder(CI); |
8572 | 2.00k | HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()); |
8573 | 2.00k | unsigned opcode = GetHLOpcode(CI); |
8574 | 2.00k | DXASSERT_LOCALVAR(group, group == HLOpcodeGroup::HLMatLoadStore, |
8575 | 2.00k | "only translate matrix loadStore here."); |
8576 | 2.00k | HLMatLoadStoreOpcode matOp = static_cast<HLMatLoadStoreOpcode>(opcode); |
8577 | | // Due to the current way the initial codegen generates matrix |
8578 | | // orientation casts, the in-register vector matrix has already been |
8579 | | // reordered based on the destination's row or column-major packing |
8580 | | // orientation. |
8581 | 2.00k | switch (matOp) { |
8582 | 242 | case HLMatLoadStoreOpcode::RowMatLoad: |
8583 | 814 | case HLMatLoadStoreOpcode::ColMatLoad: |
8584 | 814 | TranslateStructBufMatLd(CI, Builder, handle, RK, OP, status, bufIdx, |
8585 | 814 | baseOffset, DL); |
8586 | 814 | break; |
8587 | 194 | case HLMatLoadStoreOpcode::RowMatStore: |
8588 | 1.18k | case HLMatLoadStoreOpcode::ColMatStore: { |
8589 | 1.18k | Value *ptr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx); |
8590 | 1.18k | Value *val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx); |
8591 | 1.18k | TranslateStructBufMatSt(ptr->getType()->getPointerElementType(), Builder, |
8592 | 1.18k | handle, OP, bufIdx, baseOffset, val, DL); |
8593 | 1.18k | } break; |
8594 | 2.00k | } |
8595 | | |
8596 | 2.00k | CI->eraseFromParent(); |
8597 | 2.00k | } |
8598 | | |
// Forward declaration: the matrix-subscript lowering below calls this
// function before its definition appears later in the file.
void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
                                     HLResource::Kind ResKind, Value *bufIdx,
                                     Value *baseOffset, Value *status,
                                     hlsl::OP *OP, const DataLayout &DL);
8603 | | |
8604 | | // For case like mat[i][j]. |
8605 | | // IdxList is [i][0], [i][1], [i][2],[i][3]. |
8606 | | // Idx is j. |
8607 | | // return [i][j] not mat[i][j] because resource ptr and temp ptr need different |
8608 | | // code gen. |
8609 | | static Value *LowerGEPOnMatIndexListToIndex(llvm::GetElementPtrInst *GEP, |
8610 | 24 | ArrayRef<Value *> IdxList) { |
8611 | 24 | IRBuilder<> Builder(GEP); |
8612 | 24 | Value *zero = Builder.getInt32(0); |
8613 | 24 | DXASSERT(GEP->getNumIndices() == 2, "must have 2 level"); |
8614 | 24 | Value *baseIdx = (GEP->idx_begin())->get(); |
8615 | 24 | DXASSERT_LOCALVAR(baseIdx, baseIdx == zero, "base index must be 0"); |
8616 | 24 | Value *Idx = (GEP->idx_begin() + 1)->get(); |
8617 | | |
8618 | 24 | if (ConstantInt *immIdx = dyn_cast<ConstantInt>(Idx)) { |
8619 | 16 | return IdxList[immIdx->getSExtValue()]; |
8620 | 16 | } |
8621 | | |
8622 | 8 | IRBuilder<> AllocaBuilder( |
8623 | 8 | GEP->getParent()->getParent()->getEntryBlock().getFirstInsertionPt()); |
8624 | 8 | unsigned size = IdxList.size(); |
8625 | | // Store idxList to temp array. |
8626 | 8 | ArrayType *AT = ArrayType::get(IdxList[0]->getType(), size); |
8627 | 8 | Value *tempArray = AllocaBuilder.CreateAlloca(AT); |
8628 | | |
8629 | 40 | for (unsigned i = 0; i < size; i++32 ) { |
8630 | 32 | Value *EltPtr = Builder.CreateGEP(tempArray, {zero, Builder.getInt32(i)}); |
8631 | 32 | Builder.CreateStore(IdxList[i], EltPtr); |
8632 | 32 | } |
8633 | | // Load the idx. |
8634 | 8 | Value *GEPOffset = Builder.CreateGEP(tempArray, {zero, Idx}); |
8635 | 8 | return Builder.CreateLoad(GEPOffset); |
8636 | 24 | } |
8637 | | |
// Lowers a subscript operator applied to a matrix that is a member of a
// structured-buffer element, then erases the subscript call.
//
// Step 1 builds idxList: one byte offset per element of the subscript result,
// computed as baseOffset + elementIndex * element byte size.
// Step 2 rewrites every user of the subscript call (load, store, GEP, or, for
// a single-element result, anything TranslateStructBufSubscriptUser accepts).
//
//   CI         - the HLSubscript call being lowered.
//   handle     - resource handle passed through to the emitted buffer ops.
//   ResKind    - resource kind passed through to
//                TranslateStructBufSubscriptUser.
//   bufIdx     - buffer index operand for the emitted buffer ops.
//   baseOffset - offset of the matrix within the element; must be non-null
//                here because its type is used for the element-size constant.
//   status     - passed through to TranslateStructBufSubscriptUser.
void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
                                    HLResource::Kind ResKind, Value *bufIdx,
                                    Value *baseOffset, Value *status,
                                    hlsl::OP *hlslOP, const DataLayout &DL) {
  unsigned opcode = GetHLOpcode(CI);
  IRBuilder<> subBuilder(CI);
  HLSubscriptOpcode subOp = static_cast<HLSubscriptOpcode>(opcode);
  Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx);
  HLMatrixType MatTy =
      HLMatrixType::cast(basePtr->getType()->getPointerElementType());
  Type *EltTy = MatTy.getElementTypeForReg();
  // The alignment operand of the emitted loads/stores is the alloc size of
  // one matrix element.
  Constant *alignment = hlslOP->GetI32Const(DL.getTypeAllocSize(EltTy));

  // Scale factor that turns an element index into a byte offset.
  Value *EltByteSize = ConstantInt::get(
      baseOffset->getType(), GetEltTypeByteSizeForConstBuf(EltTy, DL));

  Value *idx = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx);

  // The call returns a pointer; the pointee is either a scalar (one element)
  // or a vector (one offset per lane).
  Type *resultType = CI->getType()->getPointerElementType();
  unsigned resultSize = 1;
  if (resultType->isVectorTy())
    resultSize = resultType->getVectorNumElements();
  DXASSERT(resultSize <= 16, "up to 4x4 elements in vector or matrix");
  // NOTE(review): this repeats the DXASSERT condition above.
  assert(resultSize <= 16);
  std::vector<Value *> idxList(resultSize);

  switch (subOp) {
  case HLSubscriptOpcode::ColMatSubscript:
  case HLSubscriptOpcode::RowMatSubscript: {
    // The element indices are separate call operands, one per result element,
    // starting at kMatSubscriptSubOpIdx.
    for (unsigned i = 0; i < resultSize; i++) {
      Value *offset =
          CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
      offset = subBuilder.CreateMul(offset, EltByteSize);
      idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
    }
  } break;
  case HLSubscriptOpcode::RowMatElement:
  case HLSubscriptOpcode::ColMatElement: {
    // The element indices are packed into a single constant aggregate operand.
    Constant *EltIdxs = cast<Constant>(idx);
    for (unsigned i = 0; i < resultSize; i++) {
      Value *offset =
          subBuilder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize);
      idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
    }
  } break;
  default:
    // NOTE(review): the message says "const buffer" although this function
    // lowers structured-buffer subscripts -- confirm the wording.
    DXASSERT(0, "invalid operation on const buffer");
    break;
  }

  // Filler for the three unused value operands of single-component stores.
  Value *undefElt = UndefValue::get(EltTy);

  // The iterator is advanced before each user is handled because every branch
  // below erases the user (directly or inside the callee).
  for (auto U = CI->user_begin(); U != CI->user_end();) {
    Value *subsUser = *(U++);
    if (resultSize == 1) {
      // Single element: treat the user like any other subscript user, with
      // the element's offset in place of the base offset.
      TranslateStructBufSubscriptUser(cast<Instruction>(subsUser), handle,
                                      ResKind, bufIdx, idxList[0], status,
                                      hlslOP, DL);
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
      // Further indexing into the subscript result: reduce the GEP to a
      // single offset and translate the GEP's users with that offset.
      Value *GEPOffset = LowerGEPOnMatIndexListToIndex(GEP, idxList);

      for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
        Instruction *gepUserInst = cast<Instruction>(*(gepU++));
        TranslateStructBufSubscriptUser(gepUserInst, handle, ResKind, bufIdx,
                                        GEPOffset, status, hlslOP, DL);
      }

      GEP->eraseFromParent();
    } else if (StoreInst *stUser = dyn_cast<StoreInst>(subsUser)) {
      // Store elements of matrix in a struct. Needs to be done one scalar at a
      // time even for vectors in the case that matrix orientation spreads the
      // indexed scalars throughout the matrix vector.
      IRBuilder<> stBuilder(stUser);
      Value *Val = stUser->getValueOperand();
      if (Val->getType()->isVectorTy()) {
        for (unsigned i = 0; i < resultSize; i++) {
          Value *EltVal = stBuilder.CreateExtractElement(Val, i);
          // Only the first (X) component is written by each store.
          uint8_t mask = DXIL::kCompMask_X;
          GenerateStructBufSt(handle, bufIdx, idxList[i], EltTy, hlslOP,
                              stBuilder, {EltVal, undefElt, undefElt, undefElt},
                              mask, alignment);
        }
      } else {
        // NOTE(review): resultSize > 1 here, which only happens for a vector
        // result type, so a scalar stored value looks unexpected -- confirm
        // whether this branch is reachable.
        uint8_t mask = DXIL::kCompMask_X;
        GenerateStructBufSt(handle, bufIdx, idxList[0], EltTy, hlslOP,
                            stBuilder, {Val, undefElt, undefElt, undefElt},
                            mask, alignment);
      }

      stUser->eraseFromParent();
    } else {
      // Must be load here.
      LoadInst *ldUser = cast<LoadInst>(subsUser);
      IRBuilder<> ldBuilder(ldUser);
      Value *ldData = UndefValue::get(resultType);
      // Load elements of matrix in a struct. Needs to be done one scalar at a
      // time even for vectors in the case that matrix orientation spreads the
      // indexed scalars throughout the matrix vector.
      if (resultType->isVectorTy()) {
        for (unsigned i = 0; i < resultSize; i++) {
          Value *ResultElt;
          // TODO: This can be inefficient for row major matrix load
          GenerateRawBufLd(handle, bufIdx, idxList[i],
                           /*status*/ nullptr, EltTy, ResultElt, hlslOP,
                           ldBuilder, 1, alignment);
          ldData = ldBuilder.CreateInsertElement(ldData, ResultElt, i);
        }
      } else {
        // NOTE(review): resultSize > 1 implies resultType is a vector, so this
        // branch looks unreachable; it also requests 4 components while
        // receiving a single result -- confirm intent.
        GenerateRawBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr, EltTy,
                         ldData, hlslOP, ldBuilder, 4, alignment);
      }
      ldUser->replaceAllUsesWith(ldData);
      ldUser->eraseFromParent();
    }
  }

  CI->eraseFromParent();
}
8759 | | |
// Lowers one user of a structured/raw buffer subscript result and erases it.
//
// Dispatches on the kind of `user`:
//   - CallInst: HL intrinsic (atomics), matrix load/store, or matrix
//     subscript on a member of the element.
//   - LoadInst / StoreInst: translated via TranslateBufLoad / TranslateStore.
//   - BitCastInst and single-input PHINode: looked through; their users are
//     translated recursively with the same index/offset.
//   - anything else must be a GetElementPtrInst: its byte offset is folded
//     into bufIdx (raw buffers) or baseOffset (other kinds) and its users are
//     translated recursively.
//
//   handle     - resource handle for the emitted DXIL operations.
//   ResKind    - resource kind; decides where a GEP offset is accumulated.
//   bufIdx     - buffer index operand.
//   baseOffset - offset within the element; null is replaced by 0 in the
//                CallInst path only.
//   status     - passed through to the load helpers.
void TranslateStructBufSubscriptUser(Instruction *user, Value *handle,
                                     HLResource::Kind ResKind, Value *bufIdx,
                                     Value *baseOffset, Value *status,
                                     hlsl::OP *OP, const DataLayout &DL) {
  IRBuilder<> Builder(user);
  if (CallInst *userCall = dyn_cast<CallInst>(user)) {
    HLOpcodeGroup group =
        hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction());
    unsigned opcode = GetHLOpcode(userCall);
    // For case element type of structure buffer is not structure type.
    if (baseOffset == nullptr)
      baseOffset = OP->GetU32Const(0);
    if (group == HLOpcodeGroup::HLIntrinsic) {
      IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode);
      switch (IOP) {
      case IntrinsicOp::MOP_Load: {
        // Not implemented: both branches are placeholders and the assert
        // below always fires when assertions are enabled.
        if (userCall->getType()->isPointerTy()) {
          // Struct loads return pointers, like operator[].

        } else {
          // Use builtin types on structuredBuffer.
        }
        DXASSERT(0, "not implement yet");
      } break;
      // Each Interlocked* binary intrinsic maps to AtomicBinOp with the
      // matching AtomicBinOpCode, addressed by (bufIdx, baseOffset).
      case IntrinsicOp::IOP_InterlockedAdd: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Add,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedAnd: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::And,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedExchange: {
        // A float destination is exchanged using an i32 operation type;
        // otherwise no explicit operation type is passed.
        Type *opType = nullptr;
        PointerType *ptrType = dyn_cast<PointerType>(
            userCall->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex)
                ->getType());
        if (ptrType && ptrType->getElementType()->isFloatTy())
          opType = Type::getInt32Ty(userCall->getContext());
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset, opType);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Exchange,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedMax: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMax,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedMin: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::IMin,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedUMax: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMax,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedUMin: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::UMin,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedOr: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Or,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedXor: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicBinOp, handle, bufIdx,
                            baseOffset);
        TranslateAtomicBinaryOperation(helper, DXIL::AtomicBinOpCode::Xor,
                                       Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedCompareStore:
      case IntrinsicOp::IOP_InterlockedCompareExchange: {
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicCompareExchange,
                            handle, bufIdx, baseOffset);
        TranslateAtomicCmpXChg(helper, Builder, OP);
      } break;
      case IntrinsicOp::IOP_InterlockedCompareStoreFloatBitwise:
      case IntrinsicOp::IOP_InterlockedCompareExchangeFloatBitwise: {
        // The float-bitwise variants always use an i32 operation type.
        Type *i32Ty = Type::getInt32Ty(userCall->getContext());
        AtomicHelper helper(userCall, DXIL::OpCode::AtomicCompareExchange,
                            handle, bufIdx, baseOffset, i32Ty);
        TranslateAtomicCmpXChg(helper, Builder, OP);
      } break;
      default:
        DXASSERT(0, "invalid opcode");
        break;
      }
      // The intrinsic call is erased here; the other two call groups below
      // erase the call inside their callee.
      userCall->eraseFromParent();
    } else if (group == HLOpcodeGroup::HLMatLoadStore)
      // Load/Store matrix within a struct
      TranslateStructBufMatLdSt(userCall, handle, ResKind, OP, status, bufIdx,
                                baseOffset, DL);
    else if (group == HLOpcodeGroup::HLSubscript) {
      // Subscript of matrix within a struct
      TranslateStructBufMatSubscript(userCall, handle, ResKind, bufIdx,
                                     baseOffset, status, OP, DL);
    }
    // Calls from any other opcode group are left untouched.
  } else if (LoadInst *LdInst = dyn_cast<LoadInst>(user)) {
    // Load of scalar/vector within a struct or structured raw load.
    ResLoadHelper helper(LdInst, ResKind, handle, bufIdx, baseOffset, status);
    TranslateBufLoad(helper, ResKind, Builder, OP, DL);

    LdInst->eraseFromParent();
  } else if (StoreInst *StInst = dyn_cast<StoreInst>(user)) {
    // Store of scalar/vector within a struct or structured raw store.
    Value *val = StInst->getValueOperand();
    TranslateStore(DxilResource::Kind::StructuredBuffer, handle, val, bufIdx,
                   baseOffset, Builder, OP);
    StInst->eraseFromParent();
  } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(user)) {
    // Recurse into the bitcast's users; the iterator is advanced first
    // because the recursive call erases the user.
    for (auto U = BCI->user_begin(); U != BCI->user_end();) {
      Value *BCIUser = *(U++);
      TranslateStructBufSubscriptUser(cast<Instruction>(BCIUser), handle,
                                      ResKind, bufIdx, baseOffset, status, OP,
                                      DL);
    }
    BCI->eraseFromParent();
  } else if (PHINode *Phi = dyn_cast<PHINode>(user)) {
    // A phi that merges several values is reported as an error and left in
    // place.
    if (Phi->getNumIncomingValues() != 1) {
      dxilutil::EmitErrorOnInstruction(
          Phi, "Phi not supported for buffer subscript");
      return;
    }
    // Since the phi only has a single value we can safely process its
    // users to translate the subscript. These single-value phis are
    // inserted by the lcssa pass.
    for (auto U = Phi->user_begin(); U != Phi->user_end();) {
      Value *PhiUser = *(U++);
      TranslateStructBufSubscriptUser(cast<Instruction>(PhiUser), handle,
                                      ResKind, bufIdx, baseOffset, status, OP,
                                      DL);
    }
    Phi->eraseFromParent();
  } else {
    // The only remaining supported user is a GEP; cast<> asserts otherwise.
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(user);
    Type *Ty = GEP->getType()->getPointerElementType();

    // Convert the GEP indices to an i32 offset.
    Value *offset = dxilutil::GEPIdxToOffset(GEP, Builder, OP, DL);
    DXASSERT_LOCALVAR(Ty,
                      offset->getType() == Type::getInt32Ty(Ty->getContext()),
                      "else bitness is wrong");
    // No offset into element for Raw buffers; byte offset is in bufIdx.
    if (DXIL::IsRawBuffer(ResKind))
      bufIdx = Builder.CreateAdd(offset, bufIdx);
    else
      baseOffset = Builder.CreateAdd(offset, baseOffset);

    // Translate the GEP's users with the updated index/offset.
    for (auto U = GEP->user_begin(); U != GEP->user_end();) {
      Value *GEPUser = *(U++);

      TranslateStructBufSubscriptUser(cast<Instruction>(GEPUser), handle,
                                      ResKind, bufIdx, baseOffset, status, OP,
                                      DL);
    }
    // delete the inst
    GEP->eraseFromParent();
  }
}
8934 | | |
8935 | | void TranslateStructBufSubscript(CallInst *CI, Value *handle, Value *status, |
8936 | | hlsl::OP *OP, HLResource::Kind ResKind, |
8937 | 12.9k | const DataLayout &DL) { |
8938 | 12.9k | Value *subscriptIndex = |
8939 | 12.9k | CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx); |
8940 | 12.9k | Value *bufIdx = nullptr; |
8941 | 12.9k | Value *offset = nullptr; |
8942 | 12.9k | bufIdx = subscriptIndex; |
8943 | 12.9k | if (ResKind == HLResource::Kind::RawBuffer) |
8944 | 284 | offset = UndefValue::get(Type::getInt32Ty(CI->getContext())); |
8945 | 12.6k | else |
8946 | | // StructuredBuffer, TypedBuffer, etc. |
8947 | 12.6k | offset = OP->GetU32Const(0); |
8948 | | |
8949 | 27.9k | for (auto U = CI->user_begin(); U != CI->user_end();) { |
8950 | 15.0k | Value *user = *(U++); |
8951 | | |
8952 | 15.0k | TranslateStructBufSubscriptUser(cast<Instruction>(user), handle, ResKind, |
8953 | 15.0k | bufIdx, offset, status, OP, DL); |
8954 | 15.0k | } |
8955 | 12.9k | } |
8956 | | } // namespace |
8957 | | |
8958 | | // HLSubscript. |
8959 | | namespace { |
8960 | | |
8961 | | Value *TranslateTypedBufSubscript(CallInst *CI, DXIL::ResourceKind RK, |
8962 | | DXIL::ResourceClass RC, Value *handle, |
8963 | | LoadInst *ldInst, IRBuilder<> &Builder, |
8964 | 2.75k | hlsl::OP *hlslOP, const DataLayout &DL) { |
8965 | | // The arguments to the call instruction are used to determine the access, |
8966 | | // the return value and type come from the load instruction. |
8967 | 2.75k | ResLoadHelper ldHelper(CI, RK, RC, handle, IntrinsicOp::MOP_Load, ldInst); |
8968 | 2.75k | TranslateBufLoad(ldHelper, RK, Builder, hlslOP, DL); |
8969 | | // delete the ld |
8970 | 2.75k | ldInst->eraseFromParent(); |
8971 | 2.75k | return ldHelper.retVal; |
8972 | 2.75k | } |
8973 | | |
8974 | | Value *UpdateVectorElt(Value *VecVal, Value *EltVal, Value *EltIdx, |
8975 | 16 | unsigned vectorSize, Instruction *InsertPt) { |
8976 | 16 | IRBuilder<> Builder(InsertPt); |
8977 | 16 | if (ConstantInt *CEltIdx = dyn_cast<ConstantInt>(EltIdx)) { |
8978 | 8 | VecVal = |
8979 | 8 | Builder.CreateInsertElement(VecVal, EltVal, CEltIdx->getLimitedValue()); |
8980 | 8 | } else { |
8981 | 8 | BasicBlock *BB = InsertPt->getParent(); |
8982 | 8 | BasicBlock *EndBB = BB->splitBasicBlock(InsertPt); |
8983 | | |
8984 | 8 | TerminatorInst *TI = BB->getTerminator(); |
8985 | 8 | IRBuilder<> SwitchBuilder(TI); |
8986 | 8 | LLVMContext &Ctx = InsertPt->getContext(); |
8987 | | |
8988 | 8 | SwitchInst *Switch = SwitchBuilder.CreateSwitch(EltIdx, EndBB, vectorSize); |
8989 | 8 | TI->eraseFromParent(); |
8990 | | |
8991 | 8 | Function *F = EndBB->getParent(); |
8992 | 8 | IRBuilder<> endSwitchBuilder(EndBB->begin()); |
8993 | 8 | Type *Ty = VecVal->getType(); |
8994 | 8 | PHINode *VecPhi = endSwitchBuilder.CreatePHI(Ty, vectorSize + 1); |
8995 | | |
8996 | 40 | for (unsigned i = 0; i < vectorSize; i++32 ) { |
8997 | 32 | BasicBlock *CaseBB = BasicBlock::Create(Ctx, "case", F, EndBB); |
8998 | 32 | Switch->addCase(SwitchBuilder.getInt32(i), CaseBB); |
8999 | 32 | IRBuilder<> CaseBuilder(CaseBB); |
9000 | | |
9001 | 32 | Value *CaseVal = CaseBuilder.CreateInsertElement(VecVal, EltVal, i); |
9002 | 32 | VecPhi->addIncoming(CaseVal, CaseBB); |
9003 | 32 | CaseBuilder.CreateBr(EndBB); |
9004 | 32 | } |
9005 | 8 | VecPhi->addIncoming(VecVal, BB); |
9006 | 8 | VecVal = VecPhi; |
9007 | 8 | } |
9008 | 16 | return VecVal; |
9009 | 16 | } |
9010 | | |
9011 | | void TranslateTypedBufferSubscript(CallInst *CI, HLOperationLowerHelper &helper, |
9012 | | HLObjectOperationLowerHelper *pObjHelper, |
9013 | 8.32k | bool &Translated) { |
9014 | 8.32k | Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx); |
9015 | | |
9016 | 8.32k | hlsl::OP *hlslOP = &helper.hlslOP; |
9017 | | // Resource ptr. |
9018 | 8.32k | Value *handle = ptr; |
9019 | 8.32k | DXIL::ResourceClass RC = pObjHelper->GetRC(handle); |
9020 | 8.32k | DXIL::ResourceKind RK = pObjHelper->GetRK(handle); |
9021 | | |
9022 | 8.32k | Type *Ty = CI->getType()->getPointerElementType(); |
9023 | | |
9024 | 16.9k | for (auto It = CI->user_begin(); It != CI->user_end();) { |
9025 | 8.61k | User *user = *(It++); |
9026 | 8.61k | Instruction *I = cast<Instruction>(user); |
9027 | 8.61k | IRBuilder<> Builder(I); |
9028 | 8.61k | Value *UndefI = UndefValue::get(Builder.getInt32Ty()); |
9029 | 8.61k | if (LoadInst *ldInst = dyn_cast<LoadInst>(user)) { |
9030 | 2.71k | TranslateTypedBufSubscript(CI, RK, RC, handle, ldInst, Builder, hlslOP, |
9031 | 2.71k | helper.dataLayout); |
9032 | 5.89k | } else if (StoreInst *stInst = dyn_cast<StoreInst>(user)) { |
9033 | 3.19k | Value *val = stInst->getValueOperand(); |
9034 | 3.19k | TranslateStore(RK, handle, val, |
9035 | 3.19k | CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), |
9036 | 3.19k | UndefI, Builder, hlslOP); |
9037 | | // delete the st |
9038 | 3.19k | stInst->eraseFromParent(); |
9039 | 3.19k | } else if (GetElementPtrInst *2.70k GEP2.70k = dyn_cast<GetElementPtrInst>(user)) { |
9040 | | // Must be vector type here. |
9041 | 56 | unsigned vectorSize = Ty->getVectorNumElements(); |
9042 | 56 | DXASSERT_NOMSG(GEP->getNumIndices() == 2); |
9043 | 56 | Use *GEPIdx = GEP->idx_begin(); |
9044 | 56 | GEPIdx++; |
9045 | 56 | Value *EltIdx = *GEPIdx; |
9046 | 96 | for (auto GEPIt = GEP->user_begin(); GEPIt != GEP->user_end();) { |
9047 | 56 | User *GEPUser = *(GEPIt++); |
9048 | 56 | if (StoreInst *SI = dyn_cast<StoreInst>(GEPUser)) { |
9049 | 16 | IRBuilder<> StBuilder(SI); |
9050 | | // Generate Ld. |
9051 | 16 | LoadInst *tmpLd = StBuilder.CreateLoad(CI); |
9052 | | |
9053 | 16 | Value *ldVal = TranslateTypedBufSubscript( |
9054 | 16 | CI, RK, RC, handle, tmpLd, StBuilder, hlslOP, helper.dataLayout); |
9055 | | // Update vector. |
9056 | 16 | ldVal = UpdateVectorElt(ldVal, SI->getValueOperand(), EltIdx, |
9057 | 16 | vectorSize, SI); |
9058 | | // Generate St. |
9059 | | // Reset insert point, UpdateVectorElt may move SI to different block. |
9060 | 16 | StBuilder.SetInsertPoint(SI); |
9061 | 16 | TranslateStore( |
9062 | 16 | RK, handle, ldVal, |
9063 | 16 | CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), UndefI, |
9064 | 16 | StBuilder, hlslOP); |
9065 | 16 | SI->eraseFromParent(); |
9066 | 16 | continue; |
9067 | 16 | } |
9068 | 40 | if (LoadInst *LI = dyn_cast<LoadInst>(GEPUser)) { |
9069 | 24 | IRBuilder<> LdBuilder(LI); |
9070 | | |
9071 | | // Generate tmp vector load with vector type & translate it |
9072 | 24 | LoadInst *tmpLd = LdBuilder.CreateLoad(CI); |
9073 | | |
9074 | 24 | Value *ldVal = TranslateTypedBufSubscript( |
9075 | 24 | CI, RK, RC, handle, tmpLd, LdBuilder, hlslOP, helper.dataLayout); |
9076 | | |
9077 | | // get the single element |
9078 | 24 | ldVal = GenerateVecEltFromGEP(ldVal, GEP, LdBuilder, |
9079 | 24 | /*bInsertLdNextToGEP*/ false); |
9080 | | |
9081 | 24 | LI->replaceAllUsesWith(ldVal); |
9082 | 24 | LI->eraseFromParent(); |
9083 | 24 | continue; |
9084 | 24 | } |
9085 | | // Invalid operations. |
9086 | 16 | Translated = false; |
9087 | 16 | dxilutil::EmitErrorOnInstruction(GEP, |
9088 | 16 | "Invalid operation on typed buffer."); |
9089 | 16 | return; |
9090 | 40 | } |
9091 | 40 | GEP->eraseFromParent(); |
9092 | 2.64k | } else { |
9093 | 2.64k | CallInst *userCall = cast<CallInst>(user); |
9094 | 2.64k | HLOpcodeGroup group = |
9095 | 2.64k | hlsl::GetHLOpcodeGroupByName(userCall->getCalledFunction()); |
9096 | 2.64k | unsigned opcode = hlsl::GetHLOpcode(userCall); |
9097 | 2.64k | if (group == HLOpcodeGroup::HLIntrinsic) { |
9098 | 2.64k | IntrinsicOp IOP = static_cast<IntrinsicOp>(opcode); |
9099 | 2.64k | if (RC == DXIL::ResourceClass::SRV) { |
9100 | | // Invalid operations. |
9101 | 0 | Translated = false; |
9102 | 0 | dxilutil::EmitErrorOnInstruction(userCall, |
9103 | 0 | "Invalid operation on SRV."); |
9104 | 0 | return; |
9105 | 0 | } |
9106 | 2.64k | switch (IOP) { |
9107 | 370 | case IntrinsicOp::IOP_InterlockedAdd: { |
9108 | 370 | ResLoadHelper helper(CI, RK, RC, handle, |
9109 | 370 | IntrinsicOp::IOP_InterlockedAdd); |
9110 | 370 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, |
9111 | 370 | helper.addr, /*offset*/ nullptr); |
9112 | 370 | TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Add, |
9113 | 370 | Builder, hlslOP); |
9114 | 370 | } break; |
9115 | 192 | case IntrinsicOp::IOP_InterlockedAnd: { |
9116 | 192 | ResLoadHelper helper(CI, RK, RC, handle, |
9117 | 192 | IntrinsicOp::IOP_InterlockedAnd); |
9118 | 192 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, |
9119 | 192 | helper.addr, /*offset*/ nullptr); |
9120 | 192 | TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::And, |
9121 | 192 | Builder, hlslOP); |
9122 | 192 | } break; |
9123 | 356 | case IntrinsicOp::IOP_InterlockedExchange: { |
9124 | 356 | ResLoadHelper helper(CI, RK, RC, handle, |
9125 | 356 | IntrinsicOp::IOP_InterlockedExchange); |
9126 | 356 | Type *opType = nullptr; |
9127 | 356 | PointerType *ptrType = dyn_cast<PointerType>( |
9128 | 356 | userCall->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex) |
9129 | 356 | ->getType()); |
9130 | 356 | if (ptrType && ptrType->getElementType()->isFloatTy()) |
9131 | 12 | opType = Type::getInt32Ty(userCall->getContext()); |
9132 | 356 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, |
9133 | 356 | helper.addr, /*offset*/ nullptr, opType); |
9134 | 356 | TranslateAtomicBinaryOperation( |
9135 | 356 | atomHelper, DXIL::AtomicBinOpCode::Exchange, Builder, hlslOP); |
9136 | 356 | } break; |
9137 | 108 | case IntrinsicOp::IOP_InterlockedMax: { |
9138 | 108 | ResLoadHelper helper(CI, RK, RC, handle, |
9139 | 108 | IntrinsicOp::IOP_InterlockedMax); |
9140 | 108 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, |
9141 | 108 | helper.addr, /*offset*/ nullptr); |
9142 | 108 | TranslateAtomicBinaryOperation( |
9143 | 108 | atomHelper, DXIL::AtomicBinOpCode::IMax, Builder, hlslOP); |
9144 | 108 | } break; |
9145 | 108 | case IntrinsicOp::IOP_InterlockedMin: { |
9146 | 108 | ResLoadHelper helper(CI, RK, RC, handle, |
9147 | 108 | IntrinsicOp::IOP_InterlockedMin); |
9148 | 108 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, |
9149 | 108 | helper.addr, /*offset*/ nullptr); |
9150 | 108 | TranslateAtomicBinaryOperation( |
9151 | 108 | atomHelper, DXIL::AtomicBinOpCode::IMin, Builder, hlslOP); |
9152 | 108 | } break; |
9153 | 116 | case IntrinsicOp::IOP_InterlockedUMax: { |
9154 | 116 | ResLoadHelper helper(CI, RK, RC, handle, |
9155 | 116 | IntrinsicOp::IOP_InterlockedUMax); |
9156 | 116 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, |
9157 | 116 | helper.addr, /*offset*/ nullptr); |
9158 | 116 | TranslateAtomicBinaryOperation( |
9159 | 116 | atomHelper, DXIL::AtomicBinOpCode::UMax, Builder, hlslOP); |
9160 | 116 | } break; |
9161 | 116 | case IntrinsicOp::IOP_InterlockedUMin: { |
9162 | 116 | ResLoadHelper helper(CI, RK, RC, handle, |
9163 | 116 | IntrinsicOp::IOP_InterlockedUMin); |
9164 | 116 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, |
9165 | 116 | helper.addr, /*offset*/ nullptr); |
9166 | 116 | TranslateAtomicBinaryOperation( |
9167 | 116 | atomHelper, DXIL::AtomicBinOpCode::UMin, Builder, hlslOP); |
9168 | 116 | } break; |
9169 | 200 | case IntrinsicOp::IOP_InterlockedOr: { |
9170 | 200 | ResLoadHelper helper(CI, RK, RC, handle, |
9171 | 200 | IntrinsicOp::IOP_InterlockedOr); |
9172 | 200 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, |
9173 | 200 | helper.addr, /*offset*/ nullptr); |
9174 | 200 | TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Or, |
9175 | 200 | Builder, hlslOP); |
9176 | 200 | } break; |
9177 | 192 | case IntrinsicOp::IOP_InterlockedXor: { |
9178 | 192 | ResLoadHelper helper(CI, RK, RC, handle, |
9179 | 192 | IntrinsicOp::IOP_InterlockedXor); |
9180 | 192 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, |
9181 | 192 | helper.addr, /*offset*/ nullptr); |
9182 | 192 | TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Xor, |
9183 | 192 | Builder, hlslOP); |
9184 | 192 | } break; |
9185 | 442 | case IntrinsicOp::IOP_InterlockedCompareStore: |
9186 | 860 | case IntrinsicOp::IOP_InterlockedCompareExchange: { |
9187 | 860 | ResLoadHelper helper(CI, RK, RC, handle, |
9188 | 860 | IntrinsicOp::IOP_InterlockedCompareExchange); |
9189 | 860 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicCompareExchange, |
9190 | 860 | handle, helper.addr, /*offset*/ nullptr); |
9191 | 860 | TranslateAtomicCmpXChg(atomHelper, Builder, hlslOP); |
9192 | 860 | } break; |
9193 | 14 | case IntrinsicOp::IOP_InterlockedCompareStoreFloatBitwise: |
9194 | 28 | case IntrinsicOp::IOP_InterlockedCompareExchangeFloatBitwise: { |
9195 | 28 | Type *i32Ty = Type::getInt32Ty(userCall->getContext()); |
9196 | 28 | ResLoadHelper helper(CI, RK, RC, handle, |
9197 | 28 | IntrinsicOp::IOP_InterlockedCompareExchange); |
9198 | 28 | AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicCompareExchange, |
9199 | 28 | handle, helper.addr, /*offset*/ nullptr, |
9200 | 28 | i32Ty); |
9201 | 28 | TranslateAtomicCmpXChg(atomHelper, Builder, hlslOP); |
9202 | 28 | } break; |
9203 | 0 | default: |
9204 | 0 | DXASSERT(0, "invalid opcode"); |
9205 | 0 | break; |
9206 | 2.64k | } |
9207 | 2.64k | } else { |
9208 | 0 | DXASSERT(0, "invalid group"); |
9209 | 0 | } |
9210 | 2.64k | userCall->eraseFromParent(); |
9211 | 2.64k | } |
9212 | 8.61k | } |
9213 | 8.32k | } |
9214 | | } // namespace |
9215 | | |
9216 | | void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, |
9217 | | HLOperationLowerHelper &helper, |
9218 | | HLObjectOperationLowerHelper *pObjHelper, |
9219 | 29.6k | bool &Translated) { |
9220 | 29.6k | if (CI->user_empty()) { |
9221 | 0 | Translated = true; |
9222 | 0 | return; |
9223 | 0 | } |
9224 | 29.6k | hlsl::OP *hlslOP = &helper.hlslOP; |
9225 | | |
9226 | 29.6k | Value *ptr = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx); |
9227 | 29.6k | if (opcode == HLSubscriptOpcode::CBufferSubscript) { |
9228 | 8.72k | dxilutil::MergeGepUse(CI); |
9229 | | // Resource ptr. |
9230 | 8.72k | Value *handle = CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx); |
9231 | 8.72k | TranslateCBOperationsLegacy(handle, CI, hlslOP, helper.dxilTypeSys, |
9232 | 8.72k | helper.dataLayout, pObjHelper); |
9233 | 8.72k | Translated = true; |
9234 | 8.72k | return; |
9235 | 8.72k | } |
9236 | | |
9237 | 20.8k | if (opcode == HLSubscriptOpcode::DoubleSubscript) { |
9238 | | // Resource ptr. |
9239 | 180 | Value *handle = ptr; |
9240 | 180 | DXIL::ResourceKind RK = pObjHelper->GetRK(handle); |
9241 | 180 | Value *coord = CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx); |
9242 | 180 | Value *mipLevel = |
9243 | 180 | CI->getArgOperand(HLOperandIndex::kDoubleSubscriptMipLevelOpIdx); |
9244 | | |
9245 | 180 | auto U = CI->user_begin(); |
9246 | 180 | DXASSERT(CI->hasOneUse(), "subscript should only have one use"); |
9247 | 180 | IRBuilder<> Builder(CI); |
9248 | 180 | if (LoadInst *ldInst = dyn_cast<LoadInst>(*U)) { |
9249 | 140 | Value *Offset = UndefValue::get(Builder.getInt32Ty()); |
9250 | 140 | ResLoadHelper ldHelper(ldInst, RK, handle, coord, Offset, |
9251 | 140 | /*status*/ nullptr, mipLevel); |
9252 | 140 | TranslateBufLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout); |
9253 | 140 | ldInst->eraseFromParent(); |
9254 | 140 | } else { |
9255 | 40 | StoreInst *stInst = cast<StoreInst>(*U); |
9256 | 40 | Value *val = stInst->getValueOperand(); |
9257 | 40 | Value *UndefI = UndefValue::get(Builder.getInt32Ty()); |
9258 | 40 | TranslateStore(RK, handle, val, |
9259 | 40 | CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx), |
9260 | 40 | UndefI, Builder, hlslOP, mipLevel); |
9261 | 40 | stInst->eraseFromParent(); |
9262 | 40 | } |
9263 | 180 | Translated = true; |
9264 | 180 | return; |
9265 | 180 | } |
9266 | | |
9267 | 20.6k | Type *HandleTy = hlslOP->GetHandleType(); |
9268 | 20.6k | if (ptr->getType() == hlslOP->GetNodeRecordHandleType()) { |
9269 | 0 | DXASSERT(false, "Shouldn't get here, NodeRecord subscripts should have " |
9270 | 0 | "been lowered in LowerRecordAccessToGetNodeRecordPtr"); |
9271 | 0 | return; |
9272 | 0 | } |
9273 | | |
9274 | 20.6k | if (ptr->getType() == HandleTy) { |
9275 | | // Resource ptr. |
9276 | 20.1k | Value *handle = ptr; |
9277 | 20.1k | DXIL::ResourceKind RK = DxilResource::Kind::Invalid; |
9278 | 20.1k | Type *ObjTy = nullptr; |
9279 | 20.1k | Type *RetTy = nullptr; |
9280 | 20.1k | RK = pObjHelper->GetRK(handle); |
9281 | 20.1k | if (RK == DxilResource::Kind::Invalid) { |
9282 | 0 | Translated = false; |
9283 | 0 | return; |
9284 | 0 | } |
9285 | 20.1k | ObjTy = pObjHelper->GetResourceType(handle); |
9286 | 20.1k | RetTy = ObjTy->getStructElementType(0); |
9287 | 20.1k | Translated = true; |
9288 | | |
9289 | 20.1k | if (DXIL::IsStructuredBuffer(RK)) |
9290 | 11.8k | TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP, RK, |
9291 | 11.8k | helper.dataLayout); |
9292 | 8.32k | else |
9293 | 8.32k | TranslateTypedBufferSubscript(CI, helper, pObjHelper, Translated); |
9294 | | |
9295 | 20.1k | return; |
9296 | 20.1k | } |
9297 | | |
9298 | 496 | Value *basePtr = CI->getArgOperand(HLOperandIndex::kMatSubscriptMatOpIdx); |
9299 | 496 | if (IsLocalVariablePtr(basePtr) || IsSharedMemPtr(basePtr)) { |
9300 | | // Translate matrix into vector of array for share memory or local |
9301 | | // variable should be done in HLMatrixLowerPass |
9302 | 0 | DXASSERT_NOMSG(0); |
9303 | 0 | Translated = true; |
9304 | 0 | return; |
9305 | 0 | } |
9306 | | |
9307 | | // Other case should be take care in TranslateStructBufSubscript or |
9308 | | // TranslateCBOperations. |
9309 | 496 | Translated = false; |
9310 | 496 | } |
9311 | | |
9312 | | void TranslateSubscriptOperation(Function *F, HLOperationLowerHelper &helper, |
9313 | 12.6k | HLObjectOperationLowerHelper *pObjHelper) { |
9314 | 42.2k | for (auto U = F->user_begin(); U != F->user_end();) { |
9315 | 29.6k | Value *user = *(U++); |
9316 | 29.6k | if (!isa<Instruction>(user)) |
9317 | 0 | continue; |
9318 | | // must be call inst |
9319 | 29.6k | CallInst *CI = cast<CallInst>(user); |
9320 | 29.6k | unsigned opcode = GetHLOpcode(CI); |
9321 | 29.6k | bool Translated = true; |
9322 | 29.6k | TranslateHLSubscript(CI, static_cast<HLSubscriptOpcode>(opcode), helper, |
9323 | 29.6k | pObjHelper, Translated); |
9324 | 29.6k | if (Translated) { |
9325 | | // delete the call |
9326 | 29.0k | DXASSERT(CI->use_empty(), |
9327 | 29.0k | "else TranslateHLSubscript didn't replace/erase uses"); |
9328 | 29.0k | CI->eraseFromParent(); |
9329 | 29.0k | } |
9330 | 29.6k | } |
9331 | 12.6k | } |
9332 | | |
9333 | | // Create BitCast if ptr, otherwise, create alloca of new type, write to bitcast |
9334 | | // of alloca, and return load from alloca If bOrigAllocaTy is true: create |
9335 | | // alloca of old type instead, write to alloca, and return load from bitcast of |
9336 | | // alloca |
9337 | | static Instruction *BitCastValueOrPtr(Value *V, Instruction *Insert, Type *Ty, |
9338 | | bool bOrigAllocaTy = false, |
9339 | 164 | const Twine &Name = "") { |
9340 | 164 | IRBuilder<> Builder(Insert); |
9341 | 164 | if (Ty->isPointerTy()) { |
9342 | | // If pointer, we can bitcast directly |
9343 | 0 | return cast<Instruction>(Builder.CreateBitCast(V, Ty, Name)); |
9344 | 0 | } |
9345 | | |
9346 | | // If value, we have to alloca, store to bitcast ptr, and load |
9347 | 164 | IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Insert)); |
9348 | 164 | Type *allocaTy = bOrigAllocaTy ? V->getType()0 : Ty; |
9349 | 164 | Type *otherTy = bOrigAllocaTy ? Ty0 : V->getType(); |
9350 | 164 | Instruction *allocaInst = AllocaBuilder.CreateAlloca(allocaTy); |
9351 | 164 | Instruction *bitCast = cast<Instruction>( |
9352 | 164 | Builder.CreateBitCast(allocaInst, otherTy->getPointerTo())); |
9353 | 164 | Builder.CreateStore(V, bOrigAllocaTy ? allocaInst0 : bitCast); |
9354 | 164 | return Builder.CreateLoad(bOrigAllocaTy ? bitCast0 : allocaInst, Name); |
9355 | 164 | } |
9356 | | |
9357 | | static Instruction *CreateTransposeShuffle(IRBuilder<> &Builder, Value *vecVal, |
9358 | 0 | unsigned toRows, unsigned toCols) { |
9359 | 0 | SmallVector<int, 16> castMask(toCols * toRows); |
9360 | 0 | unsigned idx = 0; |
9361 | 0 | for (unsigned r = 0; r < toRows; r++) |
9362 | 0 | for (unsigned c = 0; c < toCols; c++) |
9363 | 0 | castMask[idx++] = c * toRows + r; |
9364 | 0 | return cast<Instruction>( |
9365 | 0 | Builder.CreateShuffleVector(vecVal, vecVal, castMask)); |
9366 | 0 | } |
9367 | | |
9368 | | void TranslateHLBuiltinOperation(Function *F, HLOperationLowerHelper &helper, |
9369 | | hlsl::HLOpcodeGroup group, |
9370 | 85.8k | HLObjectOperationLowerHelper *pObjHelper) { |
9371 | 85.8k | if (group == HLOpcodeGroup::HLIntrinsic) { |
9372 | | // map to dxil operations |
9373 | 88.4k | for (auto U = F->user_begin(); U != F->user_end();) { |
9374 | 65.2k | Value *User = *(U++); |
9375 | 65.2k | if (!isa<Instruction>(User)) |
9376 | 0 | continue; |
9377 | | // must be call inst |
9378 | 65.2k | CallInst *CI = cast<CallInst>(User); |
9379 | | |
9380 | | // Keep the instruction to lower by other function. |
9381 | 65.2k | bool Translated = true; |
9382 | | |
9383 | 65.2k | TranslateBuiltinIntrinsic(CI, helper, pObjHelper, Translated); |
9384 | | |
9385 | 65.2k | if (Translated) { |
9386 | | // delete the call |
9387 | 64.2k | DXASSERT(CI->use_empty(), |
9388 | 64.2k | "else TranslateBuiltinIntrinsic didn't replace/erase uses"); |
9389 | 64.2k | CI->eraseFromParent(); |
9390 | 64.2k | } |
9391 | 65.2k | } |
9392 | 62.6k | } else { |
9393 | 62.6k | if (group == HLOpcodeGroup::HLMatLoadStore) { |
9394 | | // Both ld/st use arg1 for the pointer. |
9395 | 0 | Type *PtrTy = |
9396 | 0 | F->getFunctionType()->getParamType(HLOperandIndex::kMatLoadPtrOpIdx); |
9397 | |
|
9398 | 0 | if (PtrTy->getPointerAddressSpace() == DXIL::kTGSMAddrSpace) { |
9399 | | // Translate matrix into vector of array for shared memory |
9400 | | // variable should be done in HLMatrixLowerPass. |
9401 | 0 | if (!F->user_empty()) |
9402 | 0 | F->getContext().emitError("Fail to lower matrix load/store."); |
9403 | 0 | } else if (PtrTy->getPointerAddressSpace() == DXIL::kDefaultAddrSpace) { |
9404 | | // Default address space may be function argument in lib target |
9405 | 0 | if (!F->user_empty()) { |
9406 | 0 | for (auto U = F->user_begin(); U != F->user_end();) { |
9407 | 0 | Value *User = *(U++); |
9408 | 0 | if (!isa<Instruction>(User)) |
9409 | 0 | continue; |
9410 | | // must be call inst |
9411 | 0 | CallInst *CI = cast<CallInst>(User); |
9412 | 0 | IRBuilder<> Builder(CI); |
9413 | 0 | HLMatLoadStoreOpcode opcode = |
9414 | 0 | static_cast<HLMatLoadStoreOpcode>(hlsl::GetHLOpcode(CI)); |
9415 | 0 | switch (opcode) { |
9416 | 0 | case HLMatLoadStoreOpcode::ColMatStore: |
9417 | 0 | case HLMatLoadStoreOpcode::RowMatStore: { |
9418 | 0 | Value *vecVal = |
9419 | 0 | CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx); |
9420 | 0 | Value *matPtr = |
9421 | 0 | CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx); |
9422 | 0 | matPtr = SkipAddrSpaceCast(matPtr); |
9423 | 0 | unsigned addrSpace = |
9424 | 0 | cast<PointerType>(matPtr->getType())->getAddressSpace(); |
9425 | |
|
9426 | 0 | Value *castPtr = Builder.CreateBitCast( |
9427 | 0 | matPtr, vecVal->getType()->getPointerTo(addrSpace)); |
9428 | 0 | Builder.CreateStore(vecVal, castPtr); |
9429 | 0 | CI->eraseFromParent(); |
9430 | 0 | } break; |
9431 | 0 | case HLMatLoadStoreOpcode::ColMatLoad: |
9432 | 0 | case HLMatLoadStoreOpcode::RowMatLoad: { |
9433 | 0 | Value *matPtr = |
9434 | 0 | CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx); |
9435 | 0 | matPtr = SkipAddrSpaceCast(matPtr); |
9436 | 0 | unsigned addrSpace = |
9437 | 0 | cast<PointerType>(matPtr->getType())->getAddressSpace(); |
9438 | 0 | Value *castPtr = Builder.CreateBitCast( |
9439 | 0 | matPtr, CI->getType()->getPointerTo(addrSpace)); |
9440 | 0 | Value *vecVal = Builder.CreateLoad(castPtr); |
9441 | 0 | CI->replaceAllUsesWith(vecVal); |
9442 | 0 | CI->eraseFromParent(); |
9443 | 0 | } break; |
9444 | 0 | } |
9445 | 0 | } |
9446 | 0 | } |
9447 | 0 | } |
9448 | 62.6k | } else if (group == HLOpcodeGroup::HLCast) { |
9449 | | // HLCast may be used on matrix value function argument in lib target |
9450 | 1.98k | if (!F->user_empty()) { |
9451 | 5.36k | for (auto U = F->user_begin(); U != F->user_end();) { |
9452 | 3.37k | Value *User = *(U++); |
9453 | 3.37k | if (!isa<Instruction>(User)) |
9454 | 0 | continue; |
9455 | | // must be call inst |
9456 | 3.37k | CallInst *CI = cast<CallInst>(User); |
9457 | 3.37k | IRBuilder<> Builder(CI); |
9458 | 3.37k | HLCastOpcode opcode = |
9459 | 3.37k | static_cast<HLCastOpcode>(hlsl::GetHLOpcode(CI)); |
9460 | 3.37k | bool bTranspose = false; |
9461 | 3.37k | bool bColDest = false; |
9462 | 3.37k | switch (opcode) { |
9463 | 0 | case HLCastOpcode::RowMatrixToColMatrix: |
9464 | 0 | bColDest = true; |
9465 | 0 | LLVM_FALLTHROUGH; |
9466 | 0 | case HLCastOpcode::ColMatrixToRowMatrix: |
9467 | 0 | bTranspose = true; |
9468 | 0 | LLVM_FALLTHROUGH; |
9469 | 78 | case HLCastOpcode::ColMatrixToVecCast: |
9470 | 164 | case HLCastOpcode::RowMatrixToVecCast: { |
9471 | 164 | Value *matVal = |
9472 | 164 | CI->getArgOperand(HLOperandIndex::kInitFirstArgOpIdx); |
9473 | 164 | Value *vecVal = |
9474 | 164 | BitCastValueOrPtr(matVal, CI, CI->getType(), |
9475 | 164 | /*bOrigAllocaTy*/ false, matVal->getName()); |
9476 | 164 | if (bTranspose) { |
9477 | 0 | HLMatrixType MatTy = HLMatrixType::cast(matVal->getType()); |
9478 | 0 | unsigned row = MatTy.getNumRows(); |
9479 | 0 | unsigned col = MatTy.getNumColumns(); |
9480 | 0 | if (bColDest) |
9481 | 0 | std::swap(row, col); |
9482 | 0 | vecVal = CreateTransposeShuffle(Builder, vecVal, row, col); |
9483 | 0 | } |
9484 | 164 | CI->replaceAllUsesWith(vecVal); |
9485 | 164 | CI->eraseFromParent(); |
9486 | 164 | } break; |
9487 | 3.37k | } |
9488 | 3.37k | } |
9489 | 1.98k | } |
9490 | 60.6k | } else if (group == HLOpcodeGroup::HLSubscript) { |
9491 | 12.6k | TranslateSubscriptOperation(F, helper, pObjHelper); |
9492 | 12.6k | } |
9493 | | // map to math function or llvm ir |
9494 | 62.6k | } |
9495 | 85.8k | } |
9496 | | |
9497 | | typedef std::unordered_map<llvm::Instruction *, llvm::Value *> HandleMap; |
9498 | | static void TranslateHLExtension(Function *F, |
9499 | | HLSLExtensionsCodegenHelper *helper, |
9500 | | OP &hlslOp, |
9501 | 68 | HLObjectOperationLowerHelper &objHelper) { |
9502 | | // Find all calls to the function F. |
9503 | | // Store the calls in a vector for now to be replaced the loop below. |
9504 | | // We use a two step "find then replace" to avoid removing uses while |
9505 | | // iterating. |
9506 | 68 | SmallVector<CallInst *, 8> CallsToReplace; |
9507 | 72 | for (User *U : F->users()) { |
9508 | 72 | if (CallInst *CI = dyn_cast<CallInst>(U)) { |
9509 | 72 | CallsToReplace.push_back(CI); |
9510 | 72 | } |
9511 | 72 | } |
9512 | | |
9513 | | // Get the lowering strategy to use for this intrinsic. |
9514 | 68 | llvm::StringRef LowerStrategy = GetHLLowerStrategy(F); |
9515 | 68 | HLObjectExtensionLowerHelper extObjHelper(objHelper); |
9516 | 68 | ExtensionLowering lower(LowerStrategy, helper, hlslOp, extObjHelper); |
9517 | | |
9518 | | // Replace all calls that were successfully translated. |
9519 | 72 | for (CallInst *CI : CallsToReplace) { |
9520 | 72 | Value *Result = lower.Translate(CI); |
9521 | 72 | if (Result && Result != CI) { |
9522 | 72 | CI->replaceAllUsesWith(Result); |
9523 | 72 | CI->eraseFromParent(); |
9524 | 72 | } |
9525 | 72 | } |
9526 | 68 | } |
9527 | | |
9528 | | namespace hlsl { |
9529 | | |
9530 | | void TranslateBuiltinOperations( |
9531 | | HLModule &HLM, HLSLExtensionsCodegenHelper *extCodegenHelper, |
9532 | 20.3k | std::unordered_set<Instruction *> &UpdateCounterSet) { |
9533 | 20.3k | HLOperationLowerHelper helper(HLM); |
9534 | | |
9535 | 20.3k | HLObjectOperationLowerHelper objHelper = {HLM, UpdateCounterSet}; |
9536 | | |
9537 | 20.3k | Module *M = HLM.GetModule(); |
9538 | | |
9539 | 20.3k | SmallVector<Function *, 4> NonUniformResourceIndexIntrinsics; |
9540 | | |
9541 | | // generate dxil operation |
9542 | 195k | for (iplist<Function>::iterator F : M->getFunctionList()) { |
9543 | 195k | if (F->user_empty()) |
9544 | 34.4k | continue; |
9545 | 161k | if (!F->isDeclaration()) { |
9546 | 172 | continue; |
9547 | 172 | } |
9548 | 161k | hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F); |
9549 | 161k | if (group == HLOpcodeGroup::NotHL) { |
9550 | | // Nothing to do. |
9551 | 75.0k | continue; |
9552 | 75.0k | } |
9553 | 85.9k | if (group == HLOpcodeGroup::HLExtIntrinsic) { |
9554 | 68 | TranslateHLExtension(F, extCodegenHelper, helper.hlslOP, objHelper); |
9555 | 68 | continue; |
9556 | 68 | } |
9557 | 85.8k | if (group == HLOpcodeGroup::HLIntrinsic) { |
9558 | 23.2k | CallInst *CI = cast<CallInst>(*F->user_begin()); // must be call inst |
9559 | 23.2k | unsigned opcode = hlsl::GetHLOpcode(CI); |
9560 | 23.2k | if (opcode == (unsigned)IntrinsicOp::IOP_NonUniformResourceIndex) { |
9561 | 116 | NonUniformResourceIndexIntrinsics.push_back(F); |
9562 | 116 | continue; |
9563 | 116 | } |
9564 | 23.2k | } |
9565 | 85.7k | TranslateHLBuiltinOperation(F, helper, group, &objHelper); |
9566 | 85.7k | } |
9567 | | |
9568 | | // Translate last so value placed in NonUniformSet is still valid. |
9569 | 20.3k | if (!NonUniformResourceIndexIntrinsics.empty()) { |
9570 | 116 | for (auto F : NonUniformResourceIndexIntrinsics) { |
9571 | 116 | TranslateHLBuiltinOperation(F, helper, HLOpcodeGroup::HLIntrinsic, |
9572 | 116 | &objHelper); |
9573 | 116 | } |
9574 | 90 | } |
9575 | 20.3k | } |
9576 | | |
9577 | | void EmitGetNodeRecordPtrAndUpdateUsers(HLOperationLowerHelper &helper, |
9578 | 636 | CallInst *CI, Value *ArrayIndex) { |
9579 | 636 | IRBuilder<> Builder(CI); |
9580 | 636 | Value *opArg = nullptr; |
9581 | 636 | Value *Handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
9582 | 636 | opArg = Builder.getInt32((unsigned)DXIL::OpCode::GetNodeRecordPtr); |
9583 | 636 | StructType *origRecordUDT = |
9584 | 636 | cast<StructType>(cast<PointerType>(CI->getType())->getElementType()); |
9585 | 636 | Type *getNodeRecordPtrRT = origRecordUDT; |
9586 | | // Translate node record type here |
9587 | 636 | auto findIt = helper.loweredTypes.find(origRecordUDT); |
9588 | 636 | if (findIt != helper.loweredTypes.end()) { |
9589 | 244 | getNodeRecordPtrRT = findIt->second; |
9590 | 392 | } else { |
9591 | 392 | getNodeRecordPtrRT = GetLoweredUDT(origRecordUDT, &helper.dxilTypeSys); |
9592 | 392 | if (origRecordUDT != getNodeRecordPtrRT) |
9593 | 112 | helper.loweredTypes[origRecordUDT] = getNodeRecordPtrRT; |
9594 | 392 | } |
9595 | 636 | getNodeRecordPtrRT = |
9596 | 636 | getNodeRecordPtrRT->getPointerTo(DXIL::kNodeRecordAddrSpace); |
9597 | 636 | Function *getNodeRecordPtr = helper.hlslOP.GetOpFunc( |
9598 | 636 | DXIL::OpCode::GetNodeRecordPtr, getNodeRecordPtrRT); |
9599 | 636 | Value *args[] = {opArg, Handle, ArrayIndex}; |
9600 | 636 | Value *NodeRecordPtr = Builder.CreateCall(getNodeRecordPtr, args); |
9601 | 636 | ReplaceUsesForLoweredUDT(CI, NodeRecordPtr); |
9602 | 636 | } |
9603 | | |
9604 | 20.3k | void LowerRecordAccessToGetNodeRecordPtr(HLModule &HLM) { |
9605 | 20.3k | Module *M = HLM.GetModule(); |
9606 | 20.3k | HLOperationLowerHelper helper(HLM); |
9607 | 163k | for (iplist<Function>::iterator F : M->getFunctionList()) { |
9608 | 163k | if (F->user_empty()) |
9609 | 31.9k | continue; |
9610 | 131k | hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F); |
9611 | 131k | if (group == HLOpcodeGroup::HLSubscript) { |
9612 | 43.2k | for (auto U = F->user_begin(); U != F->user_end();) { |
9613 | 30.2k | Value *User = *(U++); |
9614 | 30.2k | if (!isa<Instruction>(User)) |
9615 | 0 | continue; |
9616 | | // must be call inst |
9617 | 30.2k | CallInst *CI = cast<CallInst>(User); |
9618 | 30.2k | HLSubscriptOpcode opcode = |
9619 | 30.2k | static_cast<HLSubscriptOpcode>(hlsl::GetHLOpcode(CI)); |
9620 | 30.2k | if (opcode != HLSubscriptOpcode::DefaultSubscript) |
9621 | 9.46k | continue; |
9622 | | |
9623 | 20.8k | hlsl::OP *OP = &helper.hlslOP; |
9624 | 20.8k | Value *Handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); |
9625 | 20.8k | if (Handle->getType() != OP->GetNodeRecordHandleType()) { |
9626 | 20.1k | continue; |
9627 | 20.1k | } |
9628 | | |
9629 | 636 | Value *Index = CI->getNumArgOperands() > 2 |
9630 | 636 | ? CI->getArgOperand(2)324 |
9631 | 636 | : ConstantInt::get(helper.i32Ty, 0)312 ; |
9632 | 636 | EmitGetNodeRecordPtrAndUpdateUsers(helper, CI, Index); |
9633 | 636 | CI->eraseFromParent(); |
9634 | 636 | } |
9635 | 12.9k | } |
9636 | 131k | } |
9637 | 20.3k | } |
9638 | | } // namespace hlsl |