/home/runner/work/DirectXShaderCompiler/DirectXShaderCompiler/lib/DxilPIXPasses/DxilPIXMeshShaderOutputInstrumentation.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////////////// |
2 | | // // |
3 | | // DxilPIXMeshShaderOutputInstrumentation.cpp // |
4 | | // Copyright (C) Microsoft Corporation. All rights reserved. // |
5 | | // This file is distributed under the University of Illinois Open Source // |
6 | | // License. See LICENSE.TXT for details. // |
7 | | // // |
8 | | // Provides a pass to add instrumentation to retrieve mesh shader output. // |
9 | | // Used by PIX. // |
10 | | // // |
11 | | /////////////////////////////////////////////////////////////////////////////// |
12 | | |
13 | | #include "dxc/DXIL/DxilOperations.h" |
14 | | #include "dxc/DXIL/DxilUtil.h" |
15 | | |
16 | | #include "dxc/DXIL/DxilInstructions.h" |
17 | | #include "dxc/DXIL/DxilModule.h" |
18 | | #include "dxc/DxilPIXPasses/DxilPIXPasses.h" |
19 | | #include "dxc/HLSL/DxilGenerationPass.h" |
20 | | #include "dxc/HLSL/DxilSpanAllocator.h" |
21 | | |
22 | | #include "llvm/IR/InstIterator.h" |
23 | | #include "llvm/IR/PassManager.h" |
24 | | #include "llvm/Support/FormattedStream.h" |
25 | | #include "llvm/Transforms/Utils/Local.h" |
26 | | #include <deque> |
27 | | |
28 | | #ifdef _WIN32 |
29 | | #include <winerror.h> |
30 | | #endif |
31 | | |
32 | | #include "PixPassHelpers.h" |
33 | | |
// Layout of the tail end of the PIX debug UAV.
// Keep these in sync with the same-named value in the debugger application's
// WinPixShaderUtils.h

// Size in bytes of the "dumping ground" region. UAVDumpingGroundOffset()
// subtracts this from the configured UAV size.
constexpr uint64_t DebugBufferDumpingGroundSize = 64 * 1024;
// Byte distance from the start of the dumping ground to the dword used as the
// atomic write-offset counter.
// The actual max size per record is much smaller than this, but it never
// hurts to be generous.
constexpr size_t CounterOffsetBeyondUsefulData =
    DebugBufferDumpingGroundSize / 2;
// The counter dword itself has to fit inside the dumping ground.
static_assert(CounterOffsetBeyondUsefulData + sizeof(uint32_t) <=
                  DebugBufferDumpingGroundSize,
              "offset counter must lie within the dumping ground");

// Record-type tags: the first dword of every record written by this pass.
// Keep these in sync with the same-named values in PIX's MeshShaderOutput.cpp
constexpr uint32_t triangleIndexIndicator = 0x1;
constexpr uint32_t int32ValueIndicator = 0x2;
constexpr uint32_t floatValueIndicator = 0x3;
constexpr uint32_t int16ValueIndicator = 0x4;
constexpr uint32_t float16ValueIndicator = 0x5;
49 | | |
50 | | using namespace llvm; |
51 | | using namespace hlsl; |
52 | | using namespace PIXPassHelpers; |
53 | | |
54 | | class DxilPIXMeshShaderOutputInstrumentation : public ModulePass { |
55 | | public: |
56 | | static char ID; // Pass identification, replacement for typeid |
57 | 10 | explicit DxilPIXMeshShaderOutputInstrumentation() : ModulePass(ID) {} |
58 | 10 | StringRef getPassName() const override { |
59 | 10 | return "DXIL mesh shader output instrumentation"; |
60 | 10 | } |
61 | | void applyOptions(PassOptions O) override; |
62 | | bool runOnModule(Module &M) override; |
63 | | |
64 | | private: |
65 | | CallInst *m_OutputUAV = nullptr; |
66 | | int m_RemainingReservedSpaceInBytes = 0; |
67 | | Constant *m_OffsetMask = nullptr; |
68 | | SmallVector<Value *, 2> m_threadUniquifier; |
69 | | |
70 | | uint64_t m_UAVSize = 1024 * 1024; |
71 | | bool m_ExpandPayload = false; |
72 | | uint32_t m_DispatchArgumentY = 1; |
73 | | uint32_t m_DispatchArgumentZ = 1; |
74 | | |
75 | | struct BuilderContext { |
76 | | Module &M; |
77 | | DxilModule &DM; |
78 | | LLVMContext &Ctx; |
79 | | OP *HlslOP; |
80 | | IRBuilder<> &Builder; |
81 | | }; |
82 | | |
83 | | SmallVector<Value *, 2> insertInstructionsToCreateDisambiguationValue( |
84 | | IRBuilder<> &Builder, OP *HlslOP, LLVMContext &Ctx, |
85 | | StructType *originalPayloadStructType, Instruction *firstGetPayload); |
86 | | Value *reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInBytes); |
87 | | uint32_t UAVDumpingGroundOffset(); |
88 | | Value *writeDwordAndReturnNewOffset(BuilderContext &BC, Value *TheOffset, |
89 | | Value *TheValue); |
90 | | template <typename... T> void Instrument(BuilderContext &BC, T... values); |
91 | | }; |
92 | | |
93 | 10 | void DxilPIXMeshShaderOutputInstrumentation::applyOptions(PassOptions O) { |
94 | 10 | GetPassOptionUInt64(O, "UAVSize", &m_UAVSize, 1024 * 1024); |
95 | 10 | GetPassOptionBool(O, "expand-payload", &m_ExpandPayload, 0); |
96 | 10 | GetPassOptionUInt32(O, "dispatchArgY", &m_DispatchArgumentY, 1); |
97 | 10 | GetPassOptionUInt32(O, "dispatchArgZ", &m_DispatchArgumentZ, 1); |
98 | 10 | } |
99 | | |
100 | 48 | uint32_t DxilPIXMeshShaderOutputInstrumentation::UAVDumpingGroundOffset() { |
101 | 48 | return static_cast<uint32_t>(m_UAVSize - DebugBufferDumpingGroundSize); |
102 | 48 | } |
103 | | |
104 | | Value *DxilPIXMeshShaderOutputInstrumentation::reserveDebugEntrySpace( |
105 | 40 | BuilderContext &BC, uint32_t SpaceInBytes) { |
106 | | // Check the previous caller didn't reserve too much space: |
107 | 40 | assert(m_RemainingReservedSpaceInBytes == 0); |
108 | | |
109 | | // Check that the caller didn't ask for so much memory that it will |
110 | | // overwrite the offset counter: |
111 | 40 | assert(m_RemainingReservedSpaceInBytes < (int)CounterOffsetBeyondUsefulData); |
112 | | |
113 | 40 | m_RemainingReservedSpaceInBytes = SpaceInBytes; |
114 | | |
115 | | // Insert the UAV increment instruction: |
116 | 40 | Function *AtomicOpFunc = |
117 | 40 | BC.HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(BC.Ctx)); |
118 | 40 | Constant *AtomicBinOpcode = |
119 | 40 | BC.HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp); |
120 | 40 | Constant *AtomicAdd = |
121 | 40 | BC.HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add); |
122 | 40 | Constant *OffsetArg = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() + |
123 | 40 | CounterOffsetBeyondUsefulData); |
124 | 40 | UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx)); |
125 | | |
126 | 40 | Constant *Increment = BC.HlslOP->GetU32Const(SpaceInBytes); |
127 | | |
128 | 40 | auto *PreviousValue = BC.Builder.CreateCall( |
129 | 40 | AtomicOpFunc, |
130 | 40 | { |
131 | 40 | AtomicBinOpcode, // i32, ; opcode |
132 | 40 | m_OutputUAV, // %dx.types.Handle, ; resource handle |
133 | 40 | AtomicAdd, // i32, ; binary operation code : EXCHANGE, IADD, AND, OR, |
134 | | // XOR, IMIN, IMAX, UMIN, UMAX |
135 | 40 | OffsetArg, // i32, ; coordinate c0: index in bytes |
136 | 40 | UndefArg, // i32, ; coordinate c1 (unused) |
137 | 40 | UndefArg, // i32, ; coordinate c2 (unused) |
138 | 40 | Increment, // i32); increment value |
139 | 40 | }, |
140 | 40 | "UAVIncResult"); |
141 | | |
142 | 40 | return BC.Builder.CreateAnd(PreviousValue, m_OffsetMask, "MaskedForUAVLimit"); |
143 | 40 | } |
144 | | |
145 | | Value *DxilPIXMeshShaderOutputInstrumentation::writeDwordAndReturnNewOffset( |
146 | 312 | BuilderContext &BC, Value *TheOffset, Value *TheValue) { |
147 | | |
148 | 312 | Function *StoreValue = |
149 | 312 | BC.HlslOP->GetOpFunc(OP::OpCode::BufferStore, Type::getInt32Ty(BC.Ctx)); |
150 | 312 | Constant *StoreValueOpcode = |
151 | 312 | BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::BufferStore); |
152 | 312 | UndefValue *Undef32Arg = UndefValue::get(Type::getInt32Ty(BC.Ctx)); |
153 | 312 | Constant *WriteMask_X = BC.HlslOP->GetI8Const(1); |
154 | | |
155 | 312 | (void)BC.Builder.CreateCall( |
156 | 312 | StoreValue, |
157 | 312 | {StoreValueOpcode, // i32 opcode |
158 | 312 | m_OutputUAV, // %dx.types.Handle, ; resource handle |
159 | 312 | TheOffset, // i32 c0: index in bytes into UAV |
160 | 312 | Undef32Arg, // i32 c1: unused |
161 | 312 | TheValue, |
162 | 312 | Undef32Arg, // unused values |
163 | 312 | Undef32Arg, // unused values |
164 | 312 | Undef32Arg, // unused values |
165 | 312 | WriteMask_X}); |
166 | | |
167 | 312 | m_RemainingReservedSpaceInBytes -= sizeof(uint32_t); |
168 | 312 | assert(m_RemainingReservedSpaceInBytes >= |
169 | 312 | 0); // or else the caller didn't reserve enough space |
170 | | |
171 | 312 | return BC.Builder.CreateAdd( |
172 | 312 | TheOffset, |
173 | 312 | BC.HlslOP->GetU32Const(static_cast<unsigned int>(sizeof(uint32_t)))); |
174 | 312 | } |
175 | | |
176 | | template <typename... T> |
177 | | void DxilPIXMeshShaderOutputInstrumentation::Instrument(BuilderContext &BC, |
178 | 40 | T... values) { |
179 | 40 | llvm::SmallVector<llvm::Value *, 10> Values( |
180 | 40 | {static_cast<llvm::Value *>(values)...}); |
181 | 40 | const uint32_t DwordCount = Values.size(); |
182 | 40 | llvm::Value *byteOffset = |
183 | 40 | reserveDebugEntrySpace(BC, DwordCount * sizeof(uint32_t)); |
184 | 312 | for (llvm::Value *V : Values) { |
185 | 312 | byteOffset = writeDwordAndReturnNewOffset(BC, byteOffset, V); |
186 | 312 | } |
187 | 40 | } void DxilPIXMeshShaderOutputInstrumentation::Instrument<llvm::Constant*, llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*>(DxilPIXMeshShaderOutputInstrumentation::BuilderContext&, llvm::Constant*, llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*) Line | Count | Source | 178 | 8 | T... values) { | 179 | 8 | llvm::SmallVector<llvm::Value *, 10> Values( | 180 | 8 | {static_cast<llvm::Value *>(values)...}); | 181 | 8 | const uint32_t DwordCount = Values.size(); | 182 | 8 | llvm::Value *byteOffset = | 183 | 8 | reserveDebugEntrySpace(BC, DwordCount * sizeof(uint32_t)); | 184 | 56 | for (llvm::Value *V : Values) { | 185 | 56 | byteOffset = writeDwordAndReturnNewOffset(BC, byteOffset, V); | 186 | 56 | } | 187 | 8 | } |
void DxilPIXMeshShaderOutputInstrumentation::Instrument<llvm::Constant*, llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*>(DxilPIXMeshShaderOutputInstrumentation::BuilderContext&, llvm::Constant*, llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*) Line | Count | Source | 178 | 32 | T... values) { | 179 | 32 | llvm::SmallVector<llvm::Value *, 10> Values( | 180 | 32 | {static_cast<llvm::Value *>(values)...}); | 181 | 32 | const uint32_t DwordCount = Values.size(); | 182 | 32 | llvm::Value *byteOffset = | 183 | 32 | reserveDebugEntrySpace(BC, DwordCount * sizeof(uint32_t)); | 184 | 256 | for (llvm::Value *V : Values) { | 185 | 256 | byteOffset = writeDwordAndReturnNewOffset(BC, byteOffset, V); | 186 | 256 | } | 187 | 32 | } |
|
188 | | |
189 | | Value *GetValueFromExpandedPayload(IRBuilder<> &Builder, |
190 | | StructType *originalPayloadStructType, |
191 | | Instruction *firstGetPayload, |
192 | 12 | unsigned int offset, const char *name) { |
193 | 12 | auto *DerefPointer = Builder.getInt32(0); |
194 | 12 | auto *OffsetToExpandedData = Builder.getInt32(offset); |
195 | 12 | auto *GEP = Builder.CreateGEP( |
196 | 12 | cast<PointerType>(firstGetPayload->getType()->getScalarType()) |
197 | 12 | ->getElementType(), |
198 | 12 | firstGetPayload, {DerefPointer, OffsetToExpandedData}); |
199 | 12 | return Builder.CreateLoad(GEP, name); |
200 | 12 | } |
201 | | |
202 | | SmallVector<Value *, 2> DxilPIXMeshShaderOutputInstrumentation:: |
203 | | insertInstructionsToCreateDisambiguationValue( |
204 | | IRBuilder<> &Builder, OP *HlslOP, LLVMContext &Ctx, |
205 | 8 | StructType *originalPayloadStructType, Instruction *firstGetPayload) { |
206 | | |
207 | | // When a mesh shader is called from an amplification shader, all of the |
208 | | // thread id values are relative to the DispatchMesh call made by |
209 | | // that amplification shader. Data about what thread counts were passed |
210 | | // by the CPU to *CommandList::DispatchMesh are not available, but we |
211 | | // will have added that value to the AS->MS payload... |
212 | | |
213 | 8 | SmallVector<Value *, 2> ret; |
214 | 8 | Constant *Zero32Arg = HlslOP->GetU32Const(0); |
215 | | |
216 | 8 | bool AmplificationShaderIsActive = originalPayloadStructType != nullptr; |
217 | | |
218 | 8 | llvm::Value *ASDispatchMeshYCount = nullptr; |
219 | 8 | llvm::Value *ASDispatchMeshZCount = nullptr; |
220 | 8 | if (AmplificationShaderIsActive) { |
221 | | |
222 | 4 | auto *ASThreadId = GetValueFromExpandedPayload( |
223 | 4 | Builder, originalPayloadStructType, firstGetPayload, |
224 | 4 | originalPayloadStructType->getStructNumElements(), "ASThreadId"); |
225 | 4 | ret.push_back(ASThreadId); |
226 | 4 | ASDispatchMeshYCount = GetValueFromExpandedPayload( |
227 | 4 | Builder, originalPayloadStructType, firstGetPayload, |
228 | 4 | originalPayloadStructType->getStructNumElements() + 1, |
229 | 4 | "ASDispatchMeshYCount"); |
230 | 4 | ASDispatchMeshZCount = GetValueFromExpandedPayload( |
231 | 4 | Builder, originalPayloadStructType, firstGetPayload, |
232 | 4 | originalPayloadStructType->getStructNumElements() + 2, |
233 | 4 | "ASDispatchMeshZCount"); |
234 | 4 | } else { |
235 | 4 | ret.push_back(Zero32Arg); |
236 | 4 | } |
237 | | |
238 | 8 | Constant *One32Arg = HlslOP->GetU32Const(1); |
239 | 8 | Constant *Two32Arg = HlslOP->GetU32Const(2); |
240 | | |
241 | 8 | auto GroupIdFunc = |
242 | 8 | HlslOP->GetOpFunc(DXIL::OpCode::GroupId, Type::getInt32Ty(Ctx)); |
243 | 8 | Constant *Opcode = HlslOP->GetU32Const((unsigned)DXIL::OpCode::GroupId); |
244 | 8 | auto *GroupIdX = |
245 | 8 | Builder.CreateCall(GroupIdFunc, {Opcode, Zero32Arg}, "GroupIdX"); |
246 | 8 | auto *GroupIdY = |
247 | 8 | Builder.CreateCall(GroupIdFunc, {Opcode, One32Arg}, "GroupIdY"); |
248 | 8 | auto *GroupIdZ = |
249 | 8 | Builder.CreateCall(GroupIdFunc, {Opcode, Two32Arg}, "GroupIdZ"); |
250 | | |
251 | | // flattend group number = z + y*numZ + x*numY*numZ |
252 | 8 | if (AmplificationShaderIsActive) { |
253 | 4 | auto *GroupYxNumZ = Builder.CreateMul(GroupIdY, ASDispatchMeshZCount); |
254 | 4 | auto *FlatGroupNumZY = Builder.CreateAdd(GroupIdZ, GroupYxNumZ); |
255 | 4 | auto *GroupXxNumZ = Builder.CreateMul(GroupIdX, ASDispatchMeshZCount); |
256 | 4 | auto *GroupXxNumYZ = Builder.CreateMul(GroupXxNumZ, ASDispatchMeshYCount); |
257 | 4 | auto *FlatGroupNum = Builder.CreateAdd(GroupXxNumYZ, FlatGroupNumZY); |
258 | 4 | ret.push_back(FlatGroupNum); |
259 | 4 | } else { |
260 | 4 | auto *GroupYxNumZ = |
261 | 4 | Builder.CreateMul(GroupIdY, HlslOP->GetU32Const(m_DispatchArgumentZ)); |
262 | 4 | auto *FlatGroupNumZY = Builder.CreateAdd(GroupIdZ, GroupYxNumZ); |
263 | 4 | auto *GroupXxNumYZ = |
264 | 4 | Builder.CreateMul(GroupIdX, HlslOP->GetU32Const(m_DispatchArgumentY * |
265 | 4 | m_DispatchArgumentZ)); |
266 | 4 | auto *FlatGroupNum = Builder.CreateAdd(GroupXxNumYZ, FlatGroupNumZY); |
267 | 4 | ret.push_back(FlatGroupNum); |
268 | 4 | } |
269 | | |
270 | 8 | return ret; |
271 | 8 | } |
272 | | |
273 | 10 | bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M) { |
274 | 10 | DxilModule &DM = M.GetOrCreateDxilModule(); |
275 | 10 | LLVMContext &Ctx = M.getContext(); |
276 | 10 | OP *HlslOP = DM.GetOP(); |
277 | | |
278 | 10 | Type *OriginalPayloadStructType = nullptr; |
279 | 10 | ExpandedStruct expanded = {}; |
280 | 10 | Instruction *FirstNewStructGetMeshPayload = nullptr; |
281 | 10 | if (m_ExpandPayload) { |
282 | 6 | Instruction *getMeshPayloadInstructions = nullptr; |
283 | 6 | llvm::Function *entryFunction = PIXPassHelpers::GetEntryFunction(DM); |
284 | 6 | for (inst_iterator I = inst_begin(entryFunction), |
285 | 6 | E = inst_end(entryFunction); |
286 | 32 | I != E; ++I26 ) { |
287 | 30 | if (auto *Instr = llvm::cast<Instruction>(&*I)) { |
288 | 30 | if (hlsl::OP::IsDxilOpFuncCallInst(Instr, |
289 | 30 | hlsl::OP::OpCode::GetMeshPayload)) { |
290 | 4 | getMeshPayloadInstructions = Instr; |
291 | 4 | Type *OriginalPayloadStructPointerType = Instr->getType(); |
292 | 4 | OriginalPayloadStructType = |
293 | 4 | OriginalPayloadStructPointerType->getPointerElementType(); |
294 | | // The validator assures that there is only one call to |
295 | | // GetMeshPayload... |
296 | 4 | break; |
297 | 4 | } |
298 | 30 | } |
299 | 30 | } |
300 | | |
301 | 6 | if (OriginalPayloadStructType == nullptr) { |
302 | | // If the application used no payload, then we won't attempt to add one. |
303 | | // TODO: Is there a credible use case with no AS->MS payload? |
304 | | // PIX bug #35288335 |
305 | 2 | return false; |
306 | 2 | } |
307 | | |
308 | 4 | if (expanded.ExpandedPayloadStructPtrType == nullptr) { |
309 | 4 | expanded = ExpandStructType(Ctx, OriginalPayloadStructType); |
310 | 4 | } |
311 | | |
312 | 4 | if (getMeshPayloadInstructions != nullptr) { |
313 | | |
314 | 4 | Function *DxilFunc = HlslOP->GetOpFunc( |
315 | 4 | OP::OpCode::GetMeshPayload, expanded.ExpandedPayloadStructPtrType); |
316 | 4 | Constant *opArg = |
317 | 4 | HlslOP->GetU32Const((unsigned)OP::OpCode::GetMeshPayload); |
318 | 4 | IRBuilder<> Builder(getMeshPayloadInstructions); |
319 | 4 | Value *args[] = {opArg}; |
320 | 4 | Instruction *payload = Builder.CreateCall(DxilFunc, args); |
321 | | |
322 | 4 | if (FirstNewStructGetMeshPayload == nullptr) { |
323 | 4 | FirstNewStructGetMeshPayload = payload; |
324 | 4 | } |
325 | | |
326 | 4 | ReplaceAllUsesOfInstructionWithNewValueAndDeleteInstruction( |
327 | 4 | getMeshPayloadInstructions, payload, |
328 | 4 | expanded.ExpandedPayloadStructType); |
329 | 4 | } |
330 | 4 | } |
331 | | |
332 | 8 | Instruction *firstInsertionPt = |
333 | 8 | dxilutil::FirstNonAllocaInsertionPt(GetEntryFunction(DM)); |
334 | 8 | IRBuilder<> Builder(firstInsertionPt); |
335 | | |
336 | 8 | BuilderContext BC{M, DM, Ctx, HlslOP, Builder}; |
337 | | |
338 | 8 | m_OffsetMask = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() - 1); |
339 | | |
340 | 8 | m_OutputUAV = CreateUAVOnceForModule(DM, Builder, 0, "PIX_DebugUAV_Handle"); |
341 | | |
342 | 8 | if (FirstNewStructGetMeshPayload == nullptr) { |
343 | 4 | Instruction *firstInsertionPt = dxilutil::FirstNonAllocaInsertionPt( |
344 | 4 | PIXPassHelpers::GetEntryFunction(DM)); |
345 | 4 | IRBuilder<> Builder(firstInsertionPt); |
346 | 4 | m_threadUniquifier = insertInstructionsToCreateDisambiguationValue( |
347 | 4 | Builder, HlslOP, Ctx, nullptr, nullptr); |
348 | 4 | } else { |
349 | 4 | IRBuilder<> Builder(FirstNewStructGetMeshPayload->getNextNode()); |
350 | 4 | m_threadUniquifier = insertInstructionsToCreateDisambiguationValue( |
351 | 4 | Builder, HlslOP, Ctx, cast<StructType>(OriginalPayloadStructType), |
352 | 4 | FirstNewStructGetMeshPayload); |
353 | 4 | } |
354 | | |
355 | 8 | auto F = HlslOP->GetOpFunc(DXIL::OpCode::EmitIndices, Type::getVoidTy(Ctx)); |
356 | 8 | auto FunctionUses = F->uses(); |
357 | 16 | for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) { |
358 | 8 | auto &FunctionUse = *FI++; |
359 | 8 | auto FunctionUser = FunctionUse.getUser(); |
360 | | |
361 | 8 | auto Call = cast<CallInst>(FunctionUser); |
362 | | |
363 | 8 | IRBuilder<> Builder2(Call); |
364 | 8 | BuilderContext BC2{M, DM, Ctx, HlslOP, Builder2}; |
365 | | |
366 | 8 | Instrument(BC2, BC2.HlslOP->GetI32Const(triangleIndexIndicator), |
367 | 8 | m_threadUniquifier[0], m_threadUniquifier[1], |
368 | 8 | Call->getOperand(1), Call->getOperand(2), Call->getOperand(3), |
369 | 8 | Call->getOperand(4)); |
370 | 8 | } |
371 | | |
372 | 8 | struct OutputType { |
373 | 8 | Type *type; |
374 | 8 | uint32_t tag; |
375 | 8 | }; |
376 | 8 | SmallVector<OutputType, 4> StoreVertexOutputOverloads{ |
377 | 8 | {Type::getInt32Ty(Ctx), int32ValueIndicator}, |
378 | 8 | {Type::getInt16Ty(Ctx), int16ValueIndicator}, |
379 | 8 | {Type::getFloatTy(Ctx), floatValueIndicator}, |
380 | 8 | {Type::getHalfTy(Ctx), float16ValueIndicator}}; |
381 | | |
382 | 32 | for (auto const &Overload : StoreVertexOutputOverloads) { |
383 | 32 | F = HlslOP->GetOpFunc(DXIL::OpCode::StoreVertexOutput, Overload.type); |
384 | 32 | FunctionUses = F->uses(); |
385 | 64 | for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) { |
386 | 32 | auto &FunctionUse = *FI++; |
387 | 32 | auto FunctionUser = FunctionUse.getUser(); |
388 | | |
389 | 32 | auto Call = cast<CallInst>(FunctionUser); |
390 | | |
391 | 32 | IRBuilder<> Builder2(Call); |
392 | 32 | BuilderContext BC2{M, DM, Ctx, HlslOP, Builder2}; |
393 | | |
394 | | // Expand column index to 32 bits: |
395 | 32 | auto ColumnIndex = BC2.Builder.CreateCast( |
396 | 32 | Instruction::ZExt, Call->getOperand(3), Type::getInt32Ty(Ctx)); |
397 | | |
398 | | // Coerce actual value to int32 |
399 | 32 | Value *CoercedValue = Call->getOperand(4); |
400 | | |
401 | 32 | if (Overload.tag == floatValueIndicator) { |
402 | 32 | CoercedValue = BC2.Builder.CreateCast( |
403 | 32 | Instruction::BitCast, CoercedValue, Type::getInt32Ty(Ctx)); |
404 | 32 | } else if (0 Overload.tag == float16ValueIndicator0 ) { |
405 | 0 | auto *HalfInt = BC2.Builder.CreateCast( |
406 | 0 | Instruction::BitCast, CoercedValue, Type::getInt16Ty(Ctx)); |
407 | |
|
408 | 0 | CoercedValue = BC2.Builder.CreateCast(Instruction::ZExt, HalfInt, |
409 | 0 | Type::getInt32Ty(Ctx)); |
410 | 0 | } else if (Overload.tag == int16ValueIndicator) { |
411 | 0 | CoercedValue = BC2.Builder.CreateCast(Instruction::ZExt, CoercedValue, |
412 | 0 | Type::getInt32Ty(Ctx)); |
413 | 0 | } |
414 | | |
415 | 32 | Instrument(BC2, BC2.HlslOP->GetI32Const(Overload.tag), |
416 | 32 | m_threadUniquifier[0], m_threadUniquifier[1], |
417 | 32 | Call->getOperand(1), Call->getOperand(2), ColumnIndex, |
418 | 32 | CoercedValue, Call->getOperand(5)); |
419 | 32 | } |
420 | 32 | } |
421 | | |
422 | 8 | DM.ReEmitDxilResources(); |
423 | | |
424 | 8 | return true; |
425 | 10 | } |
426 | | |
427 | | char DxilPIXMeshShaderOutputInstrumentation::ID = 0; |
428 | | |
429 | 0 | ModulePass *llvm::createDxilDxilPIXMeshShaderOutputInstrumentation() { |
430 | 0 | return new DxilPIXMeshShaderOutputInstrumentation(); |
431 | 0 | } |
432 | | |
433 | | INITIALIZE_PASS(DxilPIXMeshShaderOutputInstrumentation, |
434 | | "hlsl-dxil-pix-meshshader-output-instrumentation", |
435 | | "DXIL mesh shader output instrumentation for PIX", false, false) |