gpu/
chunk_impl.rs

1use crate::chunk::ScopeUniqueMap;
2use crate::chunk_scope::{ChunkScope, TID_MAX_LEN};
/// Linear mapping for a 1D array.
///
/// `N` is the number of thread dimensions and `width` is the chunking window:
/// the array is divided into windows of `width` consecutive elements, which are
/// dealt out to threads round-robin until every element is covered.
///
/// ## Examples
///
/// Given:
/// bdim = (x=4, y=1, z=1)
/// gdim = (x=1, y=1, z=1)
/// arr: [T; 8]
/// width = 2
///
/// With `width = 2`, the mapping assigns two contiguous elements to each of
/// threads 0-3:
/// [0, 0, 1, 1, 2, 2, 3, 3]
///
/// With `width = 1`, the mapping assigns two non-contiguous elements to each of
/// threads 0-3:
/// [0, 1, 2, 3, 0, 1, 2, 3]
///
/// TODO: deprecate MapLinear and MapLinearWithDim in favor of reshape_map! macro.
#[derive(Copy, Clone)]
pub struct MapLinearWithDim<const N: usize = 3> {
    width: usize,
}

/// Shorthand for the 3-dimensional linear mapping.
///
/// TODO: deprecate MapLinear in favor of reshape_map! macro.
pub type MapLinear = MapLinearWithDim<3>;
30
31impl<const N: usize> MapLinearWithDim<N> {
32    #[gpu_codegen::device]
33    #[gpu_codegen::ret_sync_data(1000)]
34    pub fn new(width: usize) -> Self {
35        Self { width }
36    }
37}
38
39/// # Safety
40/// It is safe to use this mapping as long as the thread dimensions are properly
41/// configured.
42unsafe impl<CS: ChunkScope> ScopeUniqueMap<CS> for MapLinearWithDim<1> {
43    type IndexType = usize;
44    type GlobalIndexType = usize;
45
46    #[inline]
47    #[gpu_codegen::device]
48    fn precondition(&self) -> bool {
49        CS::global_dim_y() == 1 && CS::global_dim_z() == 1
50    }
51
52    #[inline]
53    #[gpu_codegen::device]
54    fn map(
55        &self,
56        idx: Self::IndexType,
57        thread_ids: [u32; TID_MAX_LEN],
58    ) -> (bool, Self::GlobalIndexType) {
59        ScopeUniqueMap::<CS>::map(&MapLinearWithDim::<3>::new(self.width), idx, thread_ids)
60    }
61}
62
63unsafe impl<CS: ChunkScope> ScopeUniqueMap<CS> for MapLinearWithDim<2> {
64    type IndexType = usize;
65    type GlobalIndexType = usize;
66
67    #[inline]
68    #[gpu_codegen::device]
69    fn precondition(&self) -> bool {
70        CS::global_dim_z() == 1
71    }
72
73    #[inline]
74    #[gpu_codegen::device]
75    fn map(
76        &self,
77        idx: Self::IndexType,
78        thread_ids: [u32; TID_MAX_LEN],
79    ) -> (bool, Self::GlobalIndexType) {
80        ScopeUniqueMap::<CS>::map(&MapLinearWithDim::<3>::new(self.width), idx, thread_ids)
81    }
82}
83
84unsafe impl<CS: ChunkScope> ScopeUniqueMap<CS> for MapLinearWithDim<3> {
85    type IndexType = usize;
86    type GlobalIndexType = usize;
87
88    #[inline]
89    #[gpu_codegen::device]
90    fn map(
91        &self,
92        idx: Self::IndexType,
93        thread_ids: [u32; TID_MAX_LEN],
94    ) -> (bool, Self::GlobalIndexType) {
95        let x_id = CS::global_id_x(thread_ids);
96        let y_id = CS::global_id_y(thread_ids);
97        let z_id = CS::global_id_z(thread_ids);
98        let global_thread_id =
99            (x_id + (z_id * CS::global_dim_y() + y_id) * CS::global_dim_x()) as usize;
100        let stride = self.width;
101        let total_dim = (CS::global_dim_x() * CS::global_dim_y() * CS::global_dim_z()) as usize;
102        (true, idx % stride + (idx / stride) * stride * total_dim + global_thread_id * stride)
103    }
104}
105
/// Continuous linear mapping: each thread owns one contiguous slice of
/// `width` elements (thread `t` maps its indices into `[t * width, (t + 1) * width)`).
#[derive(Copy, Clone)]
pub struct MapContinuousLinear {
    width: u32,
}
110
111impl MapContinuousLinear {
112    #[inline]
113    #[gpu_codegen::device]
114    #[gpu_codegen::ret_sync_data(1000)]
115    pub fn new(width: u32) -> Self {
116        Self { width }
117    }
118}
119
120unsafe impl<CS: ChunkScope> ScopeUniqueMap<CS> for MapContinuousLinear {
121    type IndexType = u32;
122    type GlobalIndexType = u32;
123
124    #[inline]
125    #[gpu_codegen::device]
126    fn map(
127        &self,
128        idx: Self::IndexType,
129        thread_ids: [u32; TID_MAX_LEN],
130    ) -> (bool, Self::GlobalIndexType) {
131        let x_id = CS::global_id_x(thread_ids);
132        let y_id = CS::global_id_y(thread_ids);
133        let z_id = CS::global_id_z(thread_ids);
134        let global_thread_id = x_id + (z_id * CS::global_dim_y() + y_id) * CS::global_dim_x();
135        (idx < self.width, idx + global_thread_id * self.width)
136    }
137}
138
/// This mapping strategy is useful when we want to reshape a 1D array into a 2D
/// array and then distribute one element to a thread one by one until consuming
/// all. It creates a new non-continuous partition for each thread.
/// - IndexType is (usize, usize)
///
/// Example:
/// - array: [T; 20]
/// - x_size = 5 => y_size = 4
/// - dim: x=2, y=2, z=1
/// ```text
///   0   1   2   3   4
/// ┌───┬───┬───┬───┬───┐
/// │0,0│1,0│0,0│1,0│0,0│
/// ├───┼───┼───┼───┼───┤
/// │0,1│1,1│0,1│1,1│0,1│
/// ├───┼───┼───┼───┼───┤
/// │0,0│1,0│0,0│1,0│0,0│
/// ├───┼───┼───┼───┼───┤
/// │0,1│1,1│0,1│1,1│0,1│
/// └───┴───┴───┴───┴───┘
/// ```
/// In this case,
/// - thread (1,0) and (1,1) should only have access to a shape of 2*2 = 4 elements.
/// - thread (0,0) and (0,1) have access to a shape of 3*2 = 6 elements.
#[derive(Copy, Clone)]
pub struct Map2D {
    /// Width (x extent) of the reshaped 2D view; the y extent is implied by
    /// the array length.
    pub x_size: usize,
}
168
169impl Map2D {
170    #[inline]
171    #[gpu_codegen::device]
172    #[gpu_codegen::ret_sync_data(1000)]
173    pub fn new(x_size: usize) -> Self {
174        Self { x_size }
175    }
176}
177
178unsafe impl<CS: ChunkScope> ScopeUniqueMap<CS> for Map2D {
179    type IndexType = (usize, usize);
180    type GlobalIndexType = usize;
181
182    #[inline]
183    #[gpu_codegen::device]
184    fn precondition(&self) -> bool {
185        CS::global_dim_z() == 1
186    }
187
188    #[inline]
189    #[gpu_codegen::device]
190    fn map(
191        &self,
192        idx: Self::IndexType,
193        thread_ids: [u32; TID_MAX_LEN],
194    ) -> (bool, Self::GlobalIndexType) {
195        let shape_x = self.x_size;
196        let inner_x = idx.0;
197        let inner_y = idx.1;
198        let x = inner_x * CS::global_dim_x() as usize + CS::global_id_x(thread_ids) as usize;
199        let y = inner_y * CS::global_dim_y() as usize + CS::global_id_y(thread_ids) as usize;
200        (x < shape_x, shape_x * y + x)
201    }
202}
203
204/// Maps a multi-dimensional local index and thread layout into a reshaped global index.
205/// The idea is that the user need to specify the dimensions of logical index (local + thread ids), and the target tensor dimensions, then we map the logical index into the corresponding target multi-dimension tensor.
206/// We have:
207/// - Logical index —  e.g., (threadId, local_access_id) in CUDA.
/// - Target tensor dimensions — e.g., a tensor of shape $(\hat{D}_0, \hat{D}_1, ..., \hat{D}_{N+M-1})$
209///
210/// We  want to map the logical linear or multi-dimensional index into the corresponding tensor element, possibly in a flattened memory layout.
/// To ensure we can access the tensor properly, developers need to do a linear2vec operation to convert threadid to $tids[D_{N+M-1}][...][D_N]$, and local_access_id to $lids[D_{N-1}][...][D_0]$
212/// The `reshape_map!` macro generates a `MapReshape` struct that reshapes
213/// local thread ID and index to a shape and then creates a linearized combination
214/// of them according to a specified layout or weights to get a global access index.
215/// ## Macro Signature
216/// The macro takes dimension sizes and optional `layout` `offset` to specify the new layout.
217/// ```text
218/// gpu::reshape_map!(
219///     [($local_id_dim, $tensor_dim?),*]  // local id + corresponding tensor dimension
220///     |
221///     [($thread_id_dim, $tensor_dim?),*] // thread id + corresponding tensor dimension
222///     =>
223///     layout: [-?i1, -?i2, ...]          // permutation of dimensions in the new layout
224///     offset: $offset                // optional offset (default 0)
225/// )
226/// ```
227/// - `lid_tensor_Dims`: array expression specifying the shape of local index, and the array shape corresponding to the local index:  
228///   ```math
229///   [(D_0, TD_0), (D_1, TD_1), ..., (D_{N-1}, TD_{N-1})]
230///   ```
/// - `tid_tensor_Dims`: array expression specifying the shape of thread index, and the array shape corresponding to the thread index:
///   $[(D_N, TD_N), ..., (D_{N+M-1}, TD_{N+M-1})]$
233/// - When $TD_k$ is omitted, it is assumed to be $D_k$.
234/// - `permutation`: permutation of dimensions in the new layout:  
235///   $[p_0, p_1, ..., p_{N+M-1}]$ (low → high dimension)
///   - $0 \le p_k < N$ for index (local) dimensions
///   - $N \le p_k < N+M$ for thread dimensions
238///   - Must be a valid constant permutation  [0, 1, 2, 3] or [i0, i1, t0, t1]
239///   - If omitted, defaults to $[0, 1, ..., N+M-1]$
240///   - Negative index `-k` is allowed to indicate $TD_k - 1 - id_k$ instead of $id_k$
241///   - `-0` is allowed  
242///   - `p_k` can be a constant literal or a readable name like `t<k-N>` or `i<k>` for thread and index dimensions
243/// ## Behavior
244/// - Translates `linear_thread_id` and `local_id` to software-defined multi-dimensional thread IDs:  
///   $lid_k = lid / \prod_{j=0}^{k - 1}(D_j) \mod D_k$
///   $lid = \sum_{k=0}^{N - 1}(lid_k * \prod_{j=0}^{k - 1}(D_j))$
///   $tid_k = tid / \prod_{j=0}^{k - 1}(D_{N+j}) \mod D_{N+k}$
///   $tid = (tid_0, tid_1, ..., tid_{M-1})$ (low → high)
249/// - Merges thread IDs with index IDs:  
///   $id = (lid_0, ..., lid_{N-1}, tid_0, ..., tid_{M-1})$ (low → high)
///   Thus, the logical index has dimension $idxDim = [D_0, D_1, ..., D_{N+M-1}]$
252/// - Treats array as shape:  
///   $\hat{D} = [TD_0, TD_1, ..., TD_{N+M-1}]$
/// - If $TD_k \ne D_k$, some threads or indices will be skipped.
255///   Valid access range:  
256///   $0 \le id_k < \min(D_k, TD_k)$
/// - Apply reversed flag to index
///   $id'_k = $ if $reversed_k$ then $TD_k - 1 - id_k$ else $id_k$
/// - Global ID
///   $globalid = \sum_{k=0}^{N+M-1}(id'_{p_k} \prod_{j=0}^{k - 1}(TD_{p_j}))$
261/// - Accesses the array in permuted order
262///   $arr[id'_{p_0}][id'_{p_1}]...[id'_{p_{N+M-1}}]$
263///
264/// ## Safety
265/// - Users cannot create `MapReshape` instances outside the macro without `unsafe {}`.
266/// - Sizes must be non-zero; `size = 0` triggers runtime errors and should be treated as functionality errors.  
267///   This will not violate race-free guarantees.
268/// - Guarantees safe mapping for valid permutations to ensure race-free chunking.
269/// ## Examples
270/// See more tests in `chunk_scope::test_reshape_map`.
271/// ### Example 1: No permutation
272/// Similar to `MapLinear(3)` when `num_thread = 4`.
273/// ```rust
274/// gpu::reshape_map!([3] | [4]  => layout: [0, 1]);
275/// ```
276/// which is equivalent to
277/// ```rust
278/// gpu::reshape_map!([3] | [4] => layout: [i0, t0]);
279/// // local index shape: [3]
280/// // thread shape: [4]
281/// // Access: arr[tid0][idx0]
282/// // access -> tid: [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]
283/// ```
284/// ### Example 2: Permutation swaps a thread id and local_id
285/// Similar to `MapLinear(1)` when `num_thread = 4, arr.len = 12`.
286/// ```rust
287/// gpu::reshape_map!([3] | [4] => layout: [1, 0]);
288/// ```
289/// which is equivalent to
290/// ```rust
291/// gpu::reshape_map!([3] | [4] => layout: [t0, i0]);
292/// // local index shape: [3]
293/// // thread shape: [4]
294/// // array shape: arr[4][3]
295/// // Access: arr[idx0][tid0]
296/// // access -> tid: [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
297/// ```
298/// ### Example 3: Software-defined thread dimension > 1
299/// ```rust
300/// gpu::reshape_map!([3] | [2, 2] => layout: [0, 2, 1]);
301/// ```
302/// which is equivalent to
303///
304///  ```rust
305/// gpu::reshape_map!([3] | [2, 2] => layout: [i0, t1, t0]);
306/// // local index shape: [3]
307/// // thread shape: [2, 2]
308/// // Access: arr[tid0][tid1][idx0]
309/// // access -> tid: [0, 0, 0, 2, 2, 2, 1, 1, 1, 3, 3, 3]
310/// ```
311/// ### Example 4: Swap tid and extra_id, thread dimension > 1
312/// ```rust
313/// gpu::reshape_map!([3] | [2, 2] => layout: [2, 0, 1]);
314/// gpu::reshape_map!([3] | [2, 2] => layout: [t1, i0, t0]);
315/// // local index shape: [3]
316/// // thread shape: [2, 2]
317/// // array shape: arr[2][2][3]
318/// // Access: arr[tid0][idx0][tid1]
319/// // access -> tid: [0, 2, 0, 2, 0, 2, 1, 3, 1, 3, 1, 3]
320/// ```
321/// ### Example 5: Reverse a thread dimension
322/// ```rust
323/// gpu::reshape_map!([3] | [2, 2] => layout: [0, -1, 2]);
324/// gpu::reshape_map!([3] | [2, 2] => layout: [i0, -t0, t1]);
325/// // local index shape: [3]
326/// // thread shape: [2, 2]
327/// // Access: arr[tid1][(max_tid0 - tid0)][idx0]
328/// ```
329/// ### Example 6: Skip some threads by setting a smaller new size
330/// ```rust
331/// gpu::reshape_map!([3] | [2, (2, 1)] => layout: [0, 1, 2]);
332/// gpu::reshape_map!([3] | [2, (2, 1)] => layout: [i0, t0, t1]);
333/// // local index shape: [3]
334/// // thread shape: [2, 2]
335/// // array shape: arr[1][2][3]
336/// // Access: arr[tid1][tid0][idx0]
337/// // valid access range:
338/// // 0 <= lid0 < 3
339/// // 0 <= tid0 < 2
340/// // 0 <= tid1 < 1
341/// // access -> tid: [0, 0, 0, 1, 1, 1, _, _, _, _, _, _]
342/// ```
343/// ### Example 7: Skip some data by setting a larger size
344/// ```rust
345/// gpu::reshape_map!([(3, 4)] | [2, 2] => layout: [0, 1, 2]);
346/// gpu::reshape_map!([(3, 4)] | [2, 2] => layout: [i0, t0, t1]);
347/// // local index shape: [3]
348/// // thread shape: [2, 2]
349/// // array shape: arr[2][2][4]
350/// // Access: arr[tid1][tid0][idx0]
351/// // access -> tid: [0, 0, 0, _, 1, 1, 1, _, 2, 2, 2, _, 3, 3, 3, _, ...]
352/// ```
353/// ## Invalid Examples
354/// ### Example 1: Invalid permutation (index out of range)
355/// ```rust,compile_fail
356/// gpu::reshape_map!([2] | [2, 3] => layout: [1, 2, 3]);
357/// ```
358/// ### Example 2: Invalid permutation (duplicate indices)
359/// ```rust,compile_fail
360/// gpu::reshape_map!([2] | [2, 3] => layout: [0, 0, 1]);
361/// gpu::reshape_map!([2] | [2, 3] => layout: [t0, t0, i0]);
362/// ```
363/// ### Example 3: Invalid thread dimension (<1)
364/// ```rust,compile_fail
365/// gpu::reshape_map!([2, 3] | [] => layout: [0, 1, 2]);
366/// ```
367/// ### Example 4: Invalid index dimension (<1)
368/// ```rust,compile_fail
369/// gpu::reshape_map!( [] | [2, 2] => layout: [0, 1]);
370/// ```
#[macro_export]
macro_rules! reshape_map {
    // Forward every token to the proc macro, passing `$crate` explicitly so the
    // generated code can name items from this crate regardless of the caller's
    // imports.
    ($($any: tt)*) => {
        $crate::prelude::reshape_map_macro!($crate, $($any)*)
    };
}
377
#[cfg(test)]
mod test {
    use num_traits::AsPrimitive;

    use super::*;
    use crate::chunk_scope::test::MockWarp2ThreadScope;

    /// Asserts that mapping `$m` over `$thread_num` threads produces the
    /// expected per-slot owner list. The 6-argument form lets the per-thread
    /// local index range (`$local_id_num`) differ from the array length
    /// (`$id_num`); the 5-argument form uses `$id_num` for both.
    macro_rules! assert_access_map {
        ($cs:ty, $m:expr, $thread_num:expr, $id_num:expr, $local_id_num:expr, $expected:expr) => {
            let access = get_access_map::<$thread_num, $cs>(&$m, $id_num, $local_id_num);
            assert!(access == $expected, "access_map = {:?}, expected = {:?}", access, $expected)
        };
        ($cs:ty, $m:expr, $thread_num:expr, $id_num:expr, $expected:expr) => {
            let access = get_access_map::<$thread_num, $cs>(&$m, $id_num, $id_num);
            assert!(access == $expected, "access_map = {:?}, expected = {:?}", access, $expected)
        };
    }

    /// Variant of `assert_access_map!` for mappings whose local index is a
    /// 2-tuple; `$id_num2` gives the (outer, inner) local index bounds.
    macro_rules! assert_access_map2 {
        ($cs:ty, $m:expr, $thread_num:expr, $id_num:expr, $id_num2:expr, $expected:expr) => {
            let access = get_access_map2::<$thread_num, $cs>(&$m, $id_num, $id_num2);
            assert!(access == $expected, "access_map = {:?}, expected = {:?}", access, $expected)
        };
    }

    /// Builds an access map of length `n`: slot i records the id of the (last)
    /// thread whose mapping touched element i, or -1 if no thread touched it.
    /// Each of the NTHREADS threads tries local indices 0..local_n.
    fn get_access_map<const NTHREADS: usize, CS>(
        map: &impl ScopeUniqueMap<CS, IndexType = u32>,
        n: usize,
        local_n: usize,
    ) -> alloc::vec::Vec<isize>
    where
        CS: ChunkScope<FromScope = crate::cg::ThreadWarpTile<NTHREADS>, ToScope = crate::cg::Thread>,
    {
        let mut access_map = alloc::vec![-1isize; n];
        assert!(NTHREADS <= 32);
        for t in 0..NTHREADS {
            for i in 0..local_n {
                // Warp-to-thread scope: the thread id is carried in slot 1.
                let tids = [0, t as u32, 0, 0, 0, 0];
                let (valid, mapped_idx) = ScopeUniqueMap::<CS>::map(map, i as u32, tids);
                let mapped_idx: usize = mapped_idx.as_();
                // Invalid or out-of-range mappings leave the slot untouched.
                if valid && mapped_idx < n {
                    access_map[mapped_idx] = t as _;
                }
            }
        }
        access_map
    }

    /// Same as `get_access_map`, but drives the mapping with a 2D local index,
    /// covering all pairs in 0..m.0 × 0..m.1 for every thread.
    fn get_access_map2<const NTHREADS: usize, CS>(
        map: &impl ScopeUniqueMap<CS, IndexType = (u32, u32)>,
        n: usize,
        m: (usize, usize),
    ) -> alloc::vec::Vec<isize>
    where
        CS: ChunkScope<FromScope = crate::cg::ThreadWarpTile<NTHREADS>, ToScope = crate::cg::Thread>,
    {
        let mut access_map = alloc::vec![-1isize; n];
        assert!(NTHREADS <= 32);
        for t in 0..NTHREADS {
            for i in 0..m.0 {
                for j in 0..m.1 {
                    // Warp-to-thread scope: the thread id is carried in slot 1.
                    let tids = [0, t as u32, 0, 0, 0, 0];
                    let (valid, mapped_idx) =
                        ScopeUniqueMap::<CS>::map(map, (i as u32, j as u32), tids);
                    let mapped_idx: usize = mapped_idx.as_();
                    if valid && mapped_idx < n {
                        access_map[mapped_idx] = t as _;
                    }
                }
            }
        }
        access_map
    }

    #[test]
    pub(crate) fn test_reshape_map_example3() {
        type S = MockWarp2ThreadScope<4, 0>; // a group of 4 threads.
        // Software-defined 2D thread shape with swapped thread dimensions.
        let map_reshape = reshape_map!([3] | [2, 2] => layout: [0, 2, 1]);
        assert_access_map!(S, map_reshape, 4, 12, [0, 0, 0, 2, 2, 2, 1, 1, 1, 3, 3, 3]);
    }

    #[test]
    pub(crate) fn test_reshape_map_example4() {
        type S = MockWarp2ThreadScope<4, 0>; // a group of 4 threads.
        // Numeric and named layouts are equivalent spellings of one permutation.
        let map_reshape = reshape_map!([3] | [2, 2] => layout: [2, 0, 1]);
        assert_access_map!(S, map_reshape, 4, 12, [0, 2, 0, 2, 0, 2, 1, 3, 1, 3, 1, 3]);
        let map_reshape = reshape_map!([3] | [2, 2] => layout: [t1, i0, t0]);
        assert_access_map!(S, map_reshape, 4, 12, [0, 2, 0, 2, 0, 2, 1, 3, 1, 3, 1, 3]);
    }

    #[test]
    pub(crate) fn test_reshape_map_example5() {
        type S = MockWarp2ThreadScope<4, 0>; // a group of 4 threads.
        // `-t0` reverses the first thread dimension.
        let map_reshape = crate::reshape_map!([3] | [2, 2] => layout: [i0, -t0, t1]);
        assert_access_map!(S, map_reshape, 4, 12, [1, 1, 1, 0, 0, 0, 3, 3, 3, 2, 2, 2]);
    }

    #[test]
    pub(crate) fn test_reshape_map_example5_2() {
        type S = MockWarp2ThreadScope<4, 0>; // a group of 4 threads.
        // Reverse both a thread dimension and the local index dimension.
        let map_reshape = crate::reshape_map!([3] | [2, 2] => layout: [-t0, -i0, t1]);
        assert_access_map!(S, map_reshape, 4, 12, 3, [1, 0, 1, 0, 1, 0, 3, 2, 3, 2, 3, 2]);
    }

    #[test]
    pub(crate) fn test_reshape_map_example6() {
        type S = MockWarp2ThreadScope<4, 0>; // a group of 4 threads.
        // Skip some threads by setting a smaller new size
        let map_reshape = crate::reshape_map!([3] | [2, (2, 1)] => layout: [i0, t0, t1]);
        // warp2thread and so use tid[1] only
        assert_access_map!(S, map_reshape, 4, 12, [0, 0, 0, 1, 1, 1, -1, -1, -1, -1, -1, -1]);
    }

    #[test]
    pub(crate) fn test_reshape_map_example7() {
        type S = MockWarp2ThreadScope<4, 0>; // a group of 4 threads.
        //Skip some data by setting a larger size
        let map_reshape = crate::reshape_map!([(3, 4)] | [2, 2] => layout: [i0, t0, t1]);
        assert_access_map!(S, map_reshape, 4, 12, [0, 0, 0, -1, 1, 1, 1, -1, 2, 2, 2, -1]);
    }

    #[test]
    pub(crate) fn test_reshape_map_example8() {
        type S = MockWarp2ThreadScope<8, 0>; // a group of 8 threads.
        // Skip some threads by setting a smaller new size
        let map_reshape = crate::reshape_map!([1, 3] | [(4, 2), 2] => layout: [i0, t0, t1, i1]);
        // warp2thread and so use tid[1] only
        assert_access_map2!(S, map_reshape, 8, 12, (1, 3), [0, 1, 4, 5, 0, 1, 4, 5, 0, 1, 4, 5]);
    }

    #[test]
    pub(crate) fn test_reshape_map_example9() {
        type S = MockWarp2ThreadScope<8, 0>; // a group of 8 threads.
        // Skip some threads by setting a smaller new size
        let map_reshape = crate::reshape_map!([3] | [(4, 2), 2] => layout: [t0, t1, i0]);
        // warp2thread and so use tid[1] only
        assert_access_map!(S, map_reshape, 8, 12, [0, 1, 4, 5, 0, 1, 4, 5, 0, 1, 4, 5]);
    }
}
516}