// gpu/chunk_impl.rs
1use crate::chunk::ScopeUniqueMap;
2use crate::chunk_scope::{ChunkScope, TID_MAX_LEN};
/// Linear mapping for a 1D array.
/// `N` is the number of thread dimensions.
/// `width` is the chunking window.
/// The array is divided into chunks along threads until all elements are covered.
///
/// ## Examples
///
/// Given:
/// bdim = (x=4, y=1, z=1)
/// gdim = (x=1, y=1, z=1)
/// arr: [T; 8]
/// width = 2
///
/// With `width = 2`, the mapping assigns two contiguous elements to each of threads 0-3:
/// [0, 0, 1, 1, 2, 2, 3, 3]
///
/// With `width = 1`, the mapping assigns two non-contiguous elements to each of threads 0-3:
/// [0, 1, 2, 3, 0, 1, 2, 3]
///
/// TODO: deprecate MapLinear and MapLinearWithDim in favor of reshape_map! macro.
#[derive(Copy, Clone)]
pub struct MapLinearWithDim<const N: usize = 3> {
    /// Number of consecutive elements a thread owns per round.
    width: usize,
}

/// Alias for the common 3-thread-dimension case.
/// TODO: deprecate MapLinear in favor of reshape_map! macro.
pub type MapLinear = MapLinearWithDim<3>;
30
31impl<const N: usize> MapLinearWithDim<N> {
32 #[gpu_codegen::device]
33 #[gpu_codegen::ret_sync_data(1000)]
34 pub fn new(width: usize) -> Self {
35 Self { width }
36 }
37}
38
39/// # Safety
40/// It is safe to use this mapping as long as the thread dimensions are properly
41/// configured.
42unsafe impl<CS: ChunkScope> ScopeUniqueMap<CS> for MapLinearWithDim<1> {
43 type IndexType = usize;
44 type GlobalIndexType = usize;
45
46 #[inline]
47 #[gpu_codegen::device]
48 fn precondition(&self) -> bool {
49 CS::global_dim_y() == 1 && CS::global_dim_z() == 1
50 }
51
52 #[inline]
53 #[gpu_codegen::device]
54 fn map(
55 &self,
56 idx: Self::IndexType,
57 thread_ids: [u32; TID_MAX_LEN],
58 ) -> (bool, Self::GlobalIndexType) {
59 ScopeUniqueMap::<CS>::map(&MapLinearWithDim::<3>::new(self.width), idx, thread_ids)
60 }
61}
62
63unsafe impl<CS: ChunkScope> ScopeUniqueMap<CS> for MapLinearWithDim<2> {
64 type IndexType = usize;
65 type GlobalIndexType = usize;
66
67 #[inline]
68 #[gpu_codegen::device]
69 fn precondition(&self) -> bool {
70 CS::global_dim_z() == 1
71 }
72
73 #[inline]
74 #[gpu_codegen::device]
75 fn map(
76 &self,
77 idx: Self::IndexType,
78 thread_ids: [u32; TID_MAX_LEN],
79 ) -> (bool, Self::GlobalIndexType) {
80 ScopeUniqueMap::<CS>::map(&MapLinearWithDim::<3>::new(self.width), idx, thread_ids)
81 }
82}
83
84unsafe impl<CS: ChunkScope> ScopeUniqueMap<CS> for MapLinearWithDim<3> {
85 type IndexType = usize;
86 type GlobalIndexType = usize;
87
88 #[inline]
89 #[gpu_codegen::device]
90 fn map(
91 &self,
92 idx: Self::IndexType,
93 thread_ids: [u32; TID_MAX_LEN],
94 ) -> (bool, Self::GlobalIndexType) {
95 let x_id = CS::global_id_x(thread_ids);
96 let y_id = CS::global_id_y(thread_ids);
97 let z_id = CS::global_id_z(thread_ids);
98 let global_thread_id =
99 (x_id + (z_id * CS::global_dim_y() + y_id) * CS::global_dim_x()) as usize;
100 let stride = self.width;
101 let total_dim = (CS::global_dim_x() * CS::global_dim_y() * CS::global_dim_z()) as usize;
102 (true, idx % stride + (idx / stride) * stride * total_dim + global_thread_id * stride)
103 }
104}
105
/// Mapping that gives each thread one contiguous run of `width` elements:
/// global index = thread_id * width + idx, valid while idx < width.
#[derive(Copy, Clone)]
pub struct MapContinuousLinear {
    /// Number of contiguous elements owned by each thread.
    width: u32,
}
110
111impl MapContinuousLinear {
112 #[inline]
113 #[gpu_codegen::device]
114 #[gpu_codegen::ret_sync_data(1000)]
115 pub fn new(width: u32) -> Self {
116 Self { width }
117 }
118}
119
120unsafe impl<CS: ChunkScope> ScopeUniqueMap<CS> for MapContinuousLinear {
121 type IndexType = u32;
122 type GlobalIndexType = u32;
123
124 #[inline]
125 #[gpu_codegen::device]
126 fn map(
127 &self,
128 idx: Self::IndexType,
129 thread_ids: [u32; TID_MAX_LEN],
130 ) -> (bool, Self::GlobalIndexType) {
131 let x_id = CS::global_id_x(thread_ids);
132 let y_id = CS::global_id_y(thread_ids);
133 let z_id = CS::global_id_z(thread_ids);
134 let global_thread_id = x_id + (z_id * CS::global_dim_y() + y_id) * CS::global_dim_x();
135 (idx < self.width, idx + global_thread_id * self.width)
136 }
137}
138
/// This mapping strategy is useful when we want to reshape a 1D array into a 2D
/// array and then distribute one element to a thread one by one until consuming
/// all. It creates a new non-contiguous partition for each thread.
/// - IndexType is (usize, usize)
///
/// Example:
/// - array: [T; 20]
/// - x_size = 5 => y_size = 4
/// - dim: x=2, y=2, z=1
/// ```text
///   0   1   2   3   4
/// ┌───┬───┬───┬───┬───┐
/// │0,0│1,0│0,0│1,0│0,0│
/// ├───┼───┼───┼───┼───┤
/// │0,1│1,1│0,1│1,1│0,1│
/// ├───┼───┼───┼───┼───┤
/// │0,0│1,0│0,0│1,0│0,0│
/// ├───┼───┼───┼───┼───┤
/// │0,1│1,1│0,1│1,1│0,1│
/// └───┴───┴───┴───┴───┘
/// ```
/// In this case,
/// - thread (1,0) and (1,1) should only have access to a shape of 2*2 = 4 elements.
/// - thread (0,0) and (0,1) have access to a shape of 3*2 = 6 elements.
#[derive(Copy, Clone)]
pub struct Map2D {
    /// Width (x extent) of the reshaped 2D view of the array.
    pub x_size: usize,
}
168
169impl Map2D {
170 #[inline]
171 #[gpu_codegen::device]
172 #[gpu_codegen::ret_sync_data(1000)]
173 pub fn new(x_size: usize) -> Self {
174 Self { x_size }
175 }
176}
177
178unsafe impl<CS: ChunkScope> ScopeUniqueMap<CS> for Map2D {
179 type IndexType = (usize, usize);
180 type GlobalIndexType = usize;
181
182 #[inline]
183 #[gpu_codegen::device]
184 fn precondition(&self) -> bool {
185 CS::global_dim_z() == 1
186 }
187
188 #[inline]
189 #[gpu_codegen::device]
190 fn map(
191 &self,
192 idx: Self::IndexType,
193 thread_ids: [u32; TID_MAX_LEN],
194 ) -> (bool, Self::GlobalIndexType) {
195 let shape_x = self.x_size;
196 let inner_x = idx.0;
197 let inner_y = idx.1;
198 let x = inner_x * CS::global_dim_x() as usize + CS::global_id_x(thread_ids) as usize;
199 let y = inner_y * CS::global_dim_y() as usize + CS::global_id_y(thread_ids) as usize;
200 (x < shape_x, shape_x * y + x)
201 }
202}
203
204/// Maps a multi-dimensional local index and thread layout into a reshaped global index.
205/// The idea is that the user need to specify the dimensions of logical index (local + thread ids), and the target tensor dimensions, then we map the logical index into the corresponding target multi-dimension tensor.
206/// We have:
207/// - Logical index — e.g., (threadId, local_access_id) in CUDA.
208/// - Target tensor dimensions — e.g., a tensor of shape $(\hat{D}_0, \hat{D}_1, ... \hat{D}_{N+M})$
209///
210/// We want to map the logical linear or multi-dimensional index into the corresponding tensor element, possibly in a flattened memory layout.
211/// To ensure we can access the tensor properly, developers need to do linear2vec operation to convert threadid to $tids[D_{N+M}][...][D_N]$, and local_access_id to $lids[D_N][...][D_0]$
212/// The `reshape_map!` macro generates a `MapReshape` struct that reshapes
213/// local thread ID and index to a shape and then creates a linearized combination
214/// of them according to a specified layout or weights to get a global access index.
215/// ## Macro Signature
216/// The macro takes dimension sizes and optional `layout` `offset` to specify the new layout.
217/// ```text
218/// gpu::reshape_map!(
219/// [($local_id_dim, $tensor_dim?),*] // local id + corresponding tensor dimension
220/// |
221/// [($thread_id_dim, $tensor_dim?),*] // thread id + corresponding tensor dimension
222/// =>
223/// layout: [-?i1, -?i2, ...] // permutation of dimensions in the new layout
224/// offset: $offset // optional offset (default 0)
225/// )
226/// ```
227/// - `lid_tensor_Dims`: array expression specifying the shape of local index, and the array shape corresponding to the local index:
228/// ```math
229/// [(D_0, TD_0), (D_1, TD_1), ..., (D_{N-1}, TD_{N-1})]
230/// ```
231/// - `tid_tensor_Dims`: array expression specifying the shape of thread index, and the array shape corresponding to the local index:
232/// $[(D_N, TD_N), ..., (D_{N+M-1}, TD_{N+M-1)]$
233/// - When $TD_k$ is omitted, it is assumed to be $D_k$.
234/// - `permutation`: permutation of dimensions in the new layout:
235/// $[p_0, p_1, ..., p_{N+M-1}]$ (low → high dimension)
236/// - $0 \le p_k < N$ for thread dimensions
237/// - $N \le p_k < N+M$ for index dimensions
238/// - Must be a valid constant permutation [0, 1, 2, 3] or [i0, i1, t0, t1]
239/// - If omitted, defaults to $[0, 1, ..., N+M-1]$
240/// - Negative index `-k` is allowed to indicate $TD_k - 1 - id_k$ instead of $id_k$
241/// - `-0` is allowed
242/// - `p_k` can be a constant literal or a readable name like `t<k-N>` or `i<k>` for thread and index dimensions
243/// ## Behavior
244/// - Translates `linear_thread_id` and `local_id` to software-defined multi-dimensional thread IDs:
245/// $lid_k = lid / \prod_{j=0}^{k - 1}(D_j) \mod D_k$
246/// $lid = \sum_{k=0}^{N}(lid_k * \prod_{j=0}^{k - 1}(D_j))$
247/// $tid_k = tid / \prod_{j=N}^{k - 1}(D_{j+N}) \mod D_{k+N}$
248/// $tid = (tid_0, tid_1, ..., tid_{N-1})$ (low → high)
249/// - Merges thread IDs with index IDs:
250/// $id = (lid_0, ..., lid_{M-1}, tid_0, ..., tid_{N-1})$ (low → high)
/// Thus, the logical index has dimension $idxDim = [D_0, D_1, ..., D_{N+M-1}]$
/// - Treats array as shape:
///   $\hat{D} = [TD_0, TD_1, ..., TD_{N+M-1}]$
/// - If $TD_k \ne D_k$, some threads or indices will be skipped.
///   Valid access range:
///   $0 \le id_k < \min(D_k, TD_k)$
/// - Apply the reversed flag to each index:
///   $id'_k = \begin{cases} TD_k - 1 - id_k & \text{if } reversed_k \\ id_k & \text{otherwise} \end{cases}$
/// - Global ID:
///   $globalid = \sum_{k=0}^{N+M-1}( id'_{p_k} \prod_{j=0}^{k - 1}(TD_{p_j}))$
261/// - Accesses the array in permuted order
262/// $arr[id'_{p_0}][id'_{p_1}]...[id'_{p_{N+M-1}}]$
263///
264/// ## Safety
265/// - Users cannot create `MapReshape` instances outside the macro without `unsafe {}`.
266/// - Sizes must be non-zero; `size = 0` triggers runtime errors and should be treated as functionality errors.
267/// This will not violate race-free guarantees.
268/// - Guarantees safe mapping for valid permutations to ensure race-free chunking.
269/// ## Examples
270/// See more tests in `chunk_scope::test_reshape_map`.
271/// ### Example 1: No permutation
272/// Similar to `MapLinear(3)` when `num_thread = 4`.
273/// ```rust
274/// gpu::reshape_map!([3] | [4] => layout: [0, 1]);
275/// ```
276/// which is equivalent to
277/// ```rust
278/// gpu::reshape_map!([3] | [4] => layout: [i0, t0]);
279/// // local index shape: [3]
280/// // thread shape: [4]
281/// // Access: arr[tid0][idx0]
282/// // access -> tid: [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]
283/// ```
284/// ### Example 2: Permutation swaps a thread id and local_id
285/// Similar to `MapLinear(1)` when `num_thread = 4, arr.len = 12`.
286/// ```rust
287/// gpu::reshape_map!([3] | [4] => layout: [1, 0]);
288/// ```
289/// which is equivalent to
290/// ```rust
291/// gpu::reshape_map!([3] | [4] => layout: [t0, i0]);
292/// // local index shape: [3]
293/// // thread shape: [4]
294/// // array shape: arr[4][3]
295/// // Access: arr[idx0][tid0]
296/// // access -> tid: [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
297/// ```
298/// ### Example 3: Software-defined thread dimension > 1
299/// ```rust
300/// gpu::reshape_map!([3] | [2, 2] => layout: [0, 2, 1]);
301/// ```
302/// which is equivalent to
303///
304/// ```rust
305/// gpu::reshape_map!([3] | [2, 2] => layout: [i0, t1, t0]);
306/// // local index shape: [3]
307/// // thread shape: [2, 2]
308/// // Access: arr[tid0][tid1][idx0]
309/// // access -> tid: [0, 0, 0, 2, 2, 2, 1, 1, 1, 3, 3, 3]
310/// ```
311/// ### Example 4: Swap tid and extra_id, thread dimension > 1
312/// ```rust
313/// gpu::reshape_map!([3] | [2, 2] => layout: [2, 0, 1]);
314/// gpu::reshape_map!([3] | [2, 2] => layout: [t1, i0, t0]);
315/// // local index shape: [3]
316/// // thread shape: [2, 2]
317/// // array shape: arr[2][2][3]
318/// // Access: arr[tid0][idx0][tid1]
319/// // access -> tid: [0, 2, 0, 2, 0, 2, 1, 3, 1, 3, 1, 3]
320/// ```
321/// ### Example 5: Reverse a thread dimension
322/// ```rust
323/// gpu::reshape_map!([3] | [2, 2] => layout: [0, -1, 2]);
324/// gpu::reshape_map!([3] | [2, 2] => layout: [i0, -t0, t1]);
325/// // local index shape: [3]
326/// // thread shape: [2, 2]
327/// // Access: arr[tid1][(max_tid0 - tid0)][idx0]
328/// ```
329/// ### Example 6: Skip some threads by setting a smaller new size
330/// ```rust
331/// gpu::reshape_map!([3] | [2, (2, 1)] => layout: [0, 1, 2]);
332/// gpu::reshape_map!([3] | [2, (2, 1)] => layout: [i0, t0, t1]);
333/// // local index shape: [3]
334/// // thread shape: [2, 2]
335/// // array shape: arr[1][2][3]
336/// // Access: arr[tid1][tid0][idx0]
337/// // valid access range:
338/// // 0 <= lid0 < 3
339/// // 0 <= tid0 < 2
340/// // 0 <= tid1 < 1
341/// // access -> tid: [0, 0, 0, 1, 1, 1, _, _, _, _, _, _]
342/// ```
343/// ### Example 7: Skip some data by setting a larger size
344/// ```rust
345/// gpu::reshape_map!([(3, 4)] | [2, 2] => layout: [0, 1, 2]);
346/// gpu::reshape_map!([(3, 4)] | [2, 2] => layout: [i0, t0, t1]);
347/// // local index shape: [3]
348/// // thread shape: [2, 2]
349/// // array shape: arr[2][2][4]
350/// // Access: arr[tid1][tid0][idx0]
351/// // access -> tid: [0, 0, 0, _, 1, 1, 1, _, 2, 2, 2, _, 3, 3, 3, _, ...]
352/// ```
353/// ## Invalid Examples
354/// ### Example 1: Invalid permutation (index out of range)
355/// ```rust,compile_fail
356/// gpu::reshape_map!([2] | [2, 3] => layout: [1, 2, 3]);
357/// ```
358/// ### Example 2: Invalid permutation (duplicate indices)
359/// ```rust,compile_fail
360/// gpu::reshape_map!([2] | [2, 3] => layout: [0, 0, 1]);
361/// gpu::reshape_map!([2] | [2, 3] => layout: [t0, t0, i0]);
362/// ```
363/// ### Example 3: Invalid thread dimension (<1)
364/// ```rust,compile_fail
365/// gpu::reshape_map!([2, 3] | [] => layout: [0, 1, 2]);
366/// ```
367/// ### Example 4: Invalid index dimension (<1)
368/// ```rust,compile_fail
369/// gpu::reshape_map!( [] | [2, 2] => layout: [0, 1]);
370/// ```
/// User-facing entry point: forwards all tokens to the crate's
/// `reshape_map_macro!` proc macro, passing `$crate` so the expansion can
/// reference this crate's items regardless of where it is invoked.
#[macro_export]
macro_rules! reshape_map {
    ($($any: tt)*) => {
        $crate::prelude::reshape_map_macro!($crate, $($any)*)
    };
}
377
#[cfg(test)]
mod test {
    use num_traits::AsPrimitive;

    use super::*;
    use crate::chunk_scope::test::MockWarp2ThreadScope;

    /// Asserts that the element -> thread access pattern produced by `$m`
    /// matches `$expected`; `-1` marks elements no thread touched.
    macro_rules! assert_access_map {
        ($cs:ty, $m:expr, $thread_num:expr, $id_num:expr, $local_id_num:expr, $expected:expr) => {
            let access = get_access_map::<$thread_num, $cs>(&$m, $id_num, $local_id_num);
            assert!(access == $expected, "access_map = {:?}, expected = {:?}", access, $expected)
        };
        ($cs:ty, $m:expr, $thread_num:expr, $id_num:expr, $expected:expr) => {
            let access = get_access_map::<$thread_num, $cs>(&$m, $id_num, $id_num);
            assert!(access == $expected, "access_map = {:?}, expected = {:?}", access, $expected)
        };
    }

    /// Variant of `assert_access_map!` for mappings with 2D local indices.
    macro_rules! assert_access_map2 {
        ($cs:ty, $m:expr, $thread_num:expr, $id_num:expr, $id_num2:expr, $expected:expr) => {
            let access = get_access_map2::<$thread_num, $cs>(&$m, $id_num, $id_num2);
            assert!(access == $expected, "access_map = {:?}, expected = {:?}", access, $expected)
        };
    }

    /// Runs `map` for every (thread, local index) pair and records, for each of
    /// the `n` elements, which thread accessed it (-1 if none).
    fn get_access_map<const NTHREADS: usize, CS>(
        map: &impl ScopeUniqueMap<CS, IndexType = u32>,
        n: usize,
        local_n: usize,
    ) -> alloc::vec::Vec<isize>
    where
        CS: ChunkScope<FromScope = crate::cg::ThreadWarpTile<NTHREADS>, ToScope = crate::cg::Thread>,
    {
        let mut access_map = alloc::vec![-1isize; n];
        assert!(NTHREADS <= 32);
        for t in 0..NTHREADS {
            for i in 0..local_n {
                // Warp-to-thread scope: only tids[1] carries the thread id.
                let tids = [0, t as u32, 0, 0, 0, 0];
                let (valid, mapped_idx) = ScopeUniqueMap::<CS>::map(map, i as u32, tids);
                let mapped_idx: usize = mapped_idx.as_();
                if valid && mapped_idx < n {
                    access_map[mapped_idx] = t as _;
                }
            }
        }
        access_map
    }

    /// 2D-local-index variant of `get_access_map`; `m` is the (dim0, dim1)
    /// extent of the local index space.
    fn get_access_map2<const NTHREADS: usize, CS>(
        map: &impl ScopeUniqueMap<CS, IndexType = (u32, u32)>,
        n: usize,
        m: (usize, usize),
    ) -> alloc::vec::Vec<isize>
    where
        CS: ChunkScope<FromScope = crate::cg::ThreadWarpTile<NTHREADS>, ToScope = crate::cg::Thread>,
    {
        let mut access_map = alloc::vec![-1isize; n];
        assert!(NTHREADS <= 32);
        for t in 0..NTHREADS {
            for i in 0..m.0 {
                for j in 0..m.1 {
                    let tids = [0, t as u32, 0, 0, 0, 0];
                    let (valid, mapped_idx) =
                        ScopeUniqueMap::<CS>::map(map, (i as u32, j as u32), tids);
                    let mapped_idx: usize = mapped_idx.as_();
                    if valid && mapped_idx < n {
                        access_map[mapped_idx] = t as _;
                    }
                }
            }
        }
        access_map
    }

    #[test]
    pub(crate) fn test_reshape_map_example3() {
        type S = MockWarp2ThreadScope<4, 0>; // a group of 4 threads.
        let map_reshape = reshape_map!([3] | [2, 2] => layout: [0, 2, 1]);
        assert_access_map!(S, map_reshape, 4, 12, [0, 0, 0, 2, 2, 2, 1, 1, 1, 3, 3, 3]);
    }

    #[test]
    pub(crate) fn test_reshape_map_example4() {
        type S = MockWarp2ThreadScope<4, 0>; // a group of 4 threads.
        let map_reshape = reshape_map!([3] | [2, 2] => layout: [2, 0, 1]);
        assert_access_map!(S, map_reshape, 4, 12, [0, 2, 0, 2, 0, 2, 1, 3, 1, 3, 1, 3]);
        // The named-layout form must behave identically to the numeric form.
        let map_reshape = reshape_map!([3] | [2, 2] => layout: [t1, i0, t0]);
        assert_access_map!(S, map_reshape, 4, 12, [0, 2, 0, 2, 0, 2, 1, 3, 1, 3, 1, 3]);
    }

    #[test]
    pub(crate) fn test_reshape_map_example5() {
        type S = MockWarp2ThreadScope<4, 0>; // a group of 4 threads.
        let map_reshape = crate::reshape_map!([3] | [2, 2] => layout: [i0, -t0, t1]);
        assert_access_map!(S, map_reshape, 4, 12, [1, 1, 1, 0, 0, 0, 3, 3, 3, 2, 2, 2]);
    }

    #[test]
    pub(crate) fn test_reshape_map_example5_2() {
        type S = MockWarp2ThreadScope<4, 0>; // a group of 4 threads.
        let map_reshape = crate::reshape_map!([3] | [2, 2] => layout: [-t0, -i0, t1]);
        assert_access_map!(S, map_reshape, 4, 12, 3, [1, 0, 1, 0, 1, 0, 3, 2, 3, 2, 3, 2]);
    }

    #[test]
    pub(crate) fn test_reshape_map_example6() {
        type S = MockWarp2ThreadScope<4, 0>; // a group of 4 threads.
        // Skip some threads by setting a smaller new size
        let map_reshape = crate::reshape_map!([3] | [2, (2, 1)] => layout: [i0, t0, t1]);
        // warp2thread and so use tid[1] only
        assert_access_map!(S, map_reshape, 4, 12, [0, 0, 0, 1, 1, 1, -1, -1, -1, -1, -1, -1]);
    }

    #[test]
    pub(crate) fn test_reshape_map_example7() {
        type S = MockWarp2ThreadScope<4, 0>; // a group of 4 threads.
        // Skip some data by setting a larger size
        let map_reshape = crate::reshape_map!([(3, 4)] | [2, 2] => layout: [i0, t0, t1]);
        assert_access_map!(S, map_reshape, 4, 12, [0, 0, 0, -1, 1, 1, 1, -1, 2, 2, 2, -1]);
    }

    #[test]
    pub(crate) fn test_reshape_map_example8() {
        type S = MockWarp2ThreadScope<8, 0>; // a group of 8 threads.
        // Skip some threads by setting a smaller new size
        let map_reshape = crate::reshape_map!([1, 3] | [(4, 2), 2] => layout: [i0, t0, t1, i1]);
        // warp2thread and so use tid[1] only
        assert_access_map2!(S, map_reshape, 8, 12, (1, 3), [0, 1, 4, 5, 0, 1, 4, 5, 0, 1, 4, 5]);
    }

    #[test]
    pub(crate) fn test_reshape_map_example9() {
        type S = MockWarp2ThreadScope<8, 0>; // a group of 8 threads.
        // Skip some threads by setting a smaller new size
        let map_reshape = crate::reshape_map!([3] | [(4, 2), 2] => layout: [t0, t1, i0]);
        // warp2thread and so use tid[1] only
        assert_access_map!(S, map_reshape, 8, 12, [0, 1, 4, 5, 0, 1, 4, 5, 0, 1, 4, 5]);
    }
}
516}