// gpu/ldst.rs

/// Generates a device-side load wrapper built on the PTX `ld.global` instruction.
///
/// Arguments:
/// * `$fname` - name of the generated public function.
/// * `$ty`    - Rust type that is read through the reference and returned.
/// * `$c`     - cache-operator suffix spliced into the mnemonic (e.g. `"cs"`).
/// * `$t`     - PTX type suffix spliced into the mnemonic (e.g. `"u32"`).
///
/// The generated function takes `&$ty`, converts it to a raw pointer and emits
/// `ld.global.<$c>.<$t>` with a 32-bit destination register and a 64-bit
/// address register, returning the loaded value.
///
/// NOTE(review): the mnemonic hard-codes the `.global` state space and a
/// 32-bit destination register, so `$ty` is presumably a 4-byte value that
/// lives in global memory - confirm against callers.
macro_rules! impl_ld {
    ($fname: ident, $ty:ty, $c: literal, $t: literal) => {
        #[gpu_macros::device]
        #[inline(always)]
        pub fn $fname(ptr: &$ty) -> $ty {
            // A shared reference coerces to the raw address used as the asm operand.
            let addr: *const $ty = ptr;
            let mut loaded: $ty;
            // SAFETY: `addr` is derived from a live reference, so it is non-null,
            // aligned and points at an initialized `$ty`.
            unsafe {
                $crate::asm!(
                    concat!("ld.global.", $c, ".", $t, " {0:reg32}, [{1:reg64}];"),
                    out(reg) loaded,
                    in(reg) addr,
                );
            }
            loaded
        }
    };
}

20impl_ld!(__ldcs_u32, u32, "cs", "u32");
21impl_ld!(__ldcs_i32, i32, "cs", "u32");
22impl_ld!(__ldcs_f32, f32, "cs", "f32");
23
24impl_ld!(__ldcg_u32, u32, "cg", "u32");
25impl_ld!(__ldcg_i32, i32, "cg", "u32");
26impl_ld!(__ldcg_f32, f32, "cg", "f32");
27
/// Generates a device-side store wrapper built on the PTX `st.global` instruction.
///
/// Arguments:
/// * `$fname` - name of the generated public function.
/// * `$ty`    - Rust type of the value being written.
/// * `$c`     - cache-operator suffix spliced into the mnemonic (e.g. `"cs"`).
/// * `$t`     - PTX type suffix spliced into the mnemonic (e.g. `"u32"`).
///
/// The generated function takes `&mut $ty` plus the value to write, converts
/// the reference to a raw pointer and emits `st.global.<$c>.<$t>` with a
/// 64-bit address register and a 32-bit source register.
///
/// NOTE(review): the mnemonic hard-codes the `.global` state space and a
/// 32-bit source register, so `$ty` is presumably a 4-byte value that lives
/// in global memory - confirm against callers.
macro_rules! impl_st {
    ($fname: ident, $ty:ty, $c: literal, $t: literal) => {
        #[gpu_macros::device]
        #[inline(always)]
        pub fn $fname(ptr: &mut $ty, val: $ty) {
            let ptr = ptr as *mut $ty;
            // SAFETY: `ptr` is derived from a live exclusive reference, so it is
            // non-null, aligned and valid for a write of one `$ty`.
            unsafe {
                $crate::asm!(
                    // The leading space keeps the mnemonic separate from the
                    // address operand, matching the form used by `impl_ld`.
                    concat!("st.global.", $c, ".", $t, " [{0:reg64}], {1:reg32};"),
                    in(reg) ptr,
                    in(reg) val,
                );
            }
        }
    };
}

45impl_st!(__stcs_u32, u32, "cs", "u32");
46impl_st!(__stcs_i32, i32, "cs", "u32");
47impl_st!(__stcs_f32, f32, "cs", "f32");
48
49impl_st!(__stcg_u32, u32, "cg", "u32");
50impl_st!(__stcg_i32, i32, "cg", "u32");
51impl_st!(__stcg_f32, f32, "cg", "f32");
52
/// Method-style access to the cache-operator load/store wrappers.
///
/// Each method reads or writes the value behind `self`. The implementations
/// in this file forward to the `__ld*`/`__st*` functions, which emit
/// `ld.global` / `st.global` with the `cs` or `cg` cache operator.
pub trait CacheStreamLoadStore: Sized {
    /// Value type returned by the loads and accepted by the stores.
    type Output;

    /// Loads the value behind `self` (the `cs` variant).
    fn ldcs(&self) -> Self::Output;

    /// Stores `val` into the location behind `self` (the `cs` variant).
    fn stcs(&mut self, val: Self::Output);

    /// Loads the value behind `self` (the `cg` variant).
    fn ldcg(&self) -> Self::Output;

    /// Stores `val` into the location behind `self` (the `cg` variant).
    fn stcg(&mut self, val: Self::Output);
}

61impl CacheStreamLoadStore for i32 {
62    type Output = i32;
63    #[gpu_macros::device]
64    #[inline(always)]
65    fn ldcs(&self) -> Self::Output {
66        __ldcs_i32(self)
67    }
68
69    #[gpu_macros::device]
70    #[inline(always)]
71    fn stcs(&mut self, val: Self::Output) {
72        __stcs_i32(self, val)
73    }
74
75    #[gpu_macros::device]
76    #[inline(always)]
77    fn ldcg(&self) -> Self::Output {
78        __ldcg_i32(self)
79    }
80
81    #[gpu_macros::device]
82    #[inline(always)]
83    fn stcg(&mut self, val: Self::Output) {
84        __stcg_i32(self, val)
85    }
86}
87
88impl CacheStreamLoadStore for f32 {
89    type Output = f32;
90
91    #[gpu_macros::device]
92    #[inline(always)]
93    fn ldcs(&self) -> Self::Output {
94        __ldcs_f32(self)
95    }
96
97    #[gpu_macros::device]
98    #[inline(always)]
99    fn stcs(&mut self, val: Self::Output) {
100        __stcs_f32(self, val)
101    }
102
103    #[gpu_macros::device]
104    #[inline(always)]
105    fn ldcg(&self) -> Self::Output {
106        __ldcg_f32(self)
107    }
108
109    #[gpu_macros::device]
110    #[inline(always)]
111    fn stcg(&mut self, val: Self::Output) {
112        __stcg_f32(self, val)
113    }
114}