gpu::cg

Struct ThreadWarpTile

pub struct ThreadWarpTile<const SIZE: usize = 32, const STRIDE: usize = 1>;

Expand description

Similar to a thread block tile in a GPU kernel, but SIZE must be <= the warp size (e.g., 32 for NVIDIA GPUs).

If SIZE = 8 and STRIDE = 4, then the clusters will be:
[0, 4, 8, 12, 16, 20, 24, 28]
[1, 5, 9, 13, 17, 21, 25, 29]
[2, 6, 10, 14, 18, 22, 26, 30]
[3, 7, 11, 15, 19, 23, 27, 31]

If SIZE = 8 and STRIDE = 1, then the clusters will be:
[0, 1, 2, 3, 4, 5, 6, 7]
[8, 9, 10, 11, 12, 13, 14, 15]
[16, 17, 18, 19, 20, 21, 22, 23]
[24, 25, 26, 27, 28, 29, 30, 31]

Struct ThreadWarpTile Copy item path

Implementations§

impl<const SIZE: usize, const STRIDE: usize> ThreadWarpTile<SIZE, STRIDE>

pub const CHECKED_SIZE: u32

impl<const SIZE: usize> ThreadWarpTile<SIZE, 1>

pub const BASE_THREAD_MASK: u32

pub const LANE_MASK: u32

pub const SHIFT_COUNT: u32

pub const fn size(&self) -> u32

pub fn meta_group_size(&self) -> u32

pub fn subgroup_id(&self) -> u32

pub fn thread_mask(&self) -> u32

pub fn nvcc_redux_sync<Op: NvvmReduxSyncKind<T>, T>(&self, _op: Op, value: T) -> T

impl<const SIZE: usize, const STRIDE: usize> ThreadWarpTile<SIZE, STRIDE>

pub fn _subgroup_reduce<T>(_value: T, _op: &'static str) -> T

pub fn subgroup_reduce<Op, T>(self, _op: Op, value: T) -> T where Op: SubGroupReduceKind<T>,

Trait Implementations§

impl<const SIZE: usize> BuildChunkScope<Thread> for ThreadWarpTile<SIZE>

type CS = Warp2ThreadScope<SIZE>

fn build_chunk_scope(&self, _to: Thread) -> Warp2ThreadScope<SIZE>

impl<const SIZE: usize> BuildChunkScope<ThreadWarpTile<SIZE>> for Block

type CS = Block2WarpScope<SIZE>

fn build_chunk_scope(&self, _to: ThreadWarpTile<SIZE>) -> Block2WarpScope<SIZE>

impl<const SIZE: usize> BuildChunkScope<ThreadWarpTile<SIZE>> for Grid

type CS = Grid2WarpScope<SIZE>

fn build_chunk_scope(&self, _to: ThreadWarpTile<SIZE>) -> Grid2WarpScope<SIZE>

impl<const SIZE: usize> CGOperations for ThreadWarpTile<SIZE>

fn thread_rank(&self) -> u32

impl<const SIZE: usize, const STRIDE: usize> Clone for ThreadWarpTile<SIZE, STRIDE>

fn clone(&self) -> ThreadWarpTile<SIZE, STRIDE>

fn clone_from(&mut self, source: &Self)

impl<const SIZE: usize, T, Op> WarpReduceOp<T, Op> for ThreadWarpTile<SIZE, 1> where Op: NvvmReduxSyncKind<T> + ReduxKind,

fn redux(&self, op: Op, value: T) -> T

impl<const SIZE: usize> WarpReduceOp<f32, ReduxAdd> for ThreadWarpTile<SIZE>

fn redux(&self, _op: ReduxAdd, value: f32) -> f32

impl<const SIZE: usize> WarpReduceOp<f32, ReduxMax> for ThreadWarpTile<SIZE>

fn redux(&self, _op: ReduxMax, value: f32) -> f32

impl<const SIZE: usize, const STRIDE: usize> Copy for ThreadWarpTile<SIZE, STRIDE>

Auto Trait Implementations§

impl<const SIZE: usize, const STRIDE: usize> Freeze for ThreadWarpTile<SIZE, STRIDE>

impl<const SIZE: usize, const STRIDE: usize> RefUnwindSafe for ThreadWarpTile<SIZE, STRIDE>

impl<const SIZE: usize, const STRIDE: usize> Send for ThreadWarpTile<SIZE, STRIDE>

impl<const SIZE: usize, const STRIDE: usize> Sync for ThreadWarpTile<SIZE, STRIDE>

impl<const SIZE: usize, const STRIDE: usize> Unpin for ThreadWarpTile<SIZE, STRIDE>

impl<const SIZE: usize, const STRIDE: usize> UnwindSafe for ThreadWarpTile<SIZE, STRIDE>

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> HostToDev<T> for T

fn convert(self) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Struct ThreadWarpTile

pub fn subgroup_reduce<Op, T>(self, _op: Op, value: T) -> T
where Op: SubGroupReduceKind<T>,

impl<const SIZE: usize, T, Op> WarpReduceOp<T, Op> for ThreadWarpTile<SIZE, 1>
where Op: NvvmReduxSyncKind<T> + ReduxKind,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,