library/core/src/intrinsics/gpu.rs - rust - Git at Google

 //! Intrinsics for GPU targets.
 //!
 //! Intrinsics in this module are intended for use on GPU targets.
 //! They can be target-specific, but in general GPU targets are similar.

 #![unstable(feature = "gpu_intrinsics", issue = "none")]

 /// Returns the pointer to workgroup memory allocated at launch-time on GPUs.
 ///
 /// This intrinsic is only declared when compiling for `target_arch = "amdgpu"` or
 /// `target_arch = "nvptx64"`.
 ///
 /// Workgroup memory is a memory region that is shared between all threads in
 /// the same workgroup. It is faster to access than other memory, but pointers into it
 /// do not work outside the workgroup where they were obtained.
 /// Workgroup memory can be allocated statically or after compilation, when
 /// launching a gpu-kernel. `gpu_launch_sized_workgroup_mem` returns the pointer to
 /// the memory that is allocated at launch-time.
 /// The size of this memory can differ between launches of a gpu-kernel, depending on
 /// what is specified at launch-time.
 /// However, the alignment is fixed by the kernel itself, at compile-time.
 ///
 /// The returned pointer is the start of the workgroup memory region that is
 /// allocated at launch-time.
 /// All calls to `gpu_launch_sized_workgroup_mem` in a workgroup, independent of the
 /// generic type, return the same address, so they all alias the same memory.
 /// The returned pointer is aligned to at least the alignment of `T`.
 ///
 /// If `gpu_launch_sized_workgroup_mem` is invoked multiple times with different
 /// types that have different alignments, then you may only rely on the resulting
 /// pointer having the alignment of `T` after a call to `gpu_launch_sized_workgroup_mem::<T>`
 /// has occurred in the current program execution.
 ///
 /// # Safety
 ///
 /// The function itself is not declared `unsafe`; the requirements below concern
 /// what is done with the returned raw pointer.
 ///
 /// The pointer is safe to dereference from the start (the returned pointer) up to the
 /// size of workgroup memory that was specified when launching the current gpu-kernel.
 /// This allocated size is not related in any way to `T`.
 ///
 /// The user must take care of synchronizing access to workgroup memory between
 /// threads in a workgroup. The usual data race requirements apply.
 ///
 /// # Other APIs
 ///
 /// The same concept exists under different names in other GPU programming models:
 ///
 /// CUDA and HIP call this dynamic shared memory, shared between threads in a block.
 /// OpenCL and SYCL call this local memory, shared between threads in a work-group.
 /// GLSL calls this shared memory, shared between invocations in a work group.
 /// DirectX calls this groupshared memory, shared between threads in a thread-group.
 #[must_use = "returns a pointer that does nothing unless used"]
 #[rustc_intrinsic]
 #[rustc_nounwind]
 #[unstable(feature = "gpu_launch_sized_workgroup_mem", issue = "135513")]
 #[cfg(any(target_arch = "amdgpu", target_arch = "nvptx64"))]
 pub fn gpu_launch_sized_workgroup_mem<T>() -> *mut T;

 /// Returns a pointer to the HSA kernel dispatch packet.
 ///
 /// This intrinsic is only declared when compiling for `target_arch = "amdgpu"`.
 ///
 /// A `gpu-kernel` on amdgpu is always launched through a kernel dispatch packet.
 /// The dispatch packet contains the workgroup size, launch size and other data.
 /// Its content is defined by the [HSA Platform System Architecture Specification],
 /// which is implemented e.g. in AMD's [hsa.h].
 /// The intrinsic returns a unit pointer (`*const ()`) so that rustc does not need to know
 /// the packet struct.
 /// The pointer is valid for the whole lifetime of the program.
 ///
 /// [HSA Platform System Architecture Specification]: https://hsafoundation.com/wp-content/uploads/2021/02/HSA-SysArch-1.2.pdf
 /// [hsa.h]: https://github.com/ROCm/rocm-systems/blob/rocm-7.1.0/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa.h#L2959
 #[rustc_nounwind]
 #[rustc_intrinsic]
 #[cfg(target_arch = "amdgpu")]
 #[must_use = "returns a pointer that does nothing unless used"]
 pub fn amdgpu_dispatch_ptr() -> *const ();
	//! Intrinsics for GPU targets.
	//!
	//! Intrinsics in this module are intended for use on GPU targets.
	//! They can be target-specific, but in general GPU targets are similar.

	#![unstable(feature = "gpu_intrinsics", issue = "none")]

	/// Returns the pointer to workgroup memory allocated at launch-time on GPUs.
	///
	/// This intrinsic is only declared when compiling for `target_arch = "amdgpu"` or
	/// `target_arch = "nvptx64"`.
	///
	/// Workgroup memory is a memory region that is shared between all threads in
	/// the same workgroup. It is faster to access than other memory, but pointers into it
	/// do not work outside the workgroup where they were obtained.
	/// Workgroup memory can be allocated statically or after compilation, when
	/// launching a gpu-kernel. `gpu_launch_sized_workgroup_mem` returns the pointer to
	/// the memory that is allocated at launch-time.
	/// The size of this memory can differ between launches of a gpu-kernel, depending on
	/// what is specified at launch-time.
	/// However, the alignment is fixed by the kernel itself, at compile-time.
	///
	/// The returned pointer is the start of the workgroup memory region that is
	/// allocated at launch-time.
	/// All calls to `gpu_launch_sized_workgroup_mem` in a workgroup, independent of the
	/// generic type, return the same address, so they all alias the same memory.
	/// The returned pointer is aligned to at least the alignment of `T`.
	///
	/// If `gpu_launch_sized_workgroup_mem` is invoked multiple times with different
	/// types that have different alignments, then you may only rely on the resulting
	/// pointer having the alignment of `T` after a call to `gpu_launch_sized_workgroup_mem::<T>`
	/// has occurred in the current program execution.
	///
	/// # Safety
	///
	/// The function itself is not declared `unsafe`; the requirements below concern
	/// what is done with the returned raw pointer.
	///
	/// The pointer is safe to dereference from the start (the returned pointer) up to the
	/// size of workgroup memory that was specified when launching the current gpu-kernel.
	/// This allocated size is not related in any way to `T`.
	///
	/// The user must take care of synchronizing access to workgroup memory between
	/// threads in a workgroup. The usual data race requirements apply.
	///
	/// # Other APIs
	///
	/// The same concept exists under different names in other GPU programming models:
	///
	/// CUDA and HIP call this dynamic shared memory, shared between threads in a block.
	/// OpenCL and SYCL call this local memory, shared between threads in a work-group.
	/// GLSL calls this shared memory, shared between invocations in a work group.
	/// DirectX calls this groupshared memory, shared between threads in a thread-group.
	#[must_use = "returns a pointer that does nothing unless used"]
	#[rustc_intrinsic]
	#[rustc_nounwind]
	#[unstable(feature = "gpu_launch_sized_workgroup_mem", issue = "135513")]
	#[cfg(any(target_arch = "amdgpu", target_arch = "nvptx64"))]
	pub fn gpu_launch_sized_workgroup_mem<T>() -> *mut T;

	/// Returns a pointer to the HSA kernel dispatch packet.
	///
	/// This intrinsic is only declared when compiling for `target_arch = "amdgpu"`.
	///
	/// A `gpu-kernel` on amdgpu is always launched through a kernel dispatch packet.
	/// The dispatch packet contains the workgroup size, launch size and other data.
	/// Its content is defined by the [HSA Platform System Architecture Specification],
	/// which is implemented e.g. in AMD's [hsa.h].
	/// The intrinsic returns a unit pointer (`*const ()`) so that rustc does not need to know
	/// the packet struct.
	/// The pointer is valid for the whole lifetime of the program.
	///
	/// [HSA Platform System Architecture Specification]: https://hsafoundation.com/wp-content/uploads/2021/02/HSA-SysArch-1.2.pdf
	/// [hsa.h]: https://github.com/ROCm/rocm-systems/blob/rocm-7.1.0/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa.h#L2959
	#[rustc_nounwind]
	#[rustc_intrinsic]
	#[cfg(target_arch = "amdgpu")]
	#[must_use = "returns a pointer that does nothing unless used"]
	pub fn amdgpu_dispatch_ptr() -> *const ();