gridwise_set_buffer_value.hpp Source File

gridwise_set_buffer_value.hpp Source File#

Composable Kernel: gridwise_set_buffer_value.hpp Source File
gridwise_set_buffer_value.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
3
4#pragma once
5
8
// NOTE(review): this is a rendered listing, not the raw header. Each line carries the
// listing's own line number as a prefix, and inside this block the numbers 18, 27, 33,
// 43-44 and 47 are skipped, so some source lines are not visible here. Consult the real
// gridwise_set_buffer_value.hpp before relying on any detail marked "NOTE(review)" below.
9namespace ck {
10
// Kernel that writes the scalar `value` into the buffer behind `p_global`, one element
// per thread, for every thread whose flattened id is below
// grid_1d_buffer_desc.GetElementSize().
//
// Template parameters:
//   BlockSize            - multiplier applied to the block id when flattening the thread
//                          index; presumably must equal the launch block size - TODO confirm.
//   DataType             - type of `value` and of the elements pointed to by `p_global`.
//   Grid1dBufferDescType - descriptor type; the visible code calls GetElementSpaceSize()
//                          and GetElementSize() on it and hands it to the transfer object.
//
// Parameters:
//   grid_1d_buffer_desc  - descriptor of the destination buffer (passed by value).
//   p_global             - destination pointer.
//   value                - the value each participating thread stores.
11template <index_t BlockSize, typename DataType, typename Grid1dBufferDescType>
12__global__ void kernel_buffer_set_value(const Grid1dBufferDescType grid_1d_buffer_desc,
13 DataType* const __restrict__ p_global,
14 DataType value)
15
16{
17
// NOTE(review): listing line 18 is absent. `PassThroughOp`, used further down, is not
// declared anywhere in the visible text - presumably it is introduced here; confirm.
19
20 constexpr auto I0 = Number<0>{};
21
// Combine the block id and the in-block thread id into a single flat index.
22 const index_t thread_local_id = get_thread_local_1d_id();
23 const index_t block_global_id = get_block_1d_id();
24
25 const index_t thread_global_id = block_global_id * BlockSize + thread_local_id;
26
// NOTE(review): listing line 27 is absent. `value_buf` is used below without a visible
// declaration - presumably it is declared on the missing line; confirm.
28
// Place the fill value in slot I0 of the source buffer.
29 value_buf(I0) = value;
30
// Packed descriptor with a single length-1 dimension; passed to Run() below together
// with value_buf as the source side of the transfer.
31 constexpr auto val_buff_desc = make_naive_tensor_descriptor_packed(make_tuple(Number<1>{}));
32
// NOTE(review): listing line 33 is absent, so the next line is only the tail of a
// statement. It presumably initialises `global_buf` (consumed by Run() below) from
// p_global and the descriptor's element-space size; confirm against the real header.
34 p_global, grid_1d_buffer_desc.GetElementSpaceSize());
35
// Threads whose flat index falls outside the descriptor's element count do nothing.
36 if(thread_global_id < grid_1d_buffer_desc.GetElementSize())
37 {
// Build the transfer object positioned at this thread's flat index.
// NOTE(review): listing lines 43-44 and 47 are absent, so three template arguments of
// this instantiation are not visible here; do not infer the meaning of the remaining
// positional arguments from this listing alone.
38 auto threadwise_store = ThreadwiseTensorSliceTransfer_v1r3<DataType,
39 DataType,
40 decltype(val_buff_desc),
41 Grid1dBufferDescType,
42 PassThroughOp,
45 0,
46 1,
48 1,
49 true>(
50 grid_1d_buffer_desc, make_multi_index(thread_global_id), PassThroughOp{});
51
// Run the transfer from value_buf (starting at I0) into global_buf.
52 threadwise_store.Run(
53 val_buff_desc, make_tuple(I0), value_buf, grid_1d_buffer_desc, global_buf);
54 }
55};
56
57} // namespace ck
Definition ck.hpp:268
__host__ __device__ constexpr auto make_multi_index(Xs &&... xs)
Definition array_multi_index.hpp:15
int32_t index_t
Definition ck.hpp:299
@ Set
Definition ck.hpp:278
integral_constant< index_t, N > Number
Definition number.hpp:12
__device__ index_t get_block_1d_id()
Definition get_id.hpp:47
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__global__ void kernel_buffer_set_value(const Grid1dBufferDescType grid_1d_buffer_desc, DataType *const __restrict__ p_global, DataType value)
Definition gridwise_set_buffer_value.hpp:12
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
__device__ index_t get_thread_local_1d_id()
Definition get_id.hpp:41
__host__ __device__ constexpr auto make_dynamic_buffer(T *p, ElementSpaceSize element_space_size)
Definition dynamic_buffer.hpp:472
const GenericPointer< typename T::ValueType > T2 value
Definition pointer.h:1697
Definition utility/sequence.hpp:43
Definition static_buffer.hpp:16
Definition threadwise_tensor_slice_transfer.hpp:39
Definition tensor_operation/gpu/element/unary_element_wise_operation.hpp:340