device_splitk_contraction_multiple_d_xdl_cshuffle.hpp Source File
device_splitk_contraction_multiple_d_xdl_cshuffle.hpp
Go to the documentation of this file.
float launch_and_time_kernel(const StreamConfig &stream_config, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition host_utility/kernel_launch.hpp:14
Definition convolution_backward_data_specialization.hpp:8
TensorSpecialization
Definition tensor_specialization.hpp:11
TensorSpecialization::Packed
Definition tensor_specialization.hpp:13
GemmSpecialization
Definition gemm_specialization.hpp:11
Definition convolution_backward_data_specialization.hpp:7
CK_TILE_HOST float launch_kernel(const stream_config &s, Callables &&... callables)
Definition tile/host/kernel_launch.hpp:173
Definition ck.hpp:268
__host__ __device__ constexpr auto make_multi_index(Xs &&... xs)
Definition array_multi_index.hpp:15
__global__ void kernel_contraction_multiple_d_xdl_cshuffle(const FloatAB *__restrict__ p_a_grid, const FloatAB *__restrict__ p_b_grid, FloatDsPointer p_ds_grid, FloatE *__restrict__ p_e_grid, const index_t batch_count, const AElementwiseOperation a_element_op, const BElementwiseOperation b_element_op, const CDEElementwiseOperation cde_element_op, const AGridDesc_AK0_M_AK1 a_grid_desc_ak0_m_ak1, const BGridDesc_BK0_N_BK1 b_grid_desc_bk0_n_bk1, const DsGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock ds_grid_desc_mblock_mperblock_nblock_nperblock, const EGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock e_grid_desc_mblock_mperblock_nblock_nperblock, const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch, const Block2ETileMap block_2_etile_map)
Definition device_batched_contraction_multiple_d_xdl_cshuffle.hpp:41
__host__ __device__ constexpr auto make_naive_tensor_descriptor(const Tuple< Lengths... > &lengths, const Tuple< Strides... > &strides)
Definition tensor_descriptor_helper.hpp:49
typename tuple_element< I, TTuple >::type tuple_element_t
Definition utility/tuple.hpp:208
__host__ __device__ constexpr auto make_merge_transform(const LowLengths &low_lengths)
Definition multi_index_transform_helper.hpp:55
__host__ __device__ constexpr auto get_container_subset(const Array< T, N > &arr, Sequence< Is... >)
Definition utility/container_helper.hpp:346
__host__ __device__ constexpr auto container_reduce(const Container &x, Reduce reduce, Init init, Number< IBegin >=Number< 0 >{}, Number< IEnd >=Number< Container::Size()>{}, Number< IStep >=Number< 1 >{})
Definition utility/container_helper.hpp:111
__host__ __device__ constexpr auto generate_tuple(F &&f, Number< N >)
Definition tuple_helper.hpp:21
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
__host__ __device__ constexpr auto transform_tensor_descriptor(const OldTensorDescriptor &old_tensor_desc, const NewTransforms &new_transforms, NewLowerDimensionOldVisibleIdss, NewUpperDimensionNewVisibleIdss)
Definition tensor_description/tensor_descriptor.hpp:319
constexpr LoopScheduler make_default_loop_scheduler()
Definition loop_scheduler.hpp:20
Definition ck/stream_config.hpp:10
Definition gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp:76
__host__ static __device__ constexpr auto MakeDefaultAGridDescriptor_AKB_AK0_M_AK1(const AGridDesc_M_K &a_grid_desc_m_k, const int split_k)
Definition gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp:193
remove_cvref_t< decltype(MakeDsGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(DsGridDesc_M_N{}))> DsGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
Definition gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp:383
remove_cvref_t< decltype(MakeDefaultBlock2ETileMap(EGridDesc_M_N{}, 1))> DefaultBlock2ETileMap
Definition gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp:387
__host__ static __device__ constexpr auto MakeDefaultBGridDescriptor_BKB_BK0_N_BK1(const BGridDesc_N_K &b_grid_desc_n_k, const int split_k)
Definition gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp:219
__host__ static __device__ constexpr bool CheckValidity(const AGridDesc_AKB_AK0_M_AK1 &a_grid_desc_akb_ak0_m_ak1, const BGridDesc_BKB_BK0_N_BK1 &b_grid_desc_bkb_bk0_n_bk1, const DsGridDesc_M_N &ds_grid_desc_m_n, const EGridDesc_M_N &e_grid_desc_m_n, const Block2ETileMap &block_2_etile_map)
Definition gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp:293
remove_cvref_t< decltype(MakeEGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(EGridDesc_M_N{}))> EGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock
Definition gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp:380
Definition utility/sequence.hpp:43
Definition utility/sequence.hpp:256
typename conditional< kHasContent, type0, type1 >::type type
Definition utility/sequence.hpp:271
Definition utility/integral_constant.hpp:20
Definition utility/math.hpp:34
Definition functional2.hpp:33
Definition device_base.hpp:197
BaseArgument()=default
BaseInvoker()=default
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:483
__host__ __device__ constexpr long_index_t GetEPtrOffset(index_t g_idx) const
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:517
__host__ __device__ constexpr long_index_t GetAPtrOffset(index_t g_idx) const
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:495
__host__ __device__ constexpr long_index_t GetBPtrOffset(index_t g_idx) const
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:500
ComputePtrOffsetOfStridedBatch(index_t batch_stride_A, index_t batch_stride_B, DsGridDesc_G_M_N ds_grid_desc_g_m_n, EGridDesc_G_M_N e_grid_desc_g_m_n)
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:484
__host__ __device__ constexpr auto GetDsPtrOffset(index_t g_idx) const
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:505
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:643
const BDataType * p_b_grid_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:788
BGridDesc_N_K b_grid_desc_n_k_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:794
ck::tensor_operation::device::DeviceSplitKContractionMultipleD_Xdl_CShuffle::Argument::ds_nz_stride_
std::array< index_t, NumDTensor > ds_nz_stride_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:823
void Print() const
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:762
EDataType * p_e_grid_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:790
index_t b_nz_stride_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:821
BGridDesc_BKB_BK0_N_BK1 b_grid_desc_bkb_bk0_n_bk1_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:803
index_t b_kz_stride_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:822
index_t e_mz_stride_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:824
AGridDesc_AKB_AK0_M_AK1 a_grid_desc_akb_ak0_m_ak1_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:802
Block2ETileMap block_2_etile_map_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:810
GridwiseGemm64::EGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock e_grid_desc_mblock_mperblock_nblock_nperblock_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:807
AGridDesc_M_K a_grid_desc_m_k_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:793
index_t a_batch_stride_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:827
EGridDesc_M_N e_grid_desc_m_n_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:796
ComputePtrOffsetOfStridedBatch compute_ptr_offset_of_batch_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:830
GridwiseGemm64::DsGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock ds_grid_desc_mblock_mperblock_nblock_nperblock_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:805
void init_ds_e_grid_desc()
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:645
index_t split_k_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:832
EGridDesc_G_M_N e_grid_desc_g_m_n_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:799
ck::tensor_operation::device::DeviceSplitKContractionMultipleD_Xdl_CShuffle::Argument::a_element_op_
AElementwiseOperation a_element_op_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:813
GridwiseGemm64::DsGridPointer p_ds_grid_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:789
Argument(const void *p_a_grid, const void *p_b_grid, std::array< const void *, NumDTensor > p_ds_grid, void *p_e_grid, const std::vector< index_t > &a_gs_ms_ns_lengths, const std::vector< index_t > &a_gs_ms_ks_strides, const std::vector< index_t > &b_gs_ns_ks_lengths, const std::vector< index_t > &b_gs_ns_ks_strides, const std::array< std::vector< index_t >, NumDTensor > &ds_gs_ms_ns_lengths, const std::array< std::vector< index_t >, NumDTensor > &ds_gs_ms_ns_strides, const std::vector< index_t > &e_gs_ms_ns_lengths, const std::vector< index_t > &e_gs_ms_ns_strides, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CDEElementwiseOperation cde_element_op, index_t split_k)
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:662
index_t b_batch_stride_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:828
DsGridDesc_G_M_N ds_grid_desc_g_m_n_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:798
const ADataType * p_a_grid_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:787
index_t e_nz_stride_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:825
index_t a_mz_stride_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:819
DsGridDesc_M_N ds_grid_desc_m_n_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:795
CDEElementwiseOperation cde_element_op_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:815
ck::tensor_operation::device::DeviceSplitKContractionMultipleD_Xdl_CShuffle::Argument::b_element_op_
BElementwiseOperation b_element_op_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:814
index_t a_kz_stride_
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:820
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:837
DeviceOp::Argument Argument
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:838
float RunImp(const Argument &arg, const StreamConfig &stream_config=StreamConfig{})
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:841
INVOKER_RUN_IMPL float Run(const BaseArgument *p_arg, const StreamConfig &stream_config=StreamConfig{}) override
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:974
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:196
std::unique_ptr< BaseArgument > MakeArgumentPointer(const void *p_a, const void *p_b, std::array< const void *, NumDTensor > p_ds, void *p_e, const std::vector< index_t > &a_gs_ms_ns_lengths, const std::vector< index_t > &a_gs_ms_ks_strides, const std::vector< index_t > &b_gs_ns_ks_lengths, const std::vector< index_t > &b_gs_ns_ks_strides, const std::array< std::vector< index_t >, NumDTensor > &ds_gs_ms_ns_lengths, const std::array< std::vector< index_t >, NumDTensor > &ds_gs_ms_ns_strides, const std::vector< index_t > &e_gs_ms_ns_lengths, const std::vector< index_t > &e_gs_ms_ns_strides, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CDEElementwiseOperation cde_element_op, index_t split_k) override
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:1135
static auto MakeDsGridDescriptor_G_M_N(const std::array< std::vector< index_t >, NumDTensor > &ds_gs_ms_ns_lengths_vec, const std::array< std::vector< index_t >, NumDTensor > &ds_gs_ms_ns_strides_vec)
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:462
static constexpr auto I3
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:206
static GET_NXDL_PER_WAVE_IMPL constexpr auto NXdlPerWave64
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:199
static auto MakeEGridDescriptor_G_M_N(const std::vector< index_t > &e_gs_ms_ns_lengths_vec, const std::vector< index_t > &e_gs_ms_ns_strides_vec)
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:380
ck::tensor_operation::device::DeviceSplitKContractionMultipleD_Xdl_CShuffle::AGridDesc_AKB_AK0_M_AK1
remove_cvref_t< decltype(GridwiseGemm64::MakeDefaultAGridDescriptor_AKB_AK0_M_AK1( AGridDesc_M_K{}, 1))> AGridDesc_AKB_AK0_M_AK1
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:632
static auto MakeArgument(const void *p_a, const void *p_b, std::array< const void *, NumDTensor > p_ds, void *p_e, const std::vector< index_t > &a_gs_ms_ns_lengths, const std::vector< index_t > &a_gs_ms_ks_strides, const std::vector< index_t > &b_gs_ns_ks_lengths, const std::vector< index_t > &b_gs_ns_ks_strides, const std::array< std::vector< index_t >, NumDTensor > &ds_gs_ms_ns_lengths, const std::array< std::vector< index_t >, NumDTensor > &ds_gs_ms_ns_strides, const std::vector< index_t > &e_gs_ms_ns_lengths, const std::vector< index_t > &e_gs_ms_ns_strides, AElementwiseOperation a_element_op, BElementwiseOperation b_element_op, CDEElementwiseOperation cde_element_op, index_t split_k)
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:1096
remove_cvref_t< decltype(MakeDsGridDescriptor_M_N({{}}, {{}}))> DsGridDesc_M_N
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:476
ck::tensor_operation::device::DeviceSplitKContractionMultipleD_Xdl_CShuffle::MakeAGridDescriptor_M_K
static auto MakeAGridDescriptor_M_K(const std::vector< index_t > &a_gs_ms_ks_lengths_vec, const std::vector< index_t > &a_gs_ms_ks_strides_vec)
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:212
std::unique_ptr< BaseInvoker > MakeInvokerPointer() override
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:1171
GridwiseGemmSplitKMultipleD_xdl_cshuffle< ADataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, InMemoryDataOperationEnum::Set, AGridDesc_M_K, BGridDesc_N_K, DsGridDesc_M_N, EGridDesc_M_N, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave_, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, false, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, false, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched > GridwiseGemmBase
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:532
decltype(MakeAGridDescriptor_M_K({}, {})) AGridDesc_M_K
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:474
GridwiseGemmBase< NXdlPerWave32 > GridwiseGemm32
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:579
static bool IsSupportedArgument(const Argument &arg)
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:981
static auto MakeInvoker()
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:1131
static auto MakeDsGridDescriptor_M_N(const std::array< std::vector< index_t >, NumDTensor > &ds_gs_ms_ns_lengths_vec, const std::array< std::vector< index_t >, NumDTensor > &ds_gs_ms_ns_strides_vec)
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:450
static constexpr auto NXdlPerWave32
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:200
ck::tensor_operation::device::DeviceSplitKContractionMultipleD_Xdl_CShuffle::GridwiseGemmAtomicAdd32
GridwiseGemmAtomicAddBase< NXdlPerWave32 > GridwiseGemmAtomicAdd32
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:630
ck::tensor_operation::device::DeviceSplitKContractionMultipleD_Xdl_CShuffle::GridwiseGemmAtomicAdd64
GridwiseGemmAtomicAddBase< math::max(NXdlPerWave64, 1)> GridwiseGemmAtomicAdd64
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:629
ck::tensor_operation::device::DeviceSplitKContractionMultipleD_Xdl_CShuffle::MakeEGridDescriptor_M_N
static auto MakeEGridDescriptor_M_N(const std::vector< index_t > &e_gs_ms_ns_lengths_vec, const std::vector< index_t > &e_gs_ms_ns_strides_vec)
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:324
static constexpr index_t NumDTensor
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:201
ck::tensor_operation::device::DeviceSplitKContractionMultipleD_Xdl_CShuffle::MakeBGridDescriptor_N_K
static auto MakeBGridDescriptor_N_K(const std::vector< index_t > &b_gs_ns_ks_lengths_vec, const std::vector< index_t > &b_gs_ns_ks_strides_vec)
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:268
decltype(MakeEGridDescriptor_G_M_N({}, {})) EGridDesc_G_M_N
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:480
static constexpr auto matrix_padder
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:208
ck::tensor_operation::device::DeviceSplitKContractionMultipleD_Xdl_CShuffle::BGridDesc_BKB_BK0_N_BK1
remove_cvref_t< decltype(GridwiseGemm64::MakeDefaultBGridDescriptor_BKB_BK0_N_BK1( BGridDesc_N_K{}, 1))> BGridDesc_BKB_BK0_N_BK1
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:635
static constexpr auto I1
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:204
decltype(MakeBGridDescriptor_N_K({}, {})) BGridDesc_N_K
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:475
GridwiseGemmBase< math::max(NXdlPerWave64, 1)> GridwiseGemm64
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:578
typename GridwiseGemm64::DefaultBlock2ETileMap Block2ETileMap
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:639
DeviceSplitKContractionMultipleD_Xdl_CShuffle DeviceOp
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:197
static constexpr auto I2
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:205
remove_cvref_t< decltype(MakeDsGridDescriptor_G_M_N({}, {}))> DsGridDesc_G_M_N
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:479
bool IsSupportedArgument(const BaseArgument *p_arg) override
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:1090
GridwiseGemmSplitKMultipleD_xdl_cshuffle< ADataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, InMemoryDataOperationEnum::AtomicAdd, AGridDesc_M_K, BGridDesc_N_K, DsGridDesc_M_N, EGridDesc_M_N, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave_, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, false, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, false, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEBlockTransferScalarPerVector_NPerBlock, LoopSched > GridwiseGemmAtomicAddBase
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:583
static constexpr auto I0
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:203
std::string GetTypeString() const override
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:1177
decltype(MakeEGridDescriptor_M_N({}, {})) EGridDesc_M_N
Definition device_splitk_contraction_multiple_d_xdl_cshuffle.hpp:477
Definition device_splitk_contraction_multiple_d.hpp:39
Definition matrix_padder.hpp:180