device_grouped_conv_bwd_weight_explicit_xdl.hpp Source File
device_grouped_conv_bwd_weight_explicit_xdl.hpp
Go to the documentation of this file.
float launch_and_time_kernel(const StreamConfig &stream_config, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition host_utility/kernel_launch.hpp:14
Definition convolution_backward_data_specialization.hpp:8
constexpr bool is_NHWGC_GKYXC_NHWGK()
Definition device_grouped_conv_utils.hpp:40
ck::index_t get_best_occupancy_k_batch_value(int max_occupancy, ck::index_t grid_size)
Definition split_k_utils.hpp:30
constexpr bool is_NDHWGC_GKZYXC_NDHWGK()
Definition device_grouped_conv_utils.hpp:80
Definition convolution_backward_data_specialization.hpp:7
Definition ck.hpp:268
__host__ __device__ constexpr auto make_pass_through_transform(const LowLength &low_length)
Definition multi_index_transform_helper.hpp:12
__host__ __device__ constexpr auto make_right_pad_transform(const LowLength &low_length, const RightPadLength &right_pad, integral_constant< bool, SkipIsValidCheck >=integral_constant< bool, false >{})
Definition multi_index_transform_helper.hpp:37
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
__host__ __device__ constexpr auto transform_tensor_descriptor(const OldTensorDescriptor &old_tensor_desc, const NewTransforms &new_transforms, NewLowerDimensionOldVisibleIdss, NewUpperDimensionNewVisibleIdss)
Definition tensor_description/tensor_descriptor.hpp:319
__global__ void kernel_elementwise(const InGridDescTuple in_grid_desc_tuple, const OutGridDescTuple out_grid_desc_tuple, const InDataTypePointerTuple p_in_global_tuple, const OutDataTypePointerTuple p_out_global_tuple, const Block2TileMap block_2_tile_map, const ElementwiseOperation elementwise_op)
Definition gridwise_elementwise_2d.hpp:29
Definition ck/stream_config.hpp:10
Definition block_to_ctile_map.hpp:261
Definition gridwise_elementwise_2d.hpp:278
Definition utility/sequence.hpp:43
Definition utility/tuple.hpp:117
Definition device_base.hpp:197
void * p_workspace_
Definition device_base.hpp:204
BaseArgument()=default
BaseInvoker()=default
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:99
std::array< ck::index_t, NDimSpatial > filter_spatial_lengths_
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:268
CElementwiseGridDesc elementwise_desc_
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:274
Block2TileMapElementwise elementwise_block_2_ctile_map_
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:275
const std::array< ck::index_t, NDimSpatial > & conv_filter_strides_
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:269
std::size_t GetWorkspaceETensorSizeBytes() const
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:243
Argument(const InDataType *p_in_grid, WeiDataType *p_wei_grid, const OutDataType *p_out_grid, const std::array< index_t, NDimSpatial+3 > &, const std::array< index_t, NDimSpatial+3 > &b_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_strides, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_strides, const std::array< ck::index_t, NDimSpatial > &conv_filter_strides, const std::array< ck::index_t, NDimSpatial > &, const std::array< ck::index_t, NDimSpatial > &input_left_pads, const std::array< ck::index_t, NDimSpatial > &input_right_pads, InElementwiseOperation in_element_op, WeiElementwiseOperation wei_element_op, OutElementwiseOperation out_element_op, ck::index_t split_k)
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:102
ck::index_t split_k_
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:276
WeiDataType * p_wei_grid_
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:272
const std::array< ck::index_t, NDimSpatial > & input_left_pads_
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:270
std::size_t GetWorkspaceSizeBytes() const
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:255
const std::array< ck::index_t, NDimSpatial > & input_right_pads_
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:271
GemmArgument explicit_gemm_args
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:267
bool is_filter_data_packed
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:273
typename DeviceGemmV3Op::Argument GemmArgument
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:100
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:281
DeviceGemmV3Op::Invoker explicit_gemm_op
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:331
typename DeviceGemmV3Op::Argument GemmArgument
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:283
float Run(const Argument &arg, const StreamConfig &stream_config=StreamConfig{})
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:285
float Run(const BaseArgument *p_arg, const StreamConfig &stream_config=StreamConfig{}) override
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:325
DeviceOp::Argument Argument
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:282
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:46
static constexpr index_t ElemsPerBlock
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:63
static auto MakeInvoker()
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:447
bool IsSupportedArgument(const BaseArgument *p_arg) override
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:404
GridwiseElementwise< Tuple< CElementwiseGridDesc >, Tuple< CElementwiseGridDesc >, Tuple< const float * >, Tuple< WeiDataType * >, Block2TileMapElementwise, WeiElementwiseOperation, ElementwiseBlockSize, I1, ElemsPerBlock, I1, ElemsPerBlock/ElementwiseBlockSize, Sequence< 0, 1 >, Sequence< 1 >, Sequence< 1 >, I1, I1 > GridwiseElementwiseCast
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:81
static auto MakeArgument(const InDataType *p_in_grid, WeiDataType *p_wei_grid, const OutDataType *p_out_grid, const std::array< index_t, NDimSpatial+3 > &b_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_strides, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_strides, const std::array< ck::index_t, NDimSpatial > &conv_filter_strides, const std::array< ck::index_t, NDimSpatial > &conv_filter_dilations, const std::array< ck::index_t, NDimSpatial > &input_left_pads, const std::array< ck::index_t, NDimSpatial > &input_right_pads, InElementwiseOperation in_element_op, WeiElementwiseOperation wei_element_op, OutElementwiseOperation out_element_op, const ck::index_t split_k)
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:410
static constexpr bool IsValidCompilationParameter()
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:334
static constexpr auto I0
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:51
static constexpr index_t ElementwiseBlockSize
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:62
static bool IsSupportedArgument(const Argument &arg)
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:340
void SetWorkSpacePointer(BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const override
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:516
static constexpr auto I1
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:52
static constexpr auto I2
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:53
std::unique_ptr< BaseInvoker > MakeInvokerPointer() override
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:487
typename DeviceGemmV3Op::CDataType_ TwoStageIntermediateType
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:60
static constexpr bool IsTwoStageNeeded
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:55
BlockToCTileMap_M00_N0_M01Adapt< 1, ElemsPerBlock > Block2TileMapElementwise
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:80
remove_cvref_t< decltype(GetElementwiseCGridDesc(I1))> CElementwiseGridDesc
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:79
std::string GetTypeString() const override
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:492
static auto GetElementwiseCGridDesc(index_t merged_filter_dims)
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:65
DeviceGroupedConvBwdWeight_Explicit_Xdl DeviceOp
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:59
size_t GetWorkSpaceSize(const BaseArgument *p_arg) const override
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:503
std::unique_ptr< BaseArgument > MakeArgumentPointer(const void *p_in_grid, void *p_wei_grid, const void *p_out_grid, const std::array< index_t, NDimSpatial+3 > &b_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_strides, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_strides, const std::array< ck::index_t, NDimSpatial > &conv_filter_strides, const std::array< ck::index_t, NDimSpatial > &conv_filter_dilations, const std::array< ck::index_t, NDimSpatial > &input_left_pads, const std::array< ck::index_t, NDimSpatial > &input_right_pads, InElementwiseOperation in_element_op, WeiElementwiseOperation wei_element_op, OutElementwiseOperation out_element_op, const ck::index_t split_k) override
Definition device_grouped_conv_bwd_weight_explicit_xdl.hpp:450
Definition device_grouped_conv_bwd_weight.hpp:29
Definition tensor_operation/gpu/element/unary_element_wise_operation.hpp:340