gemm_pipeline_ag_bg_cr_comp_v3.hpp Source File
gemm_pipeline_ag_bg_cr_comp_v3.hpp
Go to the documentation of this file.
143 static constexpr index_t GetVectorSizeC() { return Policy::template GetVectorSizeC<Problem>(); }
Definition tile/core/algorithm/cluster_descriptor.hpp:13
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
CK_TILE_DEVICE auto load_tile_with_elementwise(const TileWindow_ &tile_window, ElementWise_ elementwise, number< i_access >={}, bool_constant< oob_conditional_check >={})
Load tile with elementwise function.
Definition load_tile.hpp:41
constant< b > bool_constant
Definition tile/core/numeric/integral_constant.hpp:43
typename detail::detector< nonesuch, void, Op, Args... >::value_t is_detected
Definition type_traits.hpp:67
CK_TILE_DEVICE void tile_elementwise_inout(const InOutElementFunc &inout_element_func, InOutDstrTensors &... inout_dstr_tensors)
Definition tile_elementwise.hpp:23
ck_tile::element_wise::PassThrough PassThrough
Definition grouped_convolution_utils.hpp:47
auto concat(const Ts &... xs) -> std::enable_if_t<!AllConvertibleToStringView< Ts... >, std::string >
Definition concat.hpp:43
CK_TILE_DEVICE void transpose_tile2d(OutTensor &out, const InTensor &in)
Definition transpose_tile.hpp:195
CK_TILE_HOST_DEVICE constexpr auto make_static_distributed_tensor(const StaticTileDistribution &)
Definition static_distributed_tensor.hpp:142
CK_TILE_DEVICE void move_tile_window(null_tile_window< WindowLengths > &, const typename null_tile_window< WindowLengths >::BottomTensorIndex &)
Definition null_tile_window.hpp:95
CK_TILE_HOST_DEVICE constexpr auto make_static_tile_distribution(StaticTileDistributionEncoding_)
Definition tile_distribution.hpp:480
GemmPipelineScheduler
Definition gemm_pipeline_ag_bg_cr_scheduler.hpp:14
@ Intrawave
Definition gemm_pipeline_ag_bg_cr_scheduler.hpp:16
CK_TILE_HOST_DEVICE constexpr details::return_type< D, Ts... > make_array(Ts &&... ts)
Definition tile/core/container/array.hpp:242
CK_TILE_HOST_DEVICE constexpr auto make_tuple(Xs &&... xs)
Definition tile/core/container/tuple.hpp:360
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:18
static CK_TILE_HOST_DEVICE constexpr bool BlockHasHotloop(index_t num_loop)
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:24
static CK_TILE_HOST_DEVICE constexpr TailNumber GetBlockLoopTailNum(index_t num_loop)
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:29
static constexpr index_t PrefillStages
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:20
static constexpr index_t PrefetchStages
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:19
static constexpr index_t GlobalBufferNum
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:21
static constexpr bool UsePersistentKernel
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:22
static CK_TILE_HOST_DEVICE auto TailHandler(const RunFunction &run_func, bool has_hot_loop, TailNumber tail_number)
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:50
static CK_TILE_DEVICE constexpr auto HotLoopScheduler()
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:250
PipelineImplBase Base
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:248
CK_TILE_DEVICE auto operator()(const AsDramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BsDramBlockWindowTmp &b_dram_block_window_tmp, const BElementFunction &b_element_func, index_t num_loop, void *p_smem) const
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:399
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:242
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:100
static constexpr index_t GetVectorSizeA()
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:134
static CK_TILE_HOST std::string Print()
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:190
static constexpr index_t Preshuffle
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:159
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const BDramBlockWindowTmp &b_dram_block_window_tmp, index_t num_loop, void *p_smem) const
Quant operator(), single input: This function runs the pipeline using compile-time known hot loop and tail number.
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:771
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const BDramBlockWindowTmp &b_dram_block_window_tmp, index_t num_loop, bool has_hot_loop, TailNumber tail_number, void *p_smem) const
Quant operator(), single input: This function runs the pipeline by wrapping it with the tail handler.
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:743
static constexpr bool DoubleSmemBuffer
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:157
remove_cvref_t< std::tuple_element_t< 0, BsLayout > > BLayout
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:117
static constexpr bool HasHotLoop
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:161
remove_cvref_t< typename Problem::AElementWise > AElementWise
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:108
static constexpr index_t NPerBlock
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:130
static constexpr index_t BlockSize
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:127
remove_cvref_t< typename Problem::BsLayoutTuple > BsLayout
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:113
static CK_TILE_HOST_DEVICE constexpr index_t GetSmemSize()
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:185
remove_cvref_t< typename Problem::AsDataTypeTuple > AsDataType
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:104
static constexpr index_t NumWaveGroups
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:158
static constexpr index_t GetVectorSizeC()
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:143
static constexpr index_t KPerBlock
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:131
number< 0 > I0
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:123
remove_cvref_t< typename Problem::BsDataTypeTuple > BsDataType
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:105
static constexpr auto is_a_load_tr_v
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:167
static constexpr bool kPadN
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:154
CK_TILE_DEVICE auto operator()(const AsDramBlockWindowTmp &a_dram_block_window_tmp, const BsDramBlockWindowTmp &b_dram_block_window_tmp, index_t num_loop, bool has_hot_loop, TailNumber tail_number, void *p_smem) const
This function runs the pipeline by wrapping it with the tail handler.
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:661
number< 1 > I1
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:124
number< 2 > I2
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:125
remove_cvref_t< typename Problem::AsLayoutTuple > AsLayout
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:112
remove_cvref_t< typename Problem::BlockGemmShape > BlockGemmShape
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:110
BaseGemmPipelineAgBgCrCompV3< Problem > Base
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:101
remove_cvref_t< std::tuple_element_t< 0, BsDataType > > BDataType
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:120
static constexpr index_t GetSmemPackB()
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:151
static CK_TILE_HOST const std::string GetName()
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:173
CK_TILE_DEVICE auto operator()(const AsDramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BsDramBlockWindowTmp &b_dram_block_window_tmp, const BElementFunction &b_element_func, index_t num_loop, void *p_smem) const
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:634
static constexpr index_t PrefetchStages
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:19
static constexpr auto Scheduler
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:165
static constexpr auto TailNum
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:163
remove_cvref_t< typename Problem::CLayout > CLayout
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:114
remove_cvref_t< typename Problem::CDataType > CDataType
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:106
static constexpr index_t APackedSize
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:145
CK_TILE_DEVICE auto operator()(const AsDramBlockWindowTmp &a_dram_block_window_tmp, const BsDramBlockWindowTmp &b_dram_block_window_tmp, index_t num_loop, void *p_smem) const
This function runs the pipeline using compile-time known hot loop and tail number.
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:695
static constexpr index_t GetVectorSizeB()
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:139
remove_cvref_t< decltype(Policy::template GetBlockGemm< Problem >())> BlockGemm
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:122
static constexpr index_t GetSmemPackA()
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:150
static constexpr bool kPadM
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:153
static constexpr bool kPadK
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:155
remove_cvref_t< typename Problem::BElementWise > BElementWise
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:109
static constexpr auto is_b_load_tr_v
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:168
remove_cvref_t< std::tuple_element_t< 0, AsDataType > > ADataType
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:119
GemmPipelineAgBgCrImplBase< Problem, Policy > PipelineImplBase
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:102
static constexpr index_t MPerBlock
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:129
static constexpr index_t BPackedSize
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:147
remove_cvref_t< std::tuple_element_t< 0, AsLayout > > ALayout
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:116
Definition gemm_pipeline_ag_bg_cr_base.hpp:13
CK_TILE_DEVICE constexpr auto GetBWindows(const BDramBlockWindowTmp &b_dram_block_window_tmp, const BLdsTensorView &b_lds_block_view, const BLdsLoadTileDistr &, const array< index_t, 2 > &offset={0, 0}) const
Definition gemm_pipeline_ag_bg_cr_base.hpp:225
remove_cvref_t< std::tuple_element_t< number< 0 >{}, BsDataType > > BDataType
Definition gemm_pipeline_ag_bg_cr_base.hpp:22
CK_TILE_DEVICE auto GetABLdsTensorViews(void *p_smem) const
Definition gemm_pipeline_ag_bg_cr_base.hpp:83
static constexpr index_t NPerBlock
Definition gemm_pipeline_ag_bg_cr_base.hpp:26
static constexpr index_t MPerBlock
Definition gemm_pipeline_ag_bg_cr_base.hpp:25
CK_TILE_DEVICE void LocalPrefill(DstTileWindow &lds_tile_window, const SrcBlockTile &src_block_tile, const ElementFunction &element_func) const
Definition gemm_pipeline_ag_bg_cr_base.hpp:57
CK_TILE_DEVICE constexpr auto GetAWindows(const ADramBlockWindowTmp &a_dram_block_window_tmp, const ALdsTensorView &a_lds_block_view, const ALdsLoadTileDistr &, const array< index_t, 2 > &offset={0, 0}) const
Definition gemm_pipeline_ag_bg_cr_base.hpp:190
remove_cvref_t< std::tuple_element_t< number< 0 >{}, AsDataType > > ADataType
Definition gemm_pipeline_ag_bg_cr_base.hpp:20
static constexpr index_t KPerBlock
Definition gemm_pipeline_ag_bg_cr_base.hpp:27
Definition tile/core/numeric/integral_constant.hpp:30
Definition tile/core/numeric/numeric.hpp:81
Definition tile/core/utility/functional.hpp:43