flatmm_pipeline_agmem_bgmem_creg_v1.hpp Source File
flatmm_pipeline_agmem_bgmem_creg_v1.hpp
Go to the documentation of this file.
Line 503: template <typename ADramBlockWindowTmp, typename BFlatBlockWindowTmp, typename AElementFunction>
Definition tile/core/algorithm/cluster_descriptor.hpp:13
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
CK_TILE_DEVICE auto tile_elementwise_in(const InElementFunc &in_element_func, const InTensor &... in_dstr_tensors)
Definition tile_elementwise.hpp:40
CK_TILE_HOST_DEVICE constexpr auto make_tensor_view(DataType *__restrict__ p, const tensor_descriptor< Ts... > &desc)
Definition tensor_view.hpp:452
constant< b > bool_constant
Definition tile/core/numeric/integral_constant.hpp:43
CK_TILE_DEVICE index_t get_warp_id(bool_constant< ReturnSgpr >={})
Definition arch.hpp:104
CK_TILE_DEVICE void tile_elementwise_inout(const InOutElementFunc &inout_element_func, InOutDstrTensors &... inout_dstr_tensors)
Definition tile_elementwise.hpp:23
auto concat(const Ts &... xs) -> std::enable_if_t<!AllConvertibleToStringView< Ts... >, std::string >
Definition concat.hpp:43
CK_TILE_HOST_DEVICE constexpr auto merge_sequences(Seqs...)
Definition tile/core/container/sequence.hpp:826
CK_TILE_DEVICE constexpr auto make_tile_window(null_tensor_view, const WindowLengths &window_lengths, const multi_index< WindowLengths::size()> &, Ts &&...)
Definition null_tile_window.hpp:75
CK_TILE_HOST_DEVICE constexpr auto to_sequence(tuple< number< Is >... >)
Definition tile/core/container/sequence.hpp:1055
CK_TILE_DEVICE void move_tile_window(null_tile_window< WindowLengths > &, const typename null_tile_window< WindowLengths >::BottomTensorIndex &)
Definition null_tile_window.hpp:95
typename uniform_sequence_gen< NSize, I >::type uniform_sequence_gen_t
Definition tile/core/container/sequence.hpp:1026
CK_TILE_DEVICE void store_tile(tile_window_with_static_lengths< BottomTensorView_, WindowLengths_ > &tile_window_tmp, const static_distributed_tensor< DataType_, TileDistribution_ > &dstr_tensor)
Definition store_tile.hpp:23
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition load_tile.hpp:22
CK_TILE_HOST_DEVICE constexpr auto make_tuple(Xs &&... xs)
Definition tile/core/container/tuple.hpp:360
tuple_array< T, N > statically_indexed_array
Definition tile/core/container/statically_indexed_array.hpp:16
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:14
static CK_TILE_HOST constexpr bool BlockHasHotloop(index_t num_loop)
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:17
static constexpr index_t PrefetchStages
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:15
static CK_TILE_HOST_DEVICE auto TailHandler(const RunFunction &run_func, bool, TailNumber tail_num)
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:28
static CK_TILE_HOST constexpr TailNumber GetBlockLoopTailNum(index_t num_loop)
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:22
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:47
remove_cvref_t< decltype(config.template at< 0 >())> WG
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:63
static constexpr index_t dsread_num_perK
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:162
static constexpr index_t Bload_num_perK
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:167
static CK_TILE_HOST_DEVICE constexpr auto Last2ndHotLoopScheduler()
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:427
remove_cvref_t< typename Problem::CLayout > CLayout
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:55
static constexpr auto idxK
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:95
static constexpr index_t GetVectorSizeA()
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:78
remove_cvref_t< typename Problem::CDataType > CDataType
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:50
remove_cvref_t< typename Problem::ADataType > ADataType
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:48
static CK_TILE_HOST_DEVICE constexpr auto LastHotLoopScheduler()
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:481
static constexpr bool DoubleSmemBuffer
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:187
static constexpr index_t flatNPerWarp
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:76
static constexpr index_t KPerBlockPerIter
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:111
static constexpr index_t kNPerBlock
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:72
remove_cvref_t< typename BlockGemmShape::BlockTile > BlockTile
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:96
static constexpr index_t mfma_per_wg
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:156
static constexpr index_t MIterPerWarp
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:103
static constexpr index_t kMPerBlock
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:71
static constexpr index_t NWarp
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:101
static constexpr index_t KFlatPerBlockPerIter
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:107
static constexpr bool UsePersistentKernel
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:88
static constexpr index_t kKPerBlock
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:73
remove_cvref_t< typename BlockGemmShape::WarpTile > WarpTile
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:98
static constexpr auto I0
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:90
static constexpr index_t GetVectorSizeB()
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:79
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:988
static constexpr index_t DsWritePreIssue
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:65
static constexpr bool kPadM
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:82
static constexpr index_t Bload_rep
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:169
static constexpr auto I1
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:91
static CK_TILE_HOST_DEVICE constexpr index_t GetSmemSize()
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:191
remove_cvref_t< typename Problem::ALayout > ALayout
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:53
static constexpr index_t flatKPerWarp
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:75
static constexpr index_t mfma_perM_perK
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:171
static constexpr bool kPadK
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:84
static constexpr index_t MWarp
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:100
remove_cvref_t< typename Problem::BlockGemmShape > BlockGemmShape
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:51
static constexpr index_t kLdsAlignmentInBytes
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:86
static constexpr index_t NFlatPerBlockPerIter
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:108
static constexpr index_t KIterPerWarp
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:105
remove_cvref_t< typename BlockGemmShape::BlockWarps > BlockWarps
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:97
static constexpr index_t NIterPerWarp
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:104
remove_cvref_t< typename Problem::BLayout > BLayout
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:54
static CK_TILE_HOST_DEVICE constexpr auto TransposeC()
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:189
static constexpr auto idxM
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:93
static constexpr index_t dswrite_rep
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:164
static constexpr index_t dswrite_mIter
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:172
static CK_TILE_HOST_DEVICE constexpr auto SchedulerPerM(index_t dsread_perM, index_t dswrite_perM, index_t load_perM)
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:197
static constexpr index_t NumWaveGroups
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:87
static constexpr auto idxN
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:94
static CK_TILE_HOST const std::string GetName()
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:175
static constexpr auto I2
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:92
static constexpr index_t dswrite_num_perK
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:163
static constexpr auto config
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:60
static constexpr index_t Aload_num_perK
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:165
static constexpr index_t K1
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:113
remove_cvref_t< typename Problem::BDataType > BDataType
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:49
static constexpr auto TailNum
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:119
static constexpr bool HasHotLoop
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:118
static constexpr index_t HalfMIter
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:168
static constexpr index_t DsReadPreload
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:66
static constexpr index_t WaveSize
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:69
static constexpr index_t dsread_per_wg
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:158
static constexpr index_t dswrite_kIter
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:173
static constexpr index_t GetVectorSizeC()
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:80
static constexpr bool kPadN
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:83
remove_cvref_t< decltype(PipelinePolicy::template GetBlockFlatmm< Problem >())> BlockFlatmm
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:57
static CK_TILE_HOST_DEVICE constexpr auto HotLoopScheduler()
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:286
static constexpr index_t BlockSize
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:68
CK_TILE_HOST_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, index_t num_loop, void *p_smem_ping, void *p_smem_pong) const
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:504
static constexpr index_t MPerBlockPerIter
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:110
static constexpr index_t m_preload
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:114
static constexpr index_t Aload_rep
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:166
Definition tile/core/numeric/integral_constant.hpp:30
Definition tile/core/container/sequence.hpp:49
Definition tile/core/utility/functional.hpp:43