mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp Source File
mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp
Go to the documentation of this file.
Definition tile/core/algorithm/cluster_descriptor.hpp:13
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
CK_TILE_DEVICE auto async_load_tile(LdsTileWindow_ &&lds_tile, const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition load_tile.hpp:119
CK_TILE_DEVICE auto tile_elementwise_in(const InElementFunc &in_element_func, const InTensor &... in_dstr_tensors)
Definition tile_elementwise.hpp:40
CK_TILE_HOST_DEVICE constexpr auto make_tensor_view(DataType *__restrict__ p, const tensor_descriptor< Ts... > &desc)
Definition tensor_view.hpp:452
CK_TILE_HOST_DEVICE constexpr fp16x2_t pk_fp4_to_fp16x2(const pk_fp4_t &x, float scale)
Definition pk_fp4.hpp:354
CK_TILE_HOST_DEVICE constexpr bf16x2_t pk_fp4_to_bf16x2(const pk_fp4_t &x, float scale)
Definition pk_fp4.hpp:358
CK_TILE_DEVICE auto replace_bottom_tensor_view(const NewTensorView_ &new_tensor_view, const tile_scatter_gather< OldTensorView_, WindowLengths_, StaticTileDistribution_, StaticPageIndexArray_, StaticValidArray_, HsGatherDim, NumCoord > &tile_window)
Definition tile_scatter_gather.hpp:1043
CK_TILE_DEVICE index_t get_warp_id(bool_constant< ReturnSgpr >={})
Definition arch.hpp:104
CK_TILE_DEVICE void tile_elementwise_inout(const InOutElementFunc &inout_element_func, InOutDstrTensors &... inout_dstr_tensors)
Definition tile_elementwise.hpp:23
CK_TILE_HOST_DEVICE constexpr auto merge_sequences(Seqs...)
Definition tile/core/container/sequence.hpp:826
CK_TILE_DEVICE constexpr auto make_tile_window(null_tensor_view, const WindowLengths &window_lengths, const multi_index< WindowLengths::size()> &, Ts &&...)
Definition null_tile_window.hpp:75
CK_TILE_HOST_DEVICE constexpr auto to_sequence(tuple< number< Is >... >)
Definition tile/core/container/sequence.hpp:1055
CK_TILE_DEVICE void move_tile_window(null_tile_window< WindowLengths > &, const typename null_tile_window< WindowLengths >::BottomTensorIndex &)
Definition null_tile_window.hpp:95
typename uniform_sequence_gen< NSize, I >::type uniform_sequence_gen_t
Definition tile/core/container/sequence.hpp:1026
CK_TILE_DEVICE void store_tile(tile_window_with_static_lengths< BottomTensorView_, WindowLengths_ > &tile_window_tmp, const static_distributed_tensor< DataType_, TileDistribution_ > &dstr_tensor)
Definition store_tile.hpp:23
GemmPipelineScheduler
Definition gemm_pipeline_ag_bg_cr_scheduler.hpp:14
GemmPipelineScheduler::Intrawave
Definition gemm_pipeline_ag_bg_cr_scheduler.hpp:16
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition load_tile.hpp:22
CK_TILE_HOST_DEVICE constexpr auto make_tuple(Xs &&... xs)
Definition tile/core/container/tuple.hpp:360
tuple_array< T, N > statically_indexed_array
Definition tile/core/container/statically_indexed_array.hpp:16
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:50
static constexpr index_t MPerBlockPerIter
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:118
static constexpr auto I1
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:99
static constexpr int ContinuousScaleNPerThread
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:129
remove_cvref_t< typename Problem::QuantType > BDataType
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:54
static constexpr index_t GetVectorSizeA()
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:86
ADataType ComputeType
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:58
static constexpr int ContinuousScaleKPerThread
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:130
static constexpr bool kPadN
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:91
static constexpr index_t KPerScaleLoad
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:175
static constexpr index_t BlockSize
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:76
static constexpr int ContinuousKPerThread
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:128
static constexpr index_t DsReadPreload
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:74
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, const DequantBFlatWindow &scale_b_flat_window, const index_t num_loop, const index_t k_padded_zeros, void *p_smem_ping, void *p_smem_pong) const
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:1220
static constexpr index_t MIterPerWarp
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:111
static constexpr index_t KIterPerWarp
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:113
remove_cvref_t< typename Problem::CDataType > CDataType
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:55
static CK_TILE_HOST_DEVICE constexpr auto GetADramTileDistribution()
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:437
remove_cvref_t< typename Problem::BLayout > BLayout
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:62
remove_cvref_t< typename BlockGemmShape::WarpTile > WarpTile
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:106
static constexpr auto idxK
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:103
static constexpr index_t NIterPerWarp
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:112
static constexpr index_t KFlatPerBlockPerIter
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:115
static constexpr index_t AK1
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:122
static constexpr int ScaleKPerWarp
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:146
static constexpr int XDLK_PerThread
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:135
static constexpr index_t mfma_per_wg
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:157
static constexpr index_t NFlatPerBlockPerIter
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:116
static constexpr bool UsePersistentKernel
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:96
static constexpr index_t mfma_perM_perK
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:179
static CK_TILE_HOST_DEVICE constexpr auto HotLoopScheduler()
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:274
static constexpr int ScaleNPerWarp
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:147
remove_cvref_t< decltype(PipelinePolicy::template GetBlockFlatmm< Problem >())> BlockFlatmm
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:65
static constexpr index_t GetVectorSizeB()
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:87
static constexpr auto config
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:68
static constexpr int MXFP4K_PerScaleK
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:149
static constexpr index_t dsread_num_perK
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:163
static constexpr index_t Bload_rep
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:177
static constexpr index_t dswrite_num_perK
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:164
static constexpr bool kPadK
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:92
static constexpr index_t Bload_total_num
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:173
remove_cvref_t< typename Problem::ALayout > ALayout
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:61
CK_TILE_HOST_DEVICE auto operator()(ADramBlockWindowTmp a_copy_dram_window_, const AElementFunction &a_element_func, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, const DequantBFlatWindow &scale_b_flat_window, const index_t num_loop, const index_t k_padded_zeros, void *p_smem_ping, void *p_smem_pong) const
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:446
static constexpr index_t kNPerBlock
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:80
static constexpr index_t WaveSize
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:77
static constexpr index_t dsread_per_wg
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:159
remove_cvref_t< typename Problem::BlockGemmShape > BlockGemmShape
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:56
static constexpr index_t DsWritePreIssue
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:73
static constexpr index_t kMPerBlock
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:79
static constexpr index_t KPerBlockPerIter
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:119
remove_cvref_t< typename BlockGemmShape::BlockWarps > BlockWarps
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:105
static constexpr auto I0
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:98
static constexpr index_t NumWaveGroups
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:95
static constexpr int XDL_PerWeightK
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:138
static constexpr index_t ScaleBload_K1
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:169
static constexpr int MXFP4KPerWarp
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:145
static CK_TILE_HOST_DEVICE constexpr auto Last2ndHotLoopScheduler()
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:391
static CK_TILE_HOST_DEVICE constexpr auto LastHotLoopScheduler()
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:417
static constexpr int XDL_PerScaleK
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:139
remove_cvref_t< decltype(config.template at< 0 >())> WG
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:71
static CK_TILE_HOST_DEVICE constexpr auto SchedulerPerM(index_t dsread_perM, index_t dswrite_perM, index_t load_perM)
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:187
static constexpr auto idxN
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:102
static constexpr int MXFP4PackedSize
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:121
static constexpr int ScaleKFlatPerWarp
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:132
FlatmmPipelineAGmemBGmemCRegV1< Problem, PipelinePolicy > Underlying
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:51
static constexpr index_t dswrite_kIter
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:181
static constexpr bool HasHotLoop
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:151
static constexpr index_t MWarp
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:108
static constexpr int XDL_PerScaleN
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:140
static constexpr index_t NWarp
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:109
static constexpr auto I2
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:100
remove_cvref_t< typename Problem::CLayout > CLayout
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:63
remove_cvref_t< typename BlockGemmShape::BlockTile > BlockTile
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:104
static constexpr index_t ScaleBload_num
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:170
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const BFlatBlockWindowTmp &b_flat_dram_block_window_tmp, const DequantBFlatWindow &scale_b_flat_window, const index_t num_loop, void *p_smem_ping, void *p_smem_pong) const
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:1241
static constexpr index_t GetVectorSizeC()
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:88
static constexpr index_t BK1
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:123
static constexpr index_t dswrite_rep
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:165
remove_cvref_t< typename Problem::ADataType > ADataType
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:53
static constexpr index_t Aload_rep
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:167
static constexpr index_t Bload_num_perK
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:168
static constexpr index_t kLdsAlignmentInBytes
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:94
static constexpr index_t m_preload
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:124
static constexpr index_t dswrite_mIter
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:180
static constexpr index_t flatKPerWarp
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:83
static constexpr index_t HalfMIter
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:176
static constexpr index_t Aload_num_perK
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:166
static constexpr bool DoubleSmemBuffer
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:184
static constexpr auto idxM
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:101
static constexpr bool kPadM
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:90
static constexpr index_t kKPerBlock
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:81
static constexpr index_t flatNPerWarp
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:84
static constexpr auto TailNum
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:152
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:32
BlockGemmShape_ BlockGemmShape
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:33
BDataType_ QuantType
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:35
static constexpr index_t flatKPerWarp
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:44
static constexpr int ContinuousScaleKPerThread
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:43
static constexpr int ContinuousKPerThread
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:41
static constexpr index_t flatNPerWarp
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:37
static constexpr int ContinuousScaleNPerThread
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:42
static constexpr int MXF4ScaleGranularityK
Definition mixed_prec_flatmm_pipeline_agmem_bgmem_creg_v1.hpp:39
Definition flatmm_pipeline_agmem_bgmem_creg_v1.hpp:47
Definition gemm_pipeline_problem.hpp:323
Definition tile/core/utility/functional.hpp:86
Definition tile/core/container/sequence.hpp:49
Definition tile/core/utility/functional.hpp:43
Definition tile/core/utility/debug.hpp:67