GridwiseElementwiseLayernormWelfordVariance_mk_to_mk< InDataTypePointerTuple, XDataType, GammaDataType, BetaDataType, YDataType, AccDataType, XElementwiseOperation, YElementwiseOperation, InGrid2dDescTuple, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SweepOnce > Struct Template Reference
ck::GridwiseElementwiseLayernormWelfordVariance_mk_to_mk< InDataTypePointerTuple, XDataType, GammaDataType, BetaDataType, YDataType, AccDataType, XElementwiseOperation, YElementwiseOperation, InGrid2dDescTuple, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SweepOnce > Struct Template Reference
#include <gridwise_elementwise_layernorm_welford_variance.hpp>
Public Types | |
| using | ThreadClusterLengths_M_K = Sequence<MThreadClusterSize, KThreadClusterSize> |
| using | ThreadBufferDimAccessOrder |
| using | ThreadClusterArrangeOrder |
| using | ThreadReduceSrcDesc_M_K |
| using | ThreadReduceDstDesc_M |
| using | ThreadwiseWelford |
| using | BlockwiseWelford |
Static Public Member Functions | |
| static __device__ int | GetKPerThread (const GridDesc_M_K &x_grid_desc_m_k, int thread_k_cluster_id) |
| static __device__ void | Run (const InGrid2dDescTuple in_grid_2d_desc_tuple, const GridDesc_M_K &x_grid_desc_m_k, const GridDesc_M_K &gamma_grid_desc_m_k, const GridDesc_M_K &beta_grid_desc_m_k, const GridDesc_M_K &y_grid_desc_m_k, index_t num_k_block_tile_iteration, AccDataType epsilon, const InDataTypePointerTuple p_in_global_tuple, XDataType *const __restrict__ p_x_lds_, const GammaDataType *const __restrict__ p_gamma_global, const BetaDataType *const __restrict__ p_beta_global, YDataType *const __restrict__ p_y_global, const XElementwiseOperation x_elementwise_op, const YElementwiseOperation y_elementwise_op) |
Static Public Attributes | |
| static constexpr index_t | NumInput = InDataTypePointerTuple::Size() |
| static constexpr bool | reorder_thread_cluster = (XSrcVectorDim == 0) |
| static constexpr auto | thread_cluster_desc |
| static constexpr auto | I0 = Number<0>{} |
| static constexpr auto | I1 = Number<1>{} |
| static constexpr auto | I2 = Number<2>{} |
| static constexpr index_t | M_BlockTileSize = MThreadClusterSize * MThreadSliceSize |
| static constexpr index_t | K_BlockTileSize = KThreadClusterSize * KThreadSliceSize |
| static constexpr index_t | K_BlockTileStepSize = KThreadClusterSize * XSrcVectorSize |
| static constexpr auto | XThreadBufferNumber = Number<KThreadSliceSize / XSrcVectorSize>{} |
| static constexpr auto | GammaThreadBufferNumber = Number<KThreadSliceSize / GammaSrcVectorSize>{} |
| static constexpr auto | BetaThreadBufferNumber = Number<KThreadSliceSize / BetaSrcVectorSize>{} |
| static constexpr auto | YThreadBufferNumber = Number<KThreadSliceSize / YDstVectorSize>{} |
Member Typedef Documentation
◆ BlockwiseWelford
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| using ck::GridwiseElementwiseLayernormWelfordVariance_mk_to_mk< InDataTypePointerTuple, XDataType, GammaDataType, BetaDataType, YDataType, AccDataType, XElementwiseOperation, YElementwiseOperation, InGrid2dDescTuple, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SweepOnce >::BlockwiseWelford |
Initial value:
BlockwiseWelford<AccDataType,
BlockSize,
ThreadClusterLengths_M_K,
ThreadClusterArrangeOrder>
BlockwiseWelford< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, false > BlockwiseWelford
Definition gridwise_multiblock_welford_first_half.hpp:82
typename conditional< reorder_thread_cluster, Sequence< 1, 0 >, Sequence< 0, 1 > >::type ThreadClusterArrangeOrder
Definition gridwise_multiblock_welford_first_half.hpp:68
Sequence< MThreadClusterSize, KThreadClusterSize > ThreadClusterLengths_M_K
Definition gridwise_multiblock_welford_first_half.hpp:63
◆ ThreadBufferDimAccessOrder
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| using ck::GridwiseElementwiseLayernormWelfordVariance_mk_to_mk< InDataTypePointerTuple, XDataType, GammaDataType, BetaDataType, YDataType, AccDataType, XElementwiseOperation, YElementwiseOperation, InGrid2dDescTuple, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SweepOnce >::ThreadBufferDimAccessOrder |
Initial value:
Definition utility/sequence.hpp:43
Definition utility/functional.hpp:100
◆ ThreadClusterArrangeOrder
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| using ck::GridwiseElementwiseLayernormWelfordVariance_mk_to_mk< InDataTypePointerTuple, XDataType, GammaDataType, BetaDataType, YDataType, AccDataType, XElementwiseOperation, YElementwiseOperation, InGrid2dDescTuple, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SweepOnce >::ThreadClusterArrangeOrder |
Initial value:
typename conditional< reorder_thread_cluster, Sequence< 1, 0 >, Sequence< 0, 1 > >::type
◆ ThreadClusterLengths_M_K
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| using ck::GridwiseElementwiseLayernormWelfordVariance_mk_to_mk< InDataTypePointerTuple, XDataType, GammaDataType, BetaDataType, YDataType, AccDataType, XElementwiseOperation, YElementwiseOperation, InGrid2dDescTuple, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SweepOnce >::ThreadClusterLengths_M_K = Sequence<MThreadClusterSize, KThreadClusterSize> |
◆ ThreadReduceDstDesc_M
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| using ck::GridwiseElementwiseLayernormWelfordVariance_mk_to_mk< InDataTypePointerTuple, XDataType, GammaDataType, BetaDataType, YDataType, AccDataType, XElementwiseOperation, YElementwiseOperation, InGrid2dDescTuple, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SweepOnce >::ThreadReduceDstDesc_M |
Initial value:
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
◆ ThreadReduceSrcDesc_M_K
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| using ck::GridwiseElementwiseLayernormWelfordVariance_mk_to_mk< InDataTypePointerTuple, XDataType, GammaDataType, BetaDataType, YDataType, AccDataType, XElementwiseOperation, YElementwiseOperation, InGrid2dDescTuple, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SweepOnce >::ThreadReduceSrcDesc_M_K |
Initial value:
◆ ThreadwiseWelford
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| using ck::GridwiseElementwiseLayernormWelfordVariance_mk_to_mk< InDataTypePointerTuple, XDataType, GammaDataType, BetaDataType, YDataType, AccDataType, XElementwiseOperation, YElementwiseOperation, InGrid2dDescTuple, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, BetaSrcVectorDim, BetaSrcVectorSize, YDstVectorDim, YDstVectorSize, SweepOnce >::ThreadwiseWelford |
Initial value:
ThreadwiseWelford< AccDataType, ThreadReduceSrcDesc_M_K, ThreadReduceDstDesc_M > ThreadwiseWelford
Definition gridwise_multiblock_welford_first_half.hpp:79
Member Function Documentation
◆ GetKPerThread()
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| __device__ int GetKPerThread (const GridDesc_M_K &x_grid_desc_m_k, int thread_k_cluster_id) |
inline static |
◆ Run()
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
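| __device__ void Run (const InGrid2dDescTuple in_grid_2d_desc_tuple, const GridDesc_M_K &x_grid_desc_m_k, const GridDesc_M_K &gamma_grid_desc_m_k, const GridDesc_M_K &beta_grid_desc_m_k, const GridDesc_M_K &y_grid_desc_m_k, index_t num_k_block_tile_iteration, AccDataType epsilon, const InDataTypePointerTuple p_in_global_tuple, XDataType *const __restrict__ p_x_lds_, const GammaDataType *const __restrict__ p_gamma_global, const BetaDataType *const __restrict__ p_beta_global, YDataType *const __restrict__ p_y_global, const XElementwiseOperation x_elementwise_op, const YElementwiseOperation y_elementwise_op) |
inline static |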
Member Data Documentation
◆ BetaThreadBufferNumber
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| constexpr auto BetaThreadBufferNumber = Number<KThreadSliceSize / BetaSrcVectorSize>{} |
static constexpr |
◆ GammaThreadBufferNumber
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| constexpr auto GammaThreadBufferNumber = Number<KThreadSliceSize / GammaSrcVectorSize>{} |
static constexpr |
◆ I0
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| constexpr auto I0 = Number<0>{} |
static constexpr |
◆ I1
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| constexpr auto I1 = Number<1>{} |
static constexpr |
◆ I2
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| constexpr auto I2 = Number<2>{} |
static constexpr |
◆ K_BlockTileSize
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| constexpr index_t K_BlockTileSize = KThreadClusterSize * KThreadSliceSize |
static constexpr |
◆ K_BlockTileStepSize
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| constexpr index_t K_BlockTileStepSize = KThreadClusterSize * XSrcVectorSize |
static constexpr |
◆ M_BlockTileSize
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| constexpr index_t M_BlockTileSize = MThreadClusterSize * MThreadSliceSize |
static constexpr |
◆ NumInput
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| constexpr index_t NumInput = InDataTypePointerTuple::Size() |
static constexpr |
◆ reorder_thread_cluster
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| constexpr bool reorder_thread_cluster = (XSrcVectorDim == 0) |
static constexpr |
◆ thread_cluster_desc
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| constexpr auto thread_cluster_desc |
static constexpr |
Initial value:
= make_cluster_descriptor(ThreadClusterLengths_M_K{}, ThreadClusterArrangeOrder{})
__host__ __device__ constexpr auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition tensor_description/cluster_descriptor.hpp:13
Sequence< MThreadClusterSize, KThreadClusterSize > ThreadClusterLengths_M_K
Definition gridwise_elementwise_layernorm_welford_variance.hpp:55
typename conditional< reorder_thread_cluster, Sequence< 1, 0 >, Sequence< 0, 1 > >::type ThreadClusterArrangeOrder
Definition gridwise_elementwise_layernorm_welford_variance.hpp:60
◆ XThreadBufferNumber
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| constexpr auto XThreadBufferNumber = Number<KThreadSliceSize / XSrcVectorSize>{} |
static constexpr |
◆ YThreadBufferNumber
template<typename InDataTypePointerTuple, typename XDataType, typename GammaDataType, typename BetaDataType, typename YDataType, typename AccDataType, typename XElementwiseOperation, typename YElementwiseOperation, typename InGrid2dDescTuple, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t BetaSrcVectorDim, index_t BetaSrcVectorSize, index_t YDstVectorDim, index_t YDstVectorSize, bool SweepOnce>
| constexpr auto YThreadBufferNumber = Number<KThreadSliceSize / YDstVectorSize>{} |
static constexpr |
The documentation for this struct was generated from the following file:
gridwise_elementwise_layernorm_welford_variance.hpp