ck_tile::detail Namespace Reference
Typedefs | |
| template<int32_t Size> | |
| using | make_applier = __make_integer_seq<applier, index_t, Size> |
Functions | |
| template<typename F, typename X, index_t... Is> | |
| CK_TILE_HOST_DEVICE constexpr auto | transform_tuples_impl (F f, const X &x, sequence< Is... >) |
| template<typename F, typename X, typename Y, index_t... Is> | |
| CK_TILE_HOST_DEVICE constexpr auto | transform_tuples_impl (F f, const X &x, const Y &y, sequence< Is... >) |
| template<typename F, typename X, typename Y, typename Z, index_t... Is> | |
| CK_TILE_HOST_DEVICE constexpr auto | transform_tuples_impl (F f, const X &x, const Y &y, const Z &z, sequence< Is... >) |
| template<typename F, typename Tuple, index_t... Is> | |
| constexpr decltype(auto) | apply_impl (F &&f, Tuple &&t, sequence< Is... >) |
| template<typename F, typename X, index_t... Is> | |
| CK_TILE_HOST_DEVICE constexpr auto | embed_tuples_impl (F f, const X &x, sequence< Is... >) |
| CK_TILE_HOST_DEVICE constexpr _Float16 | lane0 (const fp16x2_t &v) |
| CK_TILE_HOST_DEVICE constexpr _Float16 | lane1 (const fp16x2_t &v) |
| CK_TILE_HOST_DEVICE constexpr bfloat16_t | lane0 (const bf16x2_t &v) |
| CK_TILE_HOST_DEVICE constexpr bfloat16_t | lane1 (const bf16x2_t &v) |
| CK_TILE_HOST_DEVICE constexpr float | lane0 (const fp32x2_t &v) |
| CK_TILE_HOST_DEVICE constexpr float | lane1 (const fp32x2_t &v) |
| template<typename OutTensor, typename InTensor> | |
| CK_TILE_DEVICE void | shuffle_tile_impl_in_thread (OutTensor &out_tensor, const InTensor &in_tensor) |
| template<typename Lengths, typename Strides, index_t I, typename AccOld> | |
| CK_TILE_HOST_DEVICE constexpr auto | calculate_element_space_size_impl (const Lengths &lengths, const Strides &strides, number< I > i, AccOld acc_old) |
| template<typename Distribution> | |
| CK_TILE_HOST_DEVICE auto | get_partition_index (Distribution) |
| template<index_t... Is> | |
| CK_TILE_HOST_DEVICE constexpr auto | make_tile_distributed_span (sequence< Is... >) |
| template<index_t... Is> | |
| CK_TILE_HOST_DEVICE constexpr auto | make_tile_distributed_index (sequence< Is... >) |
| template<index_t NDimMax> | |
| CK_TILE_HOST_DEVICE constexpr auto | make_sequential_index (index_t ibegin, index_t iend) |
| template<typename StaticTileDistributionEncoding_> | |
| CK_TILE_HOST_DEVICE constexpr auto | make_adaptor_encoding_for_tile_distribution (StaticTileDistributionEncoding_) |
| template<typename Distribution, index_t... XSliceBegins, index_t... XSliceEnds> | |
| CK_TILE_HOST_DEVICE constexpr auto | slice_distribution_from_x (Distribution, sequence< XSliceBegins... > x_slice_begins, sequence< XSliceEnds... > x_slice_ends) |
| template<typename OuterDstr, typename InnerDstr> | |
| CK_TILE_HOST_DEVICE constexpr auto | make_embed_tile_distribution_encoding (OuterDstr, InnerDstr) |
| template<typename InDstr, index_t... InReduceDimXs> | |
| CK_TILE_HOST_DEVICE constexpr auto | make_reduce_tile_distribution_encoding_impl (InDstr, sequence< InReduceDimXs... > reduce_dim_xs_in) |
| template<typename InDstr, index_t... InReduceDimXs> | |
| CK_TILE_HOST_DEVICE constexpr auto | make_reduce_tile_distribution_encoding (InDstr, sequence< InReduceDimXs... > reduce_dim_xs_in) |
| template<typename OutTensor, typename InTensor> | |
| CK_TILE_DEVICE void | transpose_tile2d_impl_in_thread (OutTensor &out_tensor, const InTensor &in_tensor) |
| CK_TILE_DEVICE float | fma_impl_vsv (float a, float b, float c) |
| CK_TILE_DEVICE float | add_impl_vv (float lhs, float rhs) |
| CK_TILE_DEVICE float | mul_impl_vv (float lhs, float rhs) |
| CK_TILE_DEVICE fp16x2_t | cvt_pk_fp16_f32 (float a, float b) |
| CK_TILE_DEVICE bf16x2_t | cvt_pk_bf16_f32 (float a, float b) |
| CK_TILE_DEVICE fp32x2_t | pk_mul_f32 (fp32x2_t lhs, fp32x2_t rhs) |
Variables | |
| template<typename X, typename Y> | |
| constexpr bool | is_similiar_distributed_tensor_v |
| constexpr index_t | philox_per_tile = 64 |
Typedef Documentation
◆ make_applier
template<int32_t Size>
| using ck_tile::detail::make_applier = __make_integer_seq<applier, index_t, Size> |
Function Documentation
◆ add_impl_vv()
| CK_TILE_DEVICE float ck_tile::detail::add_impl_vv | ( | float | lhs, |
| float | rhs ) |
◆ apply_impl()
template<typename F, typename Tuple, index_t... Is>
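| constexpr decltype(auto) ck_tile::detail::apply_impl | ( | F && | f, |
| Tuple && | t, | ||
| sequence< Is... > | ) |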
constexpr |
◆ calculate_element_space_size_impl()
template<typename Lengths, typename Strides, index_t I, typename AccOld>
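| CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::calculate_element_space_size_impl | ( | const Lengths & | lengths, |
| const Strides & | strides, | ||
| number< I > | i, | ||
| AccOld | acc_old ) |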
constexpr |
◆ cvt_pk_bf16_f32()
| CK_TILE_DEVICE bf16x2_t ck_tile::detail::cvt_pk_bf16_f32 | ( | float | a, |
| float | b ) |
◆ cvt_pk_fp16_f32()
| CK_TILE_DEVICE fp16x2_t ck_tile::detail::cvt_pk_fp16_f32 | ( | float | a, |
| float | b ) |
◆ embed_tuples_impl()
template<typename F, typename X, index_t... Is>
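| CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::embed_tuples_impl | ( | F | f, |
| const X & | x, | ||
| sequence< Is... > | ) |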
constexpr |
◆ fma_impl_vsv()
| CK_TILE_DEVICE float ck_tile::detail::fma_impl_vsv | ( | float | a, |
| float | b, | ||
| float | c ) |
◆ get_partition_index()
template<typename Distribution>
| CK_TILE_HOST_DEVICE auto ck_tile::detail::get_partition_index | ( | Distribution | ) |
◆ lane0() [1/3]
|
constexpr |
◆ lane0() [2/3]
|
constexpr |
◆ lane0() [3/3]
|
constexpr |
◆ lane1() [1/3]
|
constexpr |
◆ lane1() [2/3]
|
constexpr |
◆ lane1() [3/3]
|
constexpr |
◆ make_adaptor_encoding_for_tile_distribution()
template<typename StaticTileDistributionEncoding_>
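| CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::make_adaptor_encoding_for_tile_distribution | ( | StaticTileDistributionEncoding_ | ) |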
constexpr |
◆ make_embed_tile_distribution_encoding()
template<typename OuterDstr, typename InnerDstr>
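| CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::make_embed_tile_distribution_encoding | ( | OuterDstr | , |
| InnerDstr | ) |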
constexpr |
◆ make_reduce_tile_distribution_encoding()
template<typename InDstr, index_t... InReduceDimXs>
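| CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::make_reduce_tile_distribution_encoding | ( | InDstr | , |
| sequence< InReduceDimXs... > | reduce_dim_xs_in ) |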
constexpr |
◆ make_reduce_tile_distribution_encoding_impl()
template<typename InDstr, index_t... InReduceDimXs>
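| CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::make_reduce_tile_distribution_encoding_impl | ( | InDstr | , |
| sequence< InReduceDimXs... > | reduce_dim_xs_in ) |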
constexpr |
◆ make_sequential_index()
template<index_t NDimMax>
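| CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::make_sequential_index | ( | index_t | ibegin, |
| index_t | iend ) |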
constexpr |
◆ make_tile_distributed_index()
template<index_t... Is>
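| CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::make_tile_distributed_index | ( | sequence< Is... > | ) |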
constexpr |
◆ make_tile_distributed_span()
template<index_t... Is>
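| CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::make_tile_distributed_span | ( | sequence< Is... > | ) |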
constexpr |
◆ mul_impl_vv()
| CK_TILE_DEVICE float ck_tile::detail::mul_impl_vv | ( | float | lhs, |
| float | rhs ) |
◆ pk_mul_f32()
| CK_TILE_DEVICE fp32x2_t ck_tile::detail::pk_mul_f32 | ( | fp32x2_t | lhs, |
| fp32x2_t | rhs ) |
◆ shuffle_tile_impl_in_thread()
template<typename OutTensor, typename InTensor>
| CK_TILE_DEVICE void ck_tile::detail::shuffle_tile_impl_in_thread | ( | OutTensor & | out_tensor, |
| const InTensor & | in_tensor ) |
◆ slice_distribution_from_x()
template<typename Distribution, index_t... XSliceBegins, index_t... XSliceEnds>
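| CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::slice_distribution_from_x | ( | Distribution | , |
| sequence< XSliceBegins... > | x_slice_begins, | ||
| sequence< XSliceEnds... > | x_slice_ends ) |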
constexpr |
◆ transform_tuples_impl() [1/3]
template<typename F, typename X, typename Y, typename Z, index_t... Is>
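| CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::transform_tuples_impl | ( | F | f, |
| const X & | x, | ||
| const Y & | y, | ||
| const Z & | z, | ||
| sequence< Is... > | ) |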
constexpr |
◆ transform_tuples_impl() [2/3]
template<typename F, typename X, typename Y, index_t... Is>
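| CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::transform_tuples_impl | ( | F | f, |
| const X & | x, | ||
| const Y & | y, | ||
| sequence< Is... > | ) |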
constexpr |
◆ transform_tuples_impl() [3/3]
template<typename F, typename X, index_t... Is>
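| CK_TILE_HOST_DEVICE constexpr auto ck_tile::detail::transform_tuples_impl | ( | F | f, |
| const X & | x, | ||
| sequence< Is... > | ) |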
constexpr |
◆ transpose_tile2d_impl_in_thread()
template<typename OutTensor, typename InTensor>
| CK_TILE_DEVICE void ck_tile::detail::transpose_tile2d_impl_in_thread | ( | OutTensor & | out_tensor, |
| const InTensor & | in_tensor ) |
Variable Documentation
◆ is_similiar_distributed_tensor_v
template<typename X, typename Y>
| constexpr bool ck_tile::detail::is_similiar_distributed_tensor_v |
inline constexpr |
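Initial value:
= (initializer expression not preserved in this extract; it appears to reference the member shown below; confirm against static_distributed_tensor.hpp)
static constexpr bool value
Definition static_distributed_tensor.hpp:216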
◆ philox_per_tile
| constexpr index_t ck_tile::detail::philox_per_tile = 64 |
constexpr |