device_grouped_gemm_multiple_d_xdl_cshuffle_tile_loop.hpp File Reference
device_grouped_gemm_multiple_d_xdl_cshuffle_tile_loop.hpp File Reference
#include <iostream>
#include <sstream>
#include <tuple>
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp"
#include "ck/host_utility/hip_check_error.hpp"
#include "ck/host_utility/stream_utility.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/utility/loop_scheduler.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_gemm_tile_loop.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include <ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp>
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_selector.hpp"

Go to the source code of this file.
Namespaces | |
| namespace | ck |
| namespace | ck::tensor_operation |
| namespace | ck::tensor_operation::device |
Functions | |
| template<typename GridwiseGemm, typename GemmDesc, GemmSpecialization GemmSpec, typename ADataType, typename BDataType, typename DsDataType, typename EDataType, typename ALayout, typename BLayout, typename DsLayout, typename ELayout, index_t KPerBlock, typename OffsettedBlockToCTileMap, typename LocalBlock2ETileMap, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, BlockGemmPipelineScheduler BlkGemmPipeSched, BlockGemmPipelineVersion BlkGemmPipelineVer> | |
| __global__ void | ck::tensor_operation::device::kernel_grouped_gemm_multiple_d_xdl (const void CK_CONSTANT_ADDRESS_SPACE *gemm_descs_const, const index_t group_count, const AElementwiseOperation a_element_op, const BElementwiseOperation b_element_op, const CDEElementwiseOperation cde_element_op) |
| Entry point kernel for device-wide Grouped GEMM operation. | |