Skip to content

Commit 6367c97

Browse files
committed
Adding README back into the gemm directory and integrate new preshuffle functions
1 parent 60bc24a commit 6367c97

15 files changed

Lines changed: 685 additions & 332 deletions

Jenkinsfile

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1623,7 +1623,7 @@ pipeline {
16231623
-D GEMM_PRESHUFFLE_LAYOUT="rcr" \
16241624
-D GEMM_PRESHUFFLE_CONFIG_FILE="default_ci_config.json" .. && \
16251625
ninja -j${nthreads()} benchmark_gemm_universal_all benchmark_gemm_preshuffle_all benchmark_gemm_multi_d_all && \
1626-
python3 ../tile_engine/ops/gemm/gemm_universal/gemm_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json results.json && \
1626+
python3 ../tile_engine/ops/gemm/gemm_universal/gemm_universal_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json results.json && \
16271627
python3 ../tile_engine/ops/gemm/gemm_preshuffle/gemm_preshuffle_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json results.json && \
16281628
python3 ../tile_engine/ops/gemm/gemm_multi_d/gemm_multi_d_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json results.json """
16291629
}
@@ -1664,7 +1664,7 @@ pipeline {
16641664
-D GEMM_PRESHUFFLE_DATATYPE="fp16;fp8;bf16;bf8" \
16651665
-D GEMM_PRESHUFFLE_LAYOUT="rcr" .. && \
16661666
ninja -j${nthreads()} benchmark_gemm_universal_all benchmark_gemm_preshuffle_all benchmark_gemm_multi_d_all benchmark_gemm_streamk_all && \
1667-
python3 ../tile_engine/ops/gemm/gemm_universal/gemm_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json results.json && \
1667+
python3 ../tile_engine/ops/gemm/gemm_universal/gemm_universal_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json results.json && \
16681668
python3 ../tile_engine/ops/gemm/gemm_preshuffle/gemm_preshuffle_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json results.json && \
16691669
python3 ../tile_engine/ops/gemm/gemm_multi_d/gemm_multi_d_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json results.json """
16701670
}
@@ -1689,7 +1689,7 @@ pipeline {
16891689
-D GEMM_UNIVERSAL_DATATYPE="fp16" \
16901690
-D GEMM_UNIVERSAL_LAYOUT="rcr;rrr;crr;ccr" .. && \
16911691
ninja -j${nthreads()} benchmark_gemm_universal_all && \
1692-
python3 ../tile_engine/ops/gemm/gemm_universal/gemm_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json results.json """
1692+
python3 ../tile_engine/ops/gemm/gemm_universal/gemm_universal_benchmark.py . --problem-sizes "1024,1024,1024" --warmup 5 --repeat 5 --verbose --json results.json """
16931693
}
16941694
steps{
16951695
buildHipClangJobAndReboot(setup_args:setup_args, build_type: 'Release', execute_cmd: execute_args)

test/ck_tile/gemm_tile_engine/CMakeLists.txt

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
# ============================================================================
1111

1212
# Locate tile_engine GEMM scripts directory
13-
set(TILE_ENGINE_GEMM_DIR "${PROJECT_SOURCE_DIR}/tile_engine/ops/gemm")
13+
set(TILE_ENGINE_GEMM_DIR "${PROJECT_SOURCE_DIR}/tile_engine/ops/gemm/gemm_universal")
1414

1515
if(NOT EXISTS ${TILE_ENGINE_GEMM_DIR})
1616
message(WARNING "Tile engine directory not found: ${TILE_ENGINE_GEMM_DIR}")
@@ -32,11 +32,11 @@ endif()
3232
# config_json - Full path to JSON configuration file
3333
# ============================================================================
3434
function(create_individual_gemm_test_target datatype layout config_name trait tile_config config_json)
35-
set(target_name "test_gemm_tile_engine_${datatype}_${layout}_${config_name}_${trait}_${tile_config}")
35+
set(target_name "test_gemm_universal_tile_engine_${datatype}_${layout}_${config_name}_${trait}_${tile_config}")
3636
set(working_path "${CMAKE_CURRENT_BINARY_DIR}/${datatype}/${layout}/${config_name}")
3737

3838
# Generated header path (already created during cmake configuration)
39-
set(test_header "${working_path}/gemm_single_${datatype}_${layout}_${trait}_${tile_config}.hpp")
39+
set(test_header "${working_path}/gemm_universal_single_${datatype}_${layout}_${trait}_${tile_config}.hpp")
4040
set(test_params_header "${working_path}/test_params.hpp")
4141

4242
# Verify header exists (should have been generated during cmake configuration)
@@ -118,7 +118,7 @@ function(build_gemm_test_targets datatype layout config_name)
118118

119119
# STEP 1: Discovery phase - list all valid kernel configurations
120120
execute_process(
121-
COMMAND ${Python3_EXECUTABLE} -u ${TILE_ENGINE_GEMM_DIR}/gemm_instance_builder.py
121+
COMMAND ${Python3_EXECUTABLE} -u ${TILE_ENGINE_GEMM_DIR}/gemm_universal_instance_builder.py
122122
--working_path ${working_path}
123123
--datatype ${datatype}
124124
--layout ${layout}
@@ -178,7 +178,7 @@ function(build_gemm_test_targets datatype layout config_name)
178178

179179
# Generate header using --gen_single
180180
execute_process(
181-
COMMAND ${Python3_EXECUTABLE} -u ${TILE_ENGINE_GEMM_DIR}/gemm_instance_builder.py
181+
COMMAND ${Python3_EXECUTABLE} -u ${TILE_ENGINE_GEMM_DIR}/gemm_universal_instance_builder.py
182182
--working_path ${working_path}
183183
--gpu_target "${GEMM_TEST_GPU_TARGETS}"
184184
--datatype ${datatype}

tile_engine/ops/common/utils.hpp

Lines changed: 0 additions & 83 deletions
Original file line number | Diff line number | Diff line change
@@ -20,89 +20,6 @@ constexpr auto is_row_major(Layout)
2020
return ck_tile::bool_constant<std::is_same_v<Layout, ck_tile::tensor_layout::gemm::RowMajor>>{};
2121
}
2222

23-
// Structure to hold kernel traits for dispatcher
24-
struct KernelTraits
25-
{
26-
std::string pipeline; // compv3, compv4, mem
27-
std::string scheduler; // intrawave, interwave
28-
std::string epilogue; // cshuffle, default
29-
bool pad_m;
30-
bool pad_n;
31-
bool pad_k;
32-
bool persistent;
33-
34-
// Constructor with defaults
35-
KernelTraits()
36-
: pipeline("compv3"),
37-
scheduler("intrawave"),
38-
epilogue("cshuffle"),
39-
pad_m(false),
40-
pad_n(false),
41-
pad_k(false),
42-
persistent(false)
43-
{
44-
}
45-
};
46-
47-
48-
// Create argument parser
49-
inline auto create_args(int argc, char* argv[])
50-
{
51-
ck_tile::ArgParser arg_parser;
52-
arg_parser.insert("m", "3840", "The value for m dimension. Default is 3840.")
53-
.insert("n", "4096", "The value for n dimension. Default is 4096.")
54-
.insert("k", "2048", "The value for k dimension. Default is 2048.")
55-
.insert("stride_a", "0", "The stride value for tensor A. Default is 0.")
56-
.insert("stride_b", "0", "The stride value for tensor B. Default is 0.")
57-
.insert("stride_ds", "0", "The stride value for tensor Ds . Default is 0.")
58-
.insert("stride_c", "0", "The stride value for tensor C. Default is 0.")
59-
.insert("split_k", "1", "The split value for k dimension. Default is 1.")
60-
.insert("verify",
61-
"2",
62-
"The type of validation. Set to 0 for no validation, 1 for validation on CPU, or 2 "
63-
"for validation on GPU. Default is 2, GPU validation.")
64-
.insert("log",
65-
"false",
66-
"Whether output kernel instance information or not. Possible values are true or "
67-
"false. Default is false")
68-
.insert(
69-
"warmup", "50", "The number of iterations before benchmark the kernel. Default is 50.")
70-
.insert(
71-
"repeat", "100", "The number of iterations to benchmark the kernel. Default is 100.")
72-
.insert("timer",
73-
"true",
74-
"Whether if the timer is gpu timer or not. Possible values are false or true. "
75-
"Default is true.")
76-
.insert("init",
77-
"0",
78-
"The method of tensor initialization. Set to 0 for random, to 1 for linear, or 2 "
79-
"for constant(1). Default is 0, random.")
80-
.insert("flush_cache",
81-
"true",
82-
"To flush cache, possible values are true or false. "
83-
"Default is false.")
84-
.insert("rotating_count", "1000", "number of iterations to rotate the cache. default is 5.")
85-
.insert("metric",
86-
"0",
87-
"Metric with which to measure kernel performance. Set to 0 for latency, 1 for "
88-
"tflops, or 2 for bandwidth. Default is 0, latency.")
89-
.insert("csv_filename",
90-
"",
91-
"The filename of benchmark result. Default is empty (no CSV output).")
92-
.insert("structured_sparsity",
93-
"false",
94-
"Whether use sparsity kernel or not. Possible values are true or false. Default is "
95-
"false")
96-
.insert("json_output",
97-
"false",
98-
"Whether to output results in JSON format only. Possible values are true or false. "
99-
"Default is "
100-
"false");
101-
102-
bool result = arg_parser.parse(argc, argv);
103-
return std::make_tuple(result, arg_parser);
104-
}
105-
10623
enum class Metric
10724
{
10825
LATENCY = 0,

0 commit comments

Comments
 (0)