@@ -20,89 +20,6 @@ constexpr auto is_row_major(Layout)
2020 return ck_tile::bool_constant<std::is_same_v<Layout, ck_tile::tensor_layout::gemm::RowMajor>>{};
2121}
2222
/// Kernel traits handed to the dispatcher.
///
/// Every member has a default member initializer, so a default-constructed
/// KernelTraits holds: pipeline "compv3", scheduler "intrawave", epilogue
/// "cshuffle", and all boolean flags false.
struct KernelTraits
{
    std::string pipeline  = "compv3";    ///< compv3, compv4, mem
    std::string scheduler = "intrawave"; ///< intrawave, interwave
    std::string epilogue  = "cshuffle";  ///< cshuffle, default
    bool pad_m            = false;
    bool pad_n            = false;
    bool pad_k            = false;
    bool persistent       = false;
};
46-
47-
48- // Create argument parser
49- inline auto create_args (int argc, char * argv[])
50- {
51- ck_tile::ArgParser arg_parser;
52- arg_parser.insert (" m" , " 3840" , " The value for m dimension. Default is 3840." )
53- .insert (" n" , " 4096" , " The value for n dimension. Default is 4096." )
54- .insert (" k" , " 2048" , " The value for k dimension. Default is 2048." )
55- .insert (" stride_a" , " 0" , " The stride value for tensor A. Default is 0." )
56- .insert (" stride_b" , " 0" , " The stride value for tensor B. Default is 0." )
57- .insert (" stride_ds" , " 0" , " The stride value for tensor Ds . Default is 0." )
58- .insert (" stride_c" , " 0" , " The stride value for tensor C. Default is 0." )
59- .insert (" split_k" , " 1" , " The split value for k dimension. Default is 1." )
60- .insert (" verify" ,
61- " 2" ,
62- " The type of validation. Set to 0 for no validation, 1 for validation on CPU, or 2 "
63- " for validation on GPU. Default is 2, GPU validation." )
64- .insert (" log" ,
65- " false" ,
66- " Whether output kernel instance information or not. Possible values are true or "
67- " false. Default is false" )
68- .insert (
69- " warmup" , " 50" , " The number of iterations before benchmark the kernel. Default is 50." )
70- .insert (
71- " repeat" , " 100" , " The number of iterations to benchmark the kernel. Default is 100." )
72- .insert (" timer" ,
73- " true" ,
74- " Whether if the timer is gpu timer or not. Possible values are false or true. "
75- " Default is true." )
76- .insert (" init" ,
77- " 0" ,
78- " The method of tensor initialization. Set to 0 for random, to 1 for linear, or 2 "
79- " for constant(1). Default is 0, random." )
80- .insert (" flush_cache" ,
81- " true" ,
82- " To flush cache, possible values are true or false. "
83- " Default is false." )
84- .insert (" rotating_count" , " 1000" , " number of iterations to rotate the cache. default is 5." )
85- .insert (" metric" ,
86- " 0" ,
87- " Metric with which to measure kernel performance. Set to 0 for latency, 1 for "
88- " tflops, or 2 for bandwidth. Default is 0, latency." )
89- .insert (" csv_filename" ,
90- " " ,
91- " The filename of benchmark result. Default is empty (no CSV output)." )
92- .insert (" structured_sparsity" ,
93- " false" ,
94- " Whether use sparsity kernel or not. Possible values are true or false. Default is "
95- " false" )
96- .insert (" json_output" ,
97- " false" ,
98- " Whether to output results in JSON format only. Possible values are true or false. "
99- " Default is "
100- " false" );
101-
102- bool result = arg_parser.parse (argc, argv);
103- return std::make_tuple (result, arg_parser);
104- }
105-
10623enum class Metric
10724{
10825 LATENCY = 0 ,
0 commit comments