Skip to content

Commit 5c09651

Browse files
committed
estimate vgpr
1 parent d7497d2 commit 5c09651

6 files changed

Lines changed: 368 additions & 273 deletions

File tree

example/35_splitK_gemm/gemm_wmma_splitk_reduce_multi_d_fp16.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,10 +33,10 @@ using DeviceGemmV2Instance =
3333
ADataType, BDataType, DsDataType, CDataType, AccDataType, CShuffleDataType,
3434
AElementOp, BElementOp, CDEElementOp, GemmDefault,
3535
256,
36-
128, 256, 64,
36+
128, 128, 64,
3737
8, 8,
3838
16, 16,
39-
4, 4,
39+
4, 2,
4040
S<4, 64, 1>, S<0, 2, 1>, S<0, 2, 1>,
4141
1, 1, 8, true,
4242
S<4, 64, 1>, S<0, 2, 1>, S<0, 2, 1>,

example/60_gemm_multi_ABD/gemm_multi_ABD_wmma_bias_fastgelu_bf16_i8.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,13 @@ using DeviceOpInstance = ck::tensor_operation::device::DeviceGemmMultipleABD_Wmm
8181
GemmSpec,
8282
256,
8383
128,
84-
128,
84+
64,
8585
64,
8686
8,
8787
8,
8888
16,
8989
16,
90-
4,
90+
2,
9191
2,
9292
S<8, 32, 1>,
9393
S<1, 0, 2>,
@@ -104,7 +104,7 @@ using DeviceOpInstance = ck::tensor_operation::device::DeviceGemmMultipleABD_Wmm
104104
8,
105105
0,
106106
1,
107-
1,
107+
2,
108108
S<1, 32, 1, 8>,
109109
S<8, 8, 8>,
110110
ck::BlockGemmPipelineScheduler::Intrawave,

example/60_gemm_multi_ABD/gemm_multi_ABD_wmma_fastgelu_bf16_i8.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ using DeviceOpInstance = ck::tensor_operation::device::DeviceGemmMultipleABD_Wmm
106106
S<1, 32, 1, 8>,
107107
S<8, 8, 8>,
108108
ck::BlockGemmPipelineScheduler::Intrawave,
109-
ck::BlockGemmPipelineVersion::v3>;
109+
ck::BlockGemmPipelineVersion::v1>;
110110

111111
int main(int argc, char* argv[])
112112
{

example/60_gemm_multi_ABD/gemm_multi_ABD_wmma_multiply_bias_fastgelu_bf16_i8.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ using DeviceOpInstance = ck::tensor_operation::device::DeviceGemmMultipleABD_Wmm
107107
S<1, 32, 1, 8>,
108108
S<8, 8, 8>,
109109
ck::BlockGemmPipelineScheduler::Intrawave,
110-
ck::BlockGemmPipelineVersion::v3>;
110+
ck::BlockGemmPipelineVersion::v1>;
111111

112112
int main(int argc, char* argv[])
113113
{

example/65_gemm_multiply_multiply/gemm_multiply_multiply_wmma_fp16_bpreshuffle.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,12 @@ using DeviceOpInstance =
6565
A0DataType, B0DataType, DsDataType, EDataType, AccDataType, CShuffleDataType,
6666
AElementOp, BElementOp, CDEElementOp, GemmSpec,
6767
128,
68-
32, 128, 128,
68+
32, 128, 64,
6969
8, 8,
7070
16, 16,
7171
2, 2,
72-
S<16, 8, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0,
73-
S<16, 8, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0,
72+
S<8, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0,
73+
S<8, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 0,
7474
1, 1, S<1, 16, 1, 8>, S<4, 4, 1>,
7575
ck::BlockGemmPipelineScheduler::Intrawave,
7676
ck::BlockGemmPipelineVersion::v1,

0 commit comments

Comments (0)