Skip to content

Commit 916e1fa

Browse files
author
RuQing Xu
committed
Armv8A Rename Regs for Clang Compile: FP64 Part
- x7, x8: Used to store the addresses of Alpha and Beta. Since Alpha & Beta are not used in the k-loops, use x0 and x1 to load their addresses after the k-loops are completed, because the addresses of A & B are no longer needed at that point. This "ldr [addr]; -> ldr val, [addr]" sequence (loading an address and then loading the value through it shortly afterwards) should not cause a noticeable performance penalty, since it is done outside the k-loops and there are plenty of instructions between the loading of Alpha & Beta and their use.
- x9: Used to store cs_c. x9 is multiplied by 8 into x10 and not used afterwards. Loading cs_c directly into x10 and scaling it by 8 in place frees x9.
- x11, x12: Not used at all. Simply removed from the clobber list.
- x13: Like x9, it is loaded and scaled by 8 into x14, except that x13 is also used in a conditional branch, so "cmp x13, #1" needs to be changed to "cmp x14, #8" to completely free x13.
- x3, x4: Used to store next_a & next_b. Untouched in the k-loops. Load these addresses into x0 and x1 after Alpha & Beta have both been loaded, since by then neither the addresses of A/B nor the addresses of Alpha/Beta are needed.
1 parent 7fabd89 commit 916e1fa

1 file changed

Lines changed: 21 additions & 23 deletions

File tree

kernels/armv8a/3/bli_gemm_armv8a_asm_d6x8.c

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1135,20 +1135,14 @@ __asm__ volatile
11351135
" ldr x1,%[baddr] \n\t" // Load address of B
11361136
" ldr x2,%[caddr] \n\t" // Load address of C
11371137
" \n\t"
1138-
" ldr x3,%[a_next] \n\t" // Move pointer
1139-
" ldr x4,%[b_next] \n\t" // Move pointer
1140-
" \n\t"
11411138
" ldr x5,%[k_iter] \n\t" // Init guard (k_iter)
11421139
" ldr x6,%[k_left] \n\t" // Init guard (k_iter)
11431140
" \n\t"
1144-
" ldr x7,%[alpha] \n\t" // Alpha address
1145-
" ldr x8,%[beta] \n\t" // Beta address
1146-
" \n\t"
1147-
" ldr x9,%[cs_c] \n\t" // Load cs_c
1148-
" lsl x10,x9,#3 \n\t" // cs_c * sizeof(double)
1141+
" ldr x10,%[cs_c] \n\t" // Load cs_c
1142+
" lsl x10,x10,#3 \n\t" // cs_c * sizeof(double)
11491143
" \n\t"
1150-
" ldr x13,%[rs_c] \n\t" // Load rs_c.
1151-
" lsl x14,x13,#3 \n\t" // rs_c * sizeof(double).
1144+
" ldr x14,%[rs_c] \n\t" // Load rs_c.
1145+
" lsl x14,x14,#3 \n\t" // rs_c * sizeof(double).
11521146
" \n\t"
11531147
" add x20,x2,x10 \n\t" //Load address Column 1 of C
11541148
" add x21,x20,x10 \n\t" //Load address Column 2 of C
@@ -1610,10 +1604,16 @@ BNE(DLOOPKLEFT) // if i!=0.
16101604
" \n\t"
16111605
LABEL(DPOSTACCUM)
16121606
" \n\t"
1613-
" ld1r {v6.2d},[x7] \n\t" // Load alpha.
1614-
" ld1r {v7.2d},[x8] \n\t" // Load beta
1607+
" ldr x0,%[alpha] \n\t" // Alpha address
1608+
" ldr x1,%[beta] \n\t" // Beta address
1609+
" \n\t"
1610+
" ld1r {v6.2d},[x0] \n\t" // Load alpha.
1611+
" ld1r {v7.2d},[x1] \n\t" // Load beta
16151612
" \n\t"
1616-
" cmp x13,#1 \n\t" // If rs_c != 1 (column-major)
1613+
" ldr x0,%[a_next] \n\t" // Next A address for later use.
1614+
" ldr x1,%[b_next] \n\t" // Next B address for later use.
1615+
" \n\t"
1616+
" cmp x14,#8 \n\t" // If rs_c != 1 (column-major)
16171617
BNE(DGENSTORED)
16181618
" \n\t"
16191619
LABEL(DCOLSTORED) // C is column-major.
@@ -1771,8 +1771,8 @@ BEQ(DBETAZEROCOLSTOREDS4) // Taking care of the beta==0
17711771
" \n\t"
17721772
LABEL(DBETAZEROCOLSTOREDS4)
17731773
" \n\t"
1774-
" prfm pldl2keep,[x3] \n\t"
1775-
" prfm pldl2keep,[x4] \n\t"
1774+
" prfm pldl2keep,[x0] \n\t"
1775+
" prfm pldl2keep,[x1] \n\t"
17761776
" \n\t"
17771777
" fmla v8.2d, v26.2d,v6.d[0] \n\t" // Scale by alpha
17781778
" fmla v9.2d, v27.2d,v6.d[0] \n\t" // Scale by alpha
@@ -2016,8 +2016,8 @@ BEQ(DBETAZEROGENSTOREDS4) // Taking care of the beta==0
20162016
" \n\t"
20172017
LABEL(DBETAZEROGENSTOREDS4)
20182018
" \n\t"
2019-
" prfm pldl2keep,[x3] \n\t"
2020-
" prfm pldl2keep,[x4] \n\t"
2019+
" prfm pldl2keep,[x0] \n\t"
2020+
" prfm pldl2keep,[x1] \n\t"
20212021
" \n\t"
20222022
" fmla v8.2d, v26.2d,v6.d[0] \n\t" // Scale by alpha
20232023
" fmla v9.2d, v27.2d,v6.d[0] \n\t" // Scale by alpha
@@ -2060,12 +2060,10 @@ LABEL(DEND) // Done!
20602060
[a_next] "m" (a_next), // 8
20612061
[b_next] "m" (b_next) // 9
20622062
:// Register clobber list
2063-
"x0","x1","x2","x3",
2064-
"x4","x5","x6",
2065-
"x7","x8","x9",
2066-
"x10","x11","x12","x13","x14","x16","x17",
2067-
"x20","x21","x22","x23","x24","x25","x26",
2068-
"x27",
2063+
"x0","x1","x2",
2064+
"x5","x6","x10",
2065+
"x14","x16","x17",
2066+
"x20","x21","x22","x23","x24","x25","x26","x27",
20692067
"v0","v1","v2",
20702068
"v3","v4","v5",
20712069
"v6","v7","v8",

0 commit comments

Comments
 (0)