-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmatmul.c
More file actions
63 lines (54 loc) · 2.14 KB
/
matmul.c
File metadata and controls
63 lines (54 loc) · 2.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
//------------------------------------------------------------------------------------
// Matrix multiplication code adapted from an SME programming example provided
// by Arm Ltd.
//------------------------------------------------------------------------------------
#include "matmul.h"
// Reference scalar implementation of single-precision matrix multiplication:
//   output = input_left * input_right
//
// All three matrices are addressed as dense row-major arrays:
//   input_left  : rows_l x cols_l  (element (r, k) at index r * cols_l + k)
//   input_right : cols_l x cols_r  (element (k, c) at index k * cols_r + c)
//   output      : rows_l x cols_r  (element (r, c) at index r * cols_r + c)
//
// Each output element is a dot product accumulated in a float, summing the
// inner dimension in increasing order. When cols_l is 0 every output element
// is written as 0.0f. The pointers are restrict-qualified, so the caller must
// pass non-overlapping buffers.
void matmul_ref(const uint64_t rows_l, const uint64_t cols_l,
                const uint64_t cols_r, const float *restrict input_left,
                const float *restrict input_right, float *restrict output) {
  for (uint64_t row = 0; row < rows_l; ++row) {
    // Offsets of the first element of this row in the left and output matrices.
    const uint64_t left_base = row * cols_l;
    const uint64_t out_base = row * cols_r;

    for (uint64_t col = 0; col < cols_r; ++col) {
      float dot = 0.0f;
      // Walk down column `col` of the right matrix: consecutive elements of a
      // column are cols_r apart in row-major storage.
      uint64_t right_idx = col;
      for (uint64_t k = 0; k < cols_l; ++k) {
        dot += input_left[left_base + k] * input_right[right_idx];
        right_idx += cols_r;
      }
      output[out_base + col] = dot;
    }
  }
}
// uint64_t sme_cntw() {
// uint64_t cnt;
// asm volatile(" smstart sm \n" // smstart sm
// " cntw %[res]\n"
// " smstop sm \n" // smstop sm
// : [res] "=r"(cnt)
// :
// : "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9",
// "p10", "p11", "p12", "p13", "p14", "p15", "z0", "z1", "z2",
// "z3", "z4", "z5", "z6", "z7", "z8", "z9", "z10", "z11", "z12",
// "z13", "z14", "z15", "z16", "z17", "z18", "z19", "z20", "z21",
// "z22", "z23", "z24", "z25", "z26", "z27", "z28", "z29", "z30",
// "z31");
// return cnt;
// }
// void matmul_opt(const uint64_t rows_l, const uint64_t cols_l,
// const uint64_t cols_r, const float *restrict input_left,
// const float *restrict input_right, float *restrict output) {
// asm volatile(
// " smstart sm \n" // smstart sm
// " smstop sm \n" // smstop sm
// :
// :
// :
// );
// }
// void preprocess_l(const uint64_t rows, const uint64_t cols,
// const float *restrict a, float *restrict a_mod) {
// asm volatile(
// " smstart sm \n" // smstart sm
// " smstop sm \n" // smstop sm
// :
// :
// :
// );
// }