Skip to content

Commit ee238e4

Browse files
committed
[mppa] Add Mppa PMU counters
Add support for Mppa-specific counters in the existing interface for PMU counters. These counters are extracted from the list of all counters and processed separately. All counters are available in each KVX PE, so some post-processing is required to reduce the raw measured values. Several reduction functions are available, on both the PEs and the clusters:
- avg: average
- min: minimum
- max: maximum
- sum: sum
- <id>: select one specific cluster/PE id
Host counters are still working. Only one csrcs perf_event function has been overridden, to enable Mppa counters by resetting previous values.
1 parent 43d5d1f commit ee238e4

6 files changed

Lines changed: 432 additions & 30 deletions

File tree

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ check-lit-nvgpu:
5858

5959
check-lit-mppa:
6060
env XTC_MLIR_TARGET=mppa lit -v -j 1 tests/filecheck/backends/target_mppa
61+
env XTC_MLIR_TARGET=mppa lit -v -j 1 tests/filecheck/evaluation/test_matmul_pmu_counters_mppa.py
6162

6263
check-pytest:
6364
scripts/pytest/run_pytest.sh -v
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* SPDX-License-Identifier: BSD-3-Clause
3+
* Copyright (c) 2024-2026 The XTC Project Authors
4+
*/
5+
6+
#ifndef __PERF_EVENT_MPPA_H__
7+
#define __PERF_EVENT_MPPA_H__
8+
9+
#include "perf_event.h"
10+
11+
/*
 * Target bindings for the perf_event entry points on MPPA.
 *
 * Every binding except mppa_ENABLE_PERF_EVENTS maps straight onto the host
 * implementation declared in perf_event.h.
 */

/* Single-event operations: host implementation. */
#define mppa_OPEN_PERF_EVENT         open_perf_event
#define mppa_READ_PERF_EVENT         read_perf_event
#define mppa_CLOSE_PERF_EVENT        close_perf_event

/* Event-group operations: host implementation. */
#define mppa_OPEN_PERF_EVENTS        open_perf_events
#define mppa_CLOSE_PERF_EVENTS       close_perf_events
#define mppa_RESET_PERF_EVENTS       reset_perf_events
#define mppa_START_PERF_EVENTS       start_perf_events
#define mppa_STOP_PERF_EVENTS        stop_perf_events

/* Configuration helpers: host implementation. */
#define mppa_GET_PERF_EVENT_CONFIG   get_perf_event_config
#define mppa_PERF_EVENT_ARGS_DESTROY perf_event_args_destroy

/* MPPA-specific override: resets the MPPA counters before enabling. */
#define mppa_ENABLE_PERF_EVENTS      mppa_enable_perf_events
22+
23+
/* Defined in the MPPA perf-event source file; resets the MPPA PM counters. */
extern void mppa_reset_perf_events(void);

/*
 * MPPA replacement for the host enable_perf_events().
 *
 * Resets the MPPA counters first, then enables the host events exactly as
 * the host implementation does.
 *
 * n_events: number of entries in fds.
 * fds:      event file descriptors, forwarded unchanged to
 *           enable_perf_events().
 *
 * Declared static inline because this definition lives in a header: with
 * external linkage, every translation unit including the header would emit
 * its own copy and the link would fail with duplicate symbols.
 */
static inline void mppa_enable_perf_events(int n_events, const int *fds) {
  mppa_reset_perf_events();
  enable_perf_events(n_events, fds);
}
29+
30+
#endif
31+
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/*
2+
* SPDX-License-Identifier: BSD-3-Clause
3+
* Copyright (c) 2024-2026 The XTC Project Authors
4+
*/
5+
#include "host_structures.h"
#include "mppa_management_host.h"

#include <mppa_offload_host.h>

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
14+
15+
#define MAX_NB_PM_COUNTERS 8 /* value of _COS_PM_NB for Coolidge V2 */
16+
17+
/*
 * One entry of the KVX performance-monitor (PM) counter catalogue.
 */
typedef struct {
  const char *name;        /* mnemonic used to select the counter by name */
  int id;                  /* numeric counter id (matching _COS_PM_* value noted per entry) */
  const char *description; /* human-readable description */
} kvx_pm_counter_t;

#define NB_COUNTERS_AVAILABLE 55

/*
 * Catalogue of the selectable KVX PM counters, ordered by id.
 *
 * The array size is deduced from the initializer and checked against
 * NB_COUNTERS_AVAILABLE below. With an explicit bound, removing an entry
 * would silently leave zero-filled tail slots (name == NULL) that a
 * name lookup would then hand to strcmp().
 */
static const kvx_pm_counter_t kvx_pm_counters[] = {
  /* NOTE(review): "CycleSAT" looks like a stray suffix; confirm against the vendor header. */
  { "PCC"      ,  0 /* _COS_PM_PCC      */, "Processor Clock CycleSAT" },
  { "ICC"      ,  1 /* _COS_PM_ICC      */, "Idle Clock Cycle" },
  { "EBE"      ,  2 /* _COS_PM_EBE      */, "Executed Bundle Event" },
  { "ENIE"     ,  3 /* _COS_PM_ENIE     */, "Executed N Instructions Event" },
  { "ENSE"     ,  4 /* _COS_PM_ENSE     */, "Executed N Syllables Event" },
  { "ICHE"     ,  5 /* _COS_PM_ICHE     */, "I$ Hit Event" },
  { "ICME"     ,  6 /* _COS_PM_ICME     */, "I$ Miss Event" },
  { "ICMABE"   ,  7 /* _COS_PM_ICMABE   */, "I$ Memory Accesses Burst Event" },
  { "MNGIC"    ,  8 /* _COS_PM_MNGIC    */, "Memory Not Granting Instruction cache access Cycle" },
  { "MIMHE"    ,  9 /* _COS_PM_MIMHE    */, "MMU Instruction Micro-tlb Hit Event" },
  { "MIMME"    , 10 /* _COS_PM_MIMME    */, "MMU Instruction Micro-tlb Miss Event" },
  { "IATSC"    , 11 /* _COS_PM_IATSC    */, "Instruction Address Translation Stall Cycle" },
  { "FE"       , 12 /* _COS_PM_FE       */, "Fetch Event" },
  { "PBSC"     , 13 /* _COS_PM_PBSC     */, "Prefetch Buffer Starvation Cycle" },
  { "PNVC"     , 14 /* _COS_PM_PNVC     */, "Pipeline No Valid Cycle" },
  { "PSC"      , 15 /* _COS_PM_PSC      */, "Pipeline Starvation Cycle" },
  { "TADBE"    , 16 /* _COS_PM_TADBE    */, "Taken Applicative Direct Branch Event" },
  { "TABE"     , 17 /* _COS_PM_TABE     */, "Taken Applicative Branch Event" },
  { "TBE"      , 18 /* _COS_PM_TBE      */, "Taken Branch Event" },
  { "MDMHE"    , 19 /* _COS_PM_MDMHE    */, "MMU Data Micro-tlb Hit Event" },
  { "MDMME"    , 20 /* _COS_PM_MDMME    */, "MMU Data Micro-tlb Miss Event" },
  { "DATSC"    , 21 /* _COS_PM_DATSC    */, "Data Address Translation Stall Cycle" },
  { "DCLHE"    , 22 /* _COS_PM_DCLHE    */, "D$ Load Hit Event" },
  { "DCHE"     , 23 /* _COS_PM_DCHE     */, "D$ Hit Event" },
  { "DCLME"    , 24 /* _COS_PM_DCLME    */, "D$ Load Miss Event" },
  { "DCME"     , 25 /* _COS_PM_DCME     */, "D$ Miss Event" },
  { "DARSC"    , 26 /* _COS_PM_DARSC    */, "Data Access Related Stall Cycle" },
  { "LDSC"     , 27 /* _COS_PM_LDSC     */, "Load Dependency Stall Cycle" },
  { "DCNGC"    , 28 /* _COS_PM_DCNGC    */, "Data Cache Not Granting Cycle" },
  { "DMAE"     , 29 /* _COS_PM_DMAE     */, "Data Misaligned Access Event" },
  { "LCFSC"    , 30 /* _COS_PM_LCFSC    */, "Load Cam Full Stall Cycle" },
  { "MNGDC"    , 31 /* _COS_PM_MNGDC    */, "Memory Not Granting Data cache access Cycle" },
  { "MACC"     , 32 /* _COS_PM_MACC     */, "Memory Accesses Conflict Cycle" },
  { "TACC"     , 33 /* _COS_PM_TACC     */, "TLB Accesses Conflict Cycle" },
  { "IWC"      , 34 /* _COS_PM_IWC      */, "Idle Wait Cycle" },
  { "WISC"     , 35 /* _COS_PM_WISC     */, "Wait Instruction Stall Cycle" },
  { "SISC"     , 36 /* _COS_PM_SISC     */, "Synchronization Instruction Stall Cycle" },
  { "DDSC"     , 37 /* _COS_PM_DDSC     */, "Data Dependency Stall Cycle" },
  { "SC"       , 38 /* _COS_PM_SC       */, "Stall Cycle" },
  { "ELE"      , 39 /* _COS_PM_ELE      */, "Executed Load Event" },
  { "ELNBE"    , 40 /* _COS_PM_ELNBE    */, "Executed Load N Bytes Event" },
  { "ELUE"     , 41 /* _COS_PM_ELUE     */, "Executed Load Uncached Event" },
  { "ELUNBE"   , 42 /* _COS_PM_ELUNBE   */, "Executed Load Uncached N Bytes Event" },
  { "ESE"      , 43 /* _COS_PM_ESE      */, "Executed Store Event" },
  { "ESNBE"    , 44 /* _COS_PM_ESNBE    */, "Executed Store N Bytes Event" },
  { "EAE"      , 45 /* _COS_PM_EAE      */, "Executed Atomics Event" },
  { "CIRE"     , 46 /* _COS_PM_CIRE     */, "Coherency Invalidation Request Event" },
  { "CIE"      , 47 /* _COS_PM_CIE      */, "Coherency Invalidation Event" },
  { "SE"       , 48 /* _COS_PM_SE       */, "Stop Event" },
  { "RE"       , 49 /* _COS_PM_RE       */, "Reset Event" },
  { "FSC"      , 50 /* _COS_PM_FSC      */, "Fetch Stall Cycle" },
  /* PMC available only on Coolidge V2 */
  { "CPIRE"    , 51 /* _COS_PM_CPIRE    */, "Coherency Precise Invalidation Request Event" },
  { "CPIE"     , 52 /* _COS_PM_CPIE     */, "Coherency Precise Invalidation Event" },
  { "HUPEVICT" , 53 /* _COS_PM_HUPEVICT */, "Hit-Under-Prefetch Line Eviction Event" },
  { "HUPHIT"   , 54 /* _COS_PM_HUPHIT   */, "Hit-Under-Prefetch Hit Event" },
};

/* Keep NB_COUNTERS_AVAILABLE and the table in lock-step at compile time. */
_Static_assert(sizeof(kvx_pm_counters) / sizeof(kvx_pm_counters[0]) == NB_COUNTERS_AVAILABLE,
               "NB_COUNTERS_AVAILABLE must match the number of kvx_pm_counters entries");
82+
83+
void mppa_setup_perf_events(char* event_names[], int n_events) {
84+
assert(n_events <= MAX_NB_PM_COUNTERS);
85+
int counter_ids[MAX_NB_PM_COUNTERS] = {0};
86+
for (int i = 0; i < n_events; i++) {
87+
const char *event_name = event_names[i];
88+
int j = 0;
89+
for (j = 0; j < NB_COUNTERS_AVAILABLE; j++) {
90+
if (strcmp(event_name, kvx_pm_counters[j].name) == 0) {
91+
counter_ids[i] = kvx_pm_counters[j].id;
92+
break;
93+
}
94+
}
95+
if (j == NB_COUNTERS_AVAILABLE) {
96+
printf("Event %s not found in the list of available events\n", event_name);
97+
assert(false);
98+
}
99+
}
100+
// Call runtime function
101+
mppa_setup_pm_counters(counter_ids, n_events);
102+
}
103+
104+
/*
 * Fetch the PM counter results.
 *
 * Forwards dst_handle unchanged to the runtime call
 * mppa_copy_out_pm_counters_buffer().
 * NOTE(review): the expected type and size behind dst_handle are defined by
 * that runtime function and are not visible here - confirm with the caller.
 */
void mppa_read_perf_events_results(void *dst_handle)
{
  mppa_copy_out_pm_counters_buffer(dst_handle);
}
107+
108+
/*
 * Reset the PM counter buffer by delegating to the runtime call
 * mppa_reset_pm_counters_buffer().
 *
 * Declared with (void): before C23 an empty parameter list is not a
 * prototype, so calls passing stray arguments would not be diagnosed.
 */
void mppa_reset_perf_events(void) {
  mppa_reset_pm_counters_buffer();
}
111+
112+
// uint64_t mppa_get_frequency() already exists in the runtime, just need to call it

0 commit comments

Comments
 (0)