xdp: Fix TX HW timestamp correlation by frame sequence#52
xdp: Fix TX HW timestamp correlation by frame sequence#52chweelinchoong wants to merge 1 commit into
Conversation
| log_stat_user_selected == LOG_TX_TIMESTAMPS) && | ||
| rtt->backlog) { | ||
| /* Record Tx SW timestamp for the first frame in the cycle */ | ||
| uint64_t step = (fpc > 0) ? fpc : 1; |
There was a problem hiding this comment.
fpc should be always > 0.
There was a problem hiding this comment.
Done, thanks. Removed the unnecessary fallback
|
Thanks. Can you take a look at the AI review? |
2eb54ad to
aecc456
Compare
| rtt->backlog[idx].sw_ts = ts_to_ns(&tx_time); | ||
| for (c = base; c < end; c += fpc) { | ||
| if (c < cycle_number) | ||
| continue; |
There was a problem hiding this comment.
Thanks for updating. I think this works fine for the Tx HW TS path, but not for the other code. If Tx HW TS is disabled and fpc > 1 those frames will never get a sw_ts. This breaks the rt_time calculation.
Correct me If I'm wrong: I think we have two cases here: Either tx_hwtstamp_enabled is enabled, then only the first frame gets sw_ts. Otherwise, all frames require sw_ts.
What about the following diff? It also simplifies the call sites.
From b9c7bcdc7b4be373b557e358ecd8d2152d2e2f2c Mon Sep 17 00:00:00 2001
From: Kurt Kanzenbach <kurt@linutronix.de>
Date: Thu, 18 Jun 2026 12:34:13 +0200
Subject: [PATCH] stat: Fix round trip time recording if Tx HW TS disabled
Fix round trip time recording if Tx HW TS disabled. While at it, move
timestamping logic into stat. This allows to cleanup the call sites.
Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
---
src/dcp_thread.c | 4 ++--
src/layer2_thread.c | 22 +++++-----------------
src/lldp_thread.c | 4 ++--
src/rta_thread.c | 21 ++++-----------------
src/rtc_thread.c | 21 ++++-----------------
src/stat.c | 35 +++++++++++++++++++++--------------
src/tsn_thread.c | 21 +++++----------------
7 files changed, 43 insertions(+), 85 deletions(-)
diff --git a/src/dcp_thread.c b/src/dcp_thread.c
index 6082a71f70e2..abbae3f03eee 100644
--- a/src/dcp_thread.c
+++ b/src/dcp_thread.c
@@ -156,8 +156,8 @@ static void dcp_gen_and_send_frames(struct thread_context *thread_context, int s
len = dcp_send_messages(thread_context, socket_fd, destination,
thread_context->tx_frame_data, dcp_config->num_frames_per_cycle);
- for (i = 0; i < len; i++)
- stat_frame_sent(DCP_FRAME_TYPE, sequence_counter_begin + i);
+ if (len > 0)
+ stat_frames_sent_batch(DCP_FRAME_TYPE, sequence_counter_begin, len);
}
static void *dcp_tx_thread_routine(void *data)
diff --git a/src/layer2_thread.c b/src/layer2_thread.c
index 4284939cbe77..49650d23f48b 100644
--- a/src/layer2_thread.c
+++ b/src/layer2_thread.c
@@ -179,8 +179,8 @@ static int generic_l2_gen_and_send_frames(struct thread_context *thread_context,
thread_context->tx_frame_data, num_frames_per_cycle,
duration);
- for (i = 0; i < len; i++)
- stat_frame_sent(GENERICL2_FRAME_TYPE, sequence_counter_begin + i);
+ if (len > 0)
+ stat_frames_sent_batch(GENERICL2_FRAME_TYPE, sequence_counter_begin, len);
return len;
}
@@ -388,7 +388,6 @@ static void *generic_l2_xdp_tx_thread_routine(void *data)
sequence_counter += num_frames;
} else {
unsigned int received;
- uint64_t i;
pthread_mutex_lock(&thread_context->xdp_data_mutex);
@@ -404,20 +403,9 @@ static void *generic_l2_xdp_tx_thread_routine(void *data)
xsk_ring_prod__submit(&xsk->tx, received);
- if (received > 0) {
- if (l2_config->tx_hwtstamp_enabled) {
- /*
- * Once-per-cycle: record TX SW timestamp for the first
- * packet in this cycle and count all frames
- */
- stat_frames_sent_batch(GENERICL2_FRAME_TYPE,
- sequence_counter, received);
- } else {
- for (i = sequence_counter; i < sequence_counter + received;
- ++i)
- stat_frame_sent(GENERICL2_FRAME_TYPE, i);
- }
- }
+ if (received > 0)
+ stat_frames_sent_batch(GENERICL2_FRAME_TYPE, sequence_counter,
+ received);
xsk->outstanding_tx += received;
thread_context->received_frames = 0;
diff --git a/src/lldp_thread.c b/src/lldp_thread.c
index 59d18b7af03a..7cae8c85ab37 100644
--- a/src/lldp_thread.c
+++ b/src/lldp_thread.c
@@ -162,8 +162,8 @@ static int lldp_gen_and_send_frames(struct thread_context *thread_context, int s
len = lldp_send_messages(thread_context, socket_fd, destination,
thread_context->tx_frame_data, lldp_config->num_frames_per_cycle);
- for (i = 0; i < len; i++)
- stat_frame_sent(LLDP_FRAME_TYPE, sequence_counter_begin + i);
+ if (len > 0)
+ stat_frames_sent_batch(LLDP_FRAME_TYPE, sequence_counter_begin, len);
return len;
}
diff --git a/src/rta_thread.c b/src/rta_thread.c
index 25f8b128a8b9..179d86dbedc7 100644
--- a/src/rta_thread.c
+++ b/src/rta_thread.c
@@ -143,8 +143,8 @@ static int rta_gen_and_send_frames(struct thread_context *thread_context, int so
len = rta_send_messages(thread_context, socket_fd, destination,
thread_context->tx_frame_data, rta_config->num_frames_per_cycle);
- for (i = 0; i < len; i++)
- stat_frame_sent(RTA_FRAME_TYPE, sequence_counter_begin + i);
+ if (len > 0)
+ stat_frames_sent_batch(RTA_FRAME_TYPE, sequence_counter_begin, len);
return len;
}
@@ -354,7 +354,6 @@ static void *rta_xdp_tx_thread_routine(void *data)
}
} else {
unsigned int received;
- uint64_t i;
pthread_mutex_lock(&thread_context->xdp_data_mutex);
@@ -370,20 +369,8 @@ static void *rta_xdp_tx_thread_routine(void *data)
xsk_ring_prod__submit(&xsk->tx, received);
- if (received > 0) {
- if (rta_config->tx_hwtstamp_enabled) {
- /*
- * Once-per-cycle: record TX SW timestamp for the first
- * packet in this cycle and count all frames
- */
- stat_frames_sent_batch(RTA_FRAME_TYPE, sequence_counter,
- received);
- } else {
- for (i = sequence_counter; i < sequence_counter + received;
- ++i)
- stat_frame_sent(RTA_FRAME_TYPE, i);
- }
- }
+ if (received > 0)
+ stat_frames_sent_batch(RTA_FRAME_TYPE, sequence_counter, received);
xsk->outstanding_tx += received;
thread_context->received_frames = 0;
diff --git a/src/rtc_thread.c b/src/rtc_thread.c
index 4f3cf4294b49..e03bae059177 100644
--- a/src/rtc_thread.c
+++ b/src/rtc_thread.c
@@ -145,8 +145,8 @@ static int rtc_gen_and_send_frames(struct thread_context *thread_context, int so
len = rtc_send_messages(thread_context, socket_fd, destination,
thread_context->tx_frame_data, rtc_config->num_frames_per_cycle);
- for (i = 0; i < len; i++)
- stat_frame_sent(RTC_FRAME_TYPE, sequence_counter_begin + i);
+ if (len > 0)
+ stat_frames_sent_batch(RTC_FRAME_TYPE, sequence_counter_begin, len);
return len;
}
@@ -395,7 +395,6 @@ static void *rtc_xdp_tx_thread_routine(void *data)
sequence_counter += num_frames;
} else {
unsigned int received;
- uint64_t i;
pthread_mutex_lock(&thread_context->xdp_data_mutex);
@@ -411,20 +410,8 @@ static void *rtc_xdp_tx_thread_routine(void *data)
xsk_ring_prod__submit(&xsk->tx, received);
- if (received > 0) {
- if (rtc_config->tx_hwtstamp_enabled) {
- /*
- * Once-per-cycle: record TX SW timestamp for the first
- * packet in this cycle and count all frames
- */
- stat_frames_sent_batch(RTC_FRAME_TYPE, sequence_counter,
- received);
- } else {
- for (i = sequence_counter; i < sequence_counter + received;
- ++i)
- stat_frame_sent(RTC_FRAME_TYPE, i);
- }
- }
+ if (received > 0)
+ stat_frames_sent_batch(RTC_FRAME_TYPE, sequence_counter, received);
xsk->outstanding_tx += received;
thread_context->received_frames = 0;
diff --git a/src/stat.c b/src/stat.c
index dff975ab1396..ff1f1af4becc 100644
--- a/src/stat.c
+++ b/src/stat.c
@@ -460,7 +460,6 @@ void stat_frames_sent_batch(enum stat_frame_type frame_type, uint64_t cycle_numb
{
struct round_trip_context *rtt = &round_trip_contexts[frame_type];
struct statistics *stat = &global_statistics[frame_type];
- uint64_t fpc = app_config.classes[frame_type].num_frames_per_cycle;
struct timespec tx_time = {};
if (frame_count == 1) {
@@ -474,23 +473,31 @@ void stat_frames_sent_batch(enum stat_frame_type frame_type, uint64_t cycle_numb
if ((log_stat_user_selected == LOG_REFERENCE ||
log_stat_user_selected == LOG_TX_TIMESTAMPS) &&
rtt->backlog) {
- uint64_t base = (cycle_number / fpc) * fpc;
uint64_t end = cycle_number + frame_count;
uint64_t c;
- /*
- * Record Tx SW timestamp at the cycle-aligned first-frame slot of each
- * cycle this batch covers. The completion path looks up sw_ts at the
- * cycle boundary (seq rounded down to fpc), so writes must use the same
- * alignment. Skip cycles whose first frame was sent in an earlier batch
- * — that slot already holds the correct sw_ts and re-stamping it now
- * would shrink/invert the measured latency.
- */
app_clock_get(&tx_time);
- for (c = base; c < end; c += fpc) {
- if (c < cycle_number)
- continue;
- rtt->backlog[c % rtt->backlog_len].sw_ts = ts_to_ns(&tx_time);
+ if (app_config.classes[frame_type].tx_hwtstamp_enabled) {
+ uint64_t fpc = app_config.classes[frame_type].num_frames_per_cycle;
+ uint64_t base = (cycle_number / fpc) * fpc;
+
+ /*
+ * Record Tx SW timestamp at the cycle-aligned first-frame slot of each
+ * cycle this batch covers. The completion path looks up sw_ts at the cycle
+ * boundary (seq rounded down to fpc), so writes must use the same
+ * alignment. Skip cycles whose first frame was sent in an earlier batch —
+ * that slot already holds the correct sw_ts and re-stamping it now would
+ * shrink/invert the measured latency.
+ */
+ for (c = base; c < end; c += fpc) {
+ if (c < cycle_number)
+ continue;
+ rtt->backlog[c % rtt->backlog_len].sw_ts = ts_to_ns(&tx_time);
+ }
+ } else {
+ /* If Tx HW TS is not enabled, each frame has its own sw_ts. */
+ for (c = cycle_number; c < end; c++)
+ rtt->backlog[c % rtt->backlog_len].sw_ts = ts_to_ns(&tx_time);
}
}
diff --git a/src/tsn_thread.c b/src/tsn_thread.c
index d76a57bd3a89..5cf03f786967 100644
--- a/src/tsn_thread.c
+++ b/src/tsn_thread.c
@@ -156,8 +156,8 @@ static int tsn_gen_and_send_frames(struct thread_context *thread_context, int so
thread_context->tx_frame_data, tsn_config->num_frames_per_cycle,
duration);
- for (i = 0; i < len; i++)
- stat_frame_sent(thread_context->frame_type, sequence_counter_begin + i);
+ if (len > 0)
+ stat_frames_sent_batch(thread_context->frame_type, sequence_counter_begin, len);
return len;
}
@@ -446,20 +446,9 @@ static void *tsn_xdp_tx_thread_routine(void *data)
xsk_ring_prod__submit(&xsk->tx, received);
- if (received > 0) {
- if (tsn_config->tx_hwtstamp_enabled) {
- /*
- * Once-per-cycle: record TX SW timestamp for the first
- * packet in this cycle and count all frames
- */
- stat_frames_sent_batch(thread_context->frame_type,
- sequence_counter, received);
- } else {
- for (i = sequence_counter; i < sequence_counter + received;
- ++i)
- stat_frame_sent(thread_context->frame_type, i);
- }
- }
+ if (received > 0)
+ stat_frames_sent_batch(thread_context->frame_type, sequence_counter,
+ received);
xsk->outstanding_tx += received;
thread_context->received_frames = 0;
--
2.53.0
There was a problem hiding this comment.
Thanks Kurt, agreed. I applied your suggested diff, squashed it into the commit, and tested it.
1247184 to
4214649
Compare
The TX HW timestamp completion path correlated completions with backlog slots via seq_lagged, a snapshot of the previous cycle's sequence counter. This assumes TX completions are consumed exactly one cycle after submission. If a completion is delayed by two or more cycles, the lookup uses the wrong backlog slot: sw_ts may belong to a newer cycle, which can increment TxHwTimestampMissing, and rx_hw_ts may belong to a different cycle, which corrupts ProcFirst/ProcBatch latency. Use the sequence counter stored in the completed frame payload instead. Round it down to the cycle start so all timestamped frames in a cycle use the same once-per-cycle sw_ts slot, and derive first/last frame classification from seq % frames_per_cycle. In mirror mode, attach XDP_TXMD_FLAGS_TIMESTAMP based on the mirrored frame's position within the sender cycle instead of its position within the RX batch. RX batches are not guaranteed to align with cycle boundaries, so the old batch-index check could timestamp the wrong frame and produce unpaired ProcFirst/ProcBatch samples. Move the Tx SW timestamp recording policy into stat_frames_sent_batch(). When Tx HW timestamping is enabled, refresh sw_ts for each cycle-start slot covered by the transmitted batch. When Tx HW timestamping is disabled, record sw_ts for every frame. Suggested-by: Kurt Kanzenbach <kurt@linutronix.de> Co-developed-by: Kurt Kanzenbach <kurt@linutronix.de> Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de> Co-developed-by: Song Yoong Siang <yoong.siang.song@intel.com> Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com> Signed-off-by: Chwee-Lin Choong <chwee.lin.choong@intel.com>
The TX HW timestamp completion path correlated completions with backlog slots via seq_lagged, a snapshot of the previous cycle's sequence counter. This assumes TX completions are consumed exactly one cycle after submission. If a completion is delayed by two or more cycles, the lookup uses the wrong backlog slot: sw_ts may belong to a newer cycle, which can increment TxHwTimestampMissing, and rx_hw_ts may belong to a different cycle, which corrupts ProcFirst/ProcBatch latency.
Use the sequence counter stored in the completed frame payload instead. Round it down to the cycle start so all timestamped frames in a cycle use the same once-per-cycle sw_ts slot, and derive first/last frame classification from seq % frames_per_cycle.
In mirror mode, attach XDP_TXMD_FLAGS_TIMESTAMP based on the mirrored frame's position within the sender cycle instead of its position within the RX batch. RX batches are not guaranteed to align with cycle boundaries, so the old batch-index check could timestamp the wrong frame and produce unpaired ProcFirst/ProcBatch samples.
Also refresh sw_ts for every cycle-start slot covered by a transmitted batch, so multi-cycle mirror batches do not leave later cycle slots with stale timestamps from a previous backlog wrap.
Co-developed-by: Song Yoong Siang yoong.siang.song@intel.com