Skip to content

Commit 3a78260

Browse files
feat(redundancy): add redundancy level configuration for smoke check (#556)
* feat(redundancy): add redundancy level configuration for smoke check
* feat(redundancy): add redundancy level configuration to download
* feat(redundancy): update redundancy level configuration to support multiple levels
* refactor(redundancy): remove deprecated r-level configuration in favor of r-levels
* fix(api): update header variable name for redundancy level in API requests
* fix: update redundancy level handling to use pointers for options
* chore(smoke): remove logging of redundancy levels in smoke check
* feat(smoke): add redundancy level and file size dimensions to metrics (#575)

  Add redundancy_level label to all smoke check metrics to enable per-redundancy-level and per-file-size analysis in Grafana dashboards.

  - Add redundancy_level label to all per-operation metric vectors
  - Change throughput metrics from Gauge to Histogram for quantile support
  - Add upload_success/download_success counters
  - Add uploaded_bytes_total/downloaded_bytes_total counters
* fix: simplify redundancy level checks by removing direct comparison to NONE
* fix: revert back changed metrics

---------

Co-authored-by: Ljubiša Gačević <35105035+gacevicljubisa@users.noreply.github.com>
Co-authored-by: Ljubisa Gacevic <ljubisa.rs@gmail.com>
1 parent 9ddeff7 commit 3a78260

9 files changed

Lines changed: 249 additions & 134 deletions

File tree

config/local.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,7 @@ checks:
301301
download-timeout: 1m
302302
iteration-wait: 5m
303303
duration: 10m
304+
r-levels: [0, 2, 4]
304305
timeout: 11m
305306
type: smoke
306307
ci-load:

pkg/bee/api/api.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ const (
3232
swarmFeedIndexNextHeader = "Swarm-Feed-Index-Next"
3333
swarmIndexDocumentHeader = "Swarm-Index-Document"
3434
swarmErrorDocumentHeader = "Swarm-Error-Document"
35+
swarmRedundancyLevelHeader = "Swarm-Redundancy-Level"
3536
)
3637

3738
// Client manages communication with the Bee API.
@@ -223,6 +224,9 @@ func (c *Client) requestDataGetHeader(ctx context.Context, method, path string,
223224
if opts != nil && opts.Cache != nil {
224225
req.Header.Set(swarmCacheDownloadHeader, strconv.FormatBool(*opts.Cache))
225226
}
227+
if opts != nil && opts.RLevel != nil {
228+
req.Header.Set(swarmRedundancyLevelHeader, strconv.Itoa(int(*opts.RLevel)))
229+
}
226230
if opts != nil && opts.RedundancyFallbackMode != nil {
227231
req.Header.Set(swarmRedundancyFallbackMode, strconv.FormatBool(*opts.RedundancyFallbackMode))
228232
}

pkg/bee/api/bytes.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ func (b *BytesService) Upload(ctx context.Context, data io.Reader, o UploadOptio
3434
}
3535
h.Add(deferredUploadHeader, strconv.FormatBool(!o.Direct))
3636
h.Add(postageStampBatchHeader, o.BatchID)
37+
if o.RLevel != nil {
38+
h.Add(swarmRedundancyLevelHeader, strconv.Itoa(int(*o.RLevel)))
39+
}
40+
3741
err := b.client.requestWithHeader(ctx, http.MethodPost, "/"+apiVersion+"/bytes", h, data, &resp)
3842
return resp, err
3943
}

pkg/bee/api/options.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
package api
22

3-
import "github.com/ethersphere/bee/v2/pkg/swarm"
3+
import (
4+
"github.com/ethersphere/bee/v2/pkg/file/redundancy"
5+
"github.com/ethersphere/bee/v2/pkg/swarm"
6+
)
47

58
type UploadOptions struct {
69
Act bool
@@ -9,6 +12,7 @@ type UploadOptions struct {
912
BatchID string
1013
Direct bool
1114
ActHistoryAddress swarm.Address
15+
RLevel *redundancy.Level
1216

1317
// Dirs
1418
IndexDocument string
@@ -21,6 +25,7 @@ type DownloadOptions struct {
2125
ActPublicKey *swarm.Address
2226
ActTimestamp *uint64
2327
Cache *bool
28+
RLevel *redundancy.Level
2429
RedundancyFallbackMode *bool
2530
OnlyRootChunk *bool
2631
}

pkg/check/load/load.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ func (c *Check) run(ctx context.Context, cluster orchestration.Cluster, o Option
203203

204204
c.logger.WithField("batch_id", batchID).Infof("node %s: using batch", uploader.Name())
205205

206-
address, duration, err = test.Upload(ctx, uploader, txData, batchID)
206+
address, duration, err = test.Upload(ctx, uploader, txData, batchID, nil)
207207
if err != nil {
208208
c.metrics.UploadErrors.WithLabelValues(sizeLabel).Inc()
209209
c.logger.Errorf("upload failed: %v", err)
@@ -246,7 +246,7 @@ func (c *Check) run(ctx context.Context, cluster orchestration.Cluster, o Option
246246

247247
c.metrics.DownloadAttempts.WithLabelValues(sizeLabel).Inc()
248248

249-
rxData, rxDuration, err = test.Download(ctx, downloader, address)
249+
rxData, rxDuration, err = test.Download(ctx, downloader, address, nil)
250250
if err != nil {
251251
c.metrics.DownloadErrors.WithLabelValues(sizeLabel).Inc()
252252
c.logger.Errorf("download failed for size %d: %v", contentSize, err)

pkg/check/smoke/metrics.go

Lines changed: 54 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,23 @@ type metrics struct {
1010
BatchCreateAttempts prometheus.Counter
1111
UploadErrors *prometheus.CounterVec
1212
UploadAttempts *prometheus.CounterVec
13+
UploadSuccess *prometheus.CounterVec
1314
DownloadErrors *prometheus.CounterVec
1415
DownloadMismatch *prometheus.CounterVec
1516
DownloadAttempts *prometheus.CounterVec
17+
DownloadSuccess *prometheus.CounterVec
1618
UploadDuration *prometheus.HistogramVec
1719
DownloadDuration *prometheus.HistogramVec
1820
UploadThroughput *prometheus.GaugeVec
1921
DownloadThroughput *prometheus.GaugeVec
22+
UploadedBytes *prometheus.CounterVec
23+
DownloadedBytes *prometheus.CounterVec
2024
}
2125

2226
const (
23-
labelSizeBytes = "size_bytes"
24-
labelNodeName = "node_name"
27+
labelSizeBytes = "size_bytes"
28+
labelNodeName = "node_name"
29+
labelRedundancyLevel = "redundancy_level"
2530
)
2631

2732
func newMetrics(subsystem string) metrics {
@@ -49,7 +54,7 @@ func newMetrics(subsystem string) metrics {
4954
Name: "upload_attempts",
5055
Help: "Number of upload attempts.",
5156
},
52-
[]string{labelSizeBytes, labelNodeName},
57+
[]string{labelSizeBytes, labelNodeName, labelRedundancyLevel},
5358
),
5459
DownloadAttempts: prometheus.NewCounterVec(
5560
prometheus.CounterOpts{
@@ -58,7 +63,7 @@ func newMetrics(subsystem string) metrics {
5863
Name: "download_attempts",
5964
Help: "Number of download attempts.",
6065
},
61-
[]string{labelSizeBytes, labelNodeName},
66+
[]string{labelSizeBytes, labelNodeName, labelRedundancyLevel},
6267
),
6368
UploadErrors: prometheus.NewCounterVec(
6469
prometheus.CounterOpts{
@@ -67,7 +72,7 @@ func newMetrics(subsystem string) metrics {
6772
Name: "upload_errors_count",
6873
Help: "The total number of errors encountered before successful upload.",
6974
},
70-
[]string{labelSizeBytes, labelNodeName},
75+
[]string{labelSizeBytes, labelNodeName, labelRedundancyLevel},
7176
),
7277
DownloadErrors: prometheus.NewCounterVec(
7378
prometheus.CounterOpts{
@@ -76,7 +81,7 @@ func newMetrics(subsystem string) metrics {
7681
Name: "download_errors_count",
7782
Help: "The total number of errors encountered before successful download.",
7883
},
79-
[]string{labelSizeBytes, labelNodeName},
84+
[]string{labelSizeBytes, labelNodeName, labelRedundancyLevel},
8085
),
8186
DownloadMismatch: prometheus.NewCounterVec(
8287
prometheus.CounterOpts{
@@ -85,27 +90,27 @@ func newMetrics(subsystem string) metrics {
8590
Name: "download_mismatch",
8691
Help: "The total number of times uploaded data is different from downloaded data.",
8792
},
88-
[]string{labelSizeBytes, labelNodeName},
93+
[]string{labelSizeBytes, labelNodeName, labelRedundancyLevel},
8994
),
9095
UploadDuration: prometheus.NewHistogramVec(
9196
prometheus.HistogramOpts{
9297
Namespace: m.Namespace,
9398
Subsystem: subsystem,
9499
Name: "data_upload_duration",
95100
Help: "Data upload duration through the /bytes endpoint.",
96-
Buckets: []float64{0.1, 0.25, 0.5, 1, 2.5, 5, 10, 25, 50, 100, 250, 600, 1200},
101+
Buckets: []float64{0.1, 0.25, 0.5, 1, 2.5, 5, 10, 25, 50, 100, 250, 600, 1200, 1800, 3600},
97102
},
98-
[]string{labelSizeBytes, labelNodeName},
103+
[]string{labelSizeBytes, labelNodeName, labelRedundancyLevel},
99104
),
100105
DownloadDuration: prometheus.NewHistogramVec(
101106
prometheus.HistogramOpts{
102107
Namespace: m.Namespace,
103108
Subsystem: subsystem,
104109
Name: "data_download_duration",
105110
Help: "Data download duration through the /bytes endpoint.",
106-
Buckets: []float64{0.1, 0.25, 0.5, 1, 2.5, 5, 10, 25, 50, 100, 250, 600, 1200},
111+
Buckets: []float64{0.1, 0.25, 0.5, 1, 2.5, 5, 10, 25, 50, 100, 250, 600, 1200, 1800, 3600},
107112
},
108-
[]string{labelSizeBytes, labelNodeName},
113+
[]string{labelSizeBytes, labelNodeName, labelRedundancyLevel},
109114
),
110115
UploadThroughput: prometheus.NewGaugeVec(
111116
prometheus.GaugeOpts{
@@ -114,7 +119,7 @@ func newMetrics(subsystem string) metrics {
114119
Name: "upload_throughput_bytes_per_second",
115120
Help: "Upload throughput in bytes per second.",
116121
},
117-
[]string{labelSizeBytes, labelNodeName},
122+
[]string{labelSizeBytes, labelNodeName, labelRedundancyLevel},
118123
),
119124
DownloadThroughput: prometheus.NewGaugeVec(
120125
prometheus.GaugeOpts{
@@ -123,7 +128,43 @@ func newMetrics(subsystem string) metrics {
123128
Name: "download_throughput_bytes_per_second",
124129
Help: "Download throughput in bytes per second.",
125130
},
126-
[]string{labelSizeBytes, labelNodeName},
131+
[]string{labelSizeBytes, labelNodeName, labelRedundancyLevel},
132+
),
133+
UploadSuccess: prometheus.NewCounterVec(
134+
prometheus.CounterOpts{
135+
Namespace: m.Namespace,
136+
Subsystem: subsystem,
137+
Name: "upload_success",
138+
Help: "Number of successful uploads.",
139+
},
140+
[]string{labelSizeBytes, labelNodeName, labelRedundancyLevel},
141+
),
142+
DownloadSuccess: prometheus.NewCounterVec(
143+
prometheus.CounterOpts{
144+
Namespace: m.Namespace,
145+
Subsystem: subsystem,
146+
Name: "download_success",
147+
Help: "Number of successful downloads with matching data.",
148+
},
149+
[]string{labelSizeBytes, labelNodeName, labelRedundancyLevel},
150+
),
151+
UploadedBytes: prometheus.NewCounterVec(
152+
prometheus.CounterOpts{
153+
Namespace: m.Namespace,
154+
Subsystem: subsystem,
155+
Name: "uploaded_bytes_total",
156+
Help: "Total bytes successfully uploaded.",
157+
},
158+
[]string{labelNodeName, labelRedundancyLevel},
159+
),
160+
DownloadedBytes: prometheus.NewCounterVec(
161+
prometheus.CounterOpts{
162+
Namespace: m.Namespace,
163+
Subsystem: subsystem,
164+
Name: "downloaded_bytes_total",
165+
Help: "Total bytes successfully downloaded.",
166+
},
167+
[]string{labelNodeName, labelRedundancyLevel},
127168
),
128169
}
129170
}

0 commit comments

Comments
 (0)