Skip to content

Commit e178d4d

Browse files
committed
schedule scrub
Signed-off-by: Dmitry Sharshakov <dmitry.sharshakov@siderolabs.com>
1 parent ce8ea36 commit e178d4d

File tree

1 file changed

+38
-41
lines changed
  • internal/app/machined/pkg/controllers/runtime

1 file changed

+38
-41
lines changed

internal/app/machined/pkg/controllers/runtime/fs_scrub.go

Lines changed: 38 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -25,14 +25,16 @@ import (
2525
)
2626

2727
type scrubSchedule struct {
28-
period time.Duration
29-
upcoming time.Time
28+
period time.Duration
29+
timer *time.Timer
3030
}
3131

3232
// FSScrubController watches v1alpha1.Config and schedules filesystem online check tasks.
3333
type FSScrubController struct {
3434
Runtime runtime.Runtime
3535
schedule map[string]scrubSchedule
36+
// When a mountpoint is scheduled to be scrubbed, its path is sent to this channel to be processed in the Run function.
37+
c chan string
3638
}
3739

3840
// Name implements controller.Controller interface.
@@ -63,38 +65,26 @@ func (ctrl *FSScrubController) Outputs() []controller.Output {
6365

6466
// Run implements controller.Controller interface.
6567
func (ctrl *FSScrubController) Run(ctx context.Context, r controller.Runtime, logger *zap.Logger) error {
66-
var (
67-
ticker *time.Ticker
68-
tickerC <-chan time.Time
69-
)
70-
71-
tickerStop := func() {
72-
if ticker == nil {
73-
return
68+
stopTimers := func() {
69+
for _, task := range ctrl.schedule {
70+
if task.timer != nil {
71+
task.timer.Stop()
72+
}
7473
}
75-
76-
ticker.Stop()
77-
78-
ticker = nil
79-
tickerC = nil
8074
}
8175

82-
defer tickerStop()
83-
84-
tickerStop()
85-
86-
ticker = time.NewTicker(15 * time.Second)
87-
tickerC = ticker.C
76+
defer stopTimers()
8877

8978
ctrl.schedule = make(map[string]scrubSchedule)
79+
ctrl.c = make(chan string)
9080

9181
for {
9282
select {
9383
case <-ctx.Done():
9484
return nil
95-
case <-tickerC:
96-
if err := ctrl.scrub("/var", []string{}); err != nil {
97-
return fmt.Errorf("error running filesystem scrub: %w", err)
85+
case mountpoint := <-ctrl.c:
86+
if err := ctrl.runScrub(mountpoint, []string{}); err != nil {
87+
logger.Error("!!! scrub !!! error running filesystem scrub", zap.Error(err))
9888
}
9989

10090
continue
@@ -113,50 +103,57 @@ func (ctrl *FSScrubController) updateSchedule(ctx context.Context, r controller.
113103
return fmt.Errorf("error getting volume status: %w", err)
114104
}
115105

116-
logger.Warn("reading volume status")
117-
volumesStatus.ForEach(func(item *block.VolumeStatus) {
106+
logger.Warn("!!! scrub !!! reading volume status")
107+
for item := range volumesStatus.All() {
118108
vol := item.TypedSpec()
119109

120-
logger.Warn("volume status", zap.Reflect("item", vol))
110+
logger.Warn("!!! scrub !!! volume status", zap.Reflect("volume", vol))
121111

122112
if vol.Phase != block.VolumePhaseReady {
123-
logger.Warn("vol.Phase != block.VolumePhaseReady", zap.Reflect("item", vol))
113+
logger.Warn("!!! scrub !!! vol.Phase != block.VolumePhaseReady", zap.Reflect("item", vol))
124114

125-
return
115+
continue
126116
}
127117

128118
if vol.Filesystem != block.FilesystemTypeXFS {
129-
logger.Warn("vol.Filesystem != block.FilesystemTypeXFS", zap.Reflect("item", vol))
119+
logger.Warn("!!! scrub !!! vol.Filesystem != block.FilesystemTypeXFS", zap.Reflect("item", vol))
130120

131-
return
121+
continue
132122
}
133123

134124
volumeConfig, err := safe.ReaderGetByID[*block.VolumeConfig](ctx, r, item.Metadata().ID())
135125
if err != nil {
136-
logger.Warn("err", zap.Error(err))
137-
138-
return
126+
return fmt.Errorf("!!! scrub !!! error getting volume config: %w", err)
139127
}
140128

141129
mountpoint := volumeConfig.TypedSpec().Mount.TargetPath
142130

143131
if _, ok := ctrl.schedule[mountpoint]; !ok {
144132
per := 10 * time.Second
145-
seconds := rand.Int64N(int64(per.Seconds()))
133+
firstTimeout := time.Duration(rand.Int64N(int64(per.Seconds()))) * time.Second
134+
logger.Warn("!!! scrub !!! firstTimeout", zap.Duration("firstTimeout", firstTimeout))
135+
136+
// When scheduling the first scrub, we use a random time to avoid all scrubs running in a row.
137+
// After the first scrub, we use the period defined in the config.
138+
cb := func() {
139+
logger.Warn("!!! scrub !!! ding", zap.String("path", mountpoint))
140+
ctrl.c <- mountpoint
141+
ctrl.schedule[mountpoint].timer.Reset(ctrl.schedule[mountpoint].period)
142+
}
146143

147144
ctrl.schedule[mountpoint] = scrubSchedule{
148-
period: per,
149-
upcoming: time.Now().Add(time.Duration(seconds * int64(time.Second))),
145+
period: per,
146+
timer: time.AfterFunc(firstTimeout, cb),
150147
}
151148

152-
logger.Warn("scheduled", zap.String("path", mountpoint), zap.Reflect("upcoming", ctrl.schedule[mountpoint].upcoming))
149+
logger.Warn("!!! scrub !!! scheduled", zap.String("path", mountpoint))
153150
}
154-
})
151+
}
155152

156-
return nil
153+
return err
157154
}
158155

159-
func (ctrl *FSScrubController) scrub(mountpoint string, opts []string) error {
156+
func (ctrl *FSScrubController) runScrub(mountpoint string, opts []string) error {
160157
args := []string{"/usr/sbin/xfs_scrub", "-T", "-v"}
161158
args = append(args, opts...)
162159
args = append(args, mountpoint)

0 commit comments

Comments (0)