@@ -25,14 +25,16 @@ import (
2525)
2626
2727type scrubSchedule struct {
28- period time.Duration
29- upcoming time.Time
28+ period time.Duration
29+ timer * time.Timer
3030}
3131
3232// FSScrubController watches v1alpha1.Config and schedules filesystem online check tasks.
3333type FSScrubController struct {
3434 Runtime runtime.Runtime
3535 schedule map [string ]scrubSchedule
36+ // When a mountpoint is scheduled to be scrubbed, its path is sent to this channel to be processed in the Run function.
37+ c chan string
3638}
3739
3840// Name implements controller.Controller interface.
@@ -63,38 +65,26 @@ func (ctrl *FSScrubController) Outputs() []controller.Output {
6365
6466// Run implements controller.Controller interface.
6567func (ctrl * FSScrubController ) Run (ctx context.Context , r controller.Runtime , logger * zap.Logger ) error {
66- var (
67- ticker * time.Ticker
68- tickerC <- chan time.Time
69- )
70-
71- tickerStop := func () {
72- if ticker == nil {
73- return
68+ stopTimers := func () {
69+ for _ , task := range ctrl .schedule {
70+ if task .timer != nil {
71+ task .timer .Stop ()
72+ }
7473 }
75-
76- ticker .Stop ()
77-
78- ticker = nil
79- tickerC = nil
8074 }
8175
82- defer tickerStop ()
83-
84- tickerStop ()
85-
86- ticker = time .NewTicker (15 * time .Second )
87- tickerC = ticker .C
76+ defer stopTimers ()
8877
8978 ctrl .schedule = make (map [string ]scrubSchedule )
79+ ctrl .c = make (chan string )
9080
9181 for {
9282 select {
9383 case <- ctx .Done ():
9484 return nil
95- case <- tickerC :
96- if err := ctrl .scrub ( "/var" , []string {}); err != nil {
97- return fmt . Errorf ( " error running filesystem scrub: %w " , err )
85+ case mountpoint := <- ctrl . c :
86+ if err := ctrl .runScrub ( mountpoint , []string {}); err != nil {
87+ logger . Error ( "!!! scrub !!! error running filesystem scrub" , zap . Error ( err ) )
9888 }
9989
10090 continue
@@ -113,50 +103,57 @@ func (ctrl *FSScrubController) updateSchedule(ctx context.Context, r controller.
113103 return fmt .Errorf ("error getting volume status: %w" , err )
114104 }
115105
116- logger .Warn ("reading volume status" )
117- volumesStatus . ForEach ( func ( item * block. VolumeStatus ) {
106+ logger .Warn ("!!! scrub !!! reading volume status" )
107+ for item := range volumesStatus . All ( ) {
118108 vol := item .TypedSpec ()
119109
120- logger .Warn ("volume status" , zap .Reflect ("item " , vol ))
110+ logger .Warn ("!!! scrub !!! volume status" , zap .Reflect ("volume " , vol ))
121111
122112 if vol .Phase != block .VolumePhaseReady {
123- logger .Warn ("vol.Phase != block.VolumePhaseReady" , zap .Reflect ("item" , vol ))
113+ logger .Warn ("!!! scrub !!! vol.Phase != block.VolumePhaseReady" , zap .Reflect ("item" , vol ))
124114
125- return
115+ continue
126116 }
127117
128118 if vol .Filesystem != block .FilesystemTypeXFS {
129- logger .Warn ("vol.Filesystem != block.FilesystemTypeXFS" , zap .Reflect ("item" , vol ))
119+ logger .Warn ("!!! scrub !!! vol.Filesystem != block.FilesystemTypeXFS" , zap .Reflect ("item" , vol ))
130120
131- return
121+ continue
132122 }
133123
134124 volumeConfig , err := safe .ReaderGetByID [* block.VolumeConfig ](ctx , r , item .Metadata ().ID ())
135125 if err != nil {
136- logger .Warn ("err" , zap .Error (err ))
137-
138- return
126+ return fmt .Errorf ("!!! scrub !!! error getting volume config: %w" , err )
139127 }
140128
141129 mountpoint := volumeConfig .TypedSpec ().Mount .TargetPath
142130
143131 if _ , ok := ctrl .schedule [mountpoint ]; ! ok {
144132 per := 10 * time .Second
145- seconds := rand .Int64N (int64 (per .Seconds ()))
133+ firstTimeout := time .Duration (rand .Int64N (int64 (per .Seconds ()))) * time .Second
134+ logger .Warn ("!!! scrub !!! firstTimeout" , zap .Duration ("firstTimeout" , firstTimeout ))
135+
136+ // When scheduling the first scrub, we use a random time to avoid all scrubs running in a row.
137+ // After the first scrub, we use the period defined in the config.
138+ cb := func () {
139+ logger .Warn ("!!! scrub !!! ding" , zap .String ("path" , mountpoint ))
140+ ctrl .c <- mountpoint
141+ ctrl .schedule [mountpoint ].timer .Reset (ctrl .schedule [mountpoint ].period )
142+ }
146143
147144 ctrl .schedule [mountpoint ] = scrubSchedule {
148- period : per ,
149- upcoming : time .Now (). Add ( time . Duration ( seconds * int64 ( time . Second )) ),
145+ period : per ,
146+ timer : time .AfterFunc ( firstTimeout , cb ),
150147 }
151148
152- logger .Warn ("scheduled" , zap .String ("path" , mountpoint ), zap . Reflect ( "upcoming" , ctrl . schedule [ mountpoint ]. upcoming ))
149+ logger .Warn ("!!! scrub !!! scheduled" , zap .String ("path" , mountpoint ))
153150 }
154- })
151+ }
155152
156- return nil
153+ return err
157154}
158155
159- func (ctrl * FSScrubController ) scrub (mountpoint string , opts []string ) error {
156+ func (ctrl * FSScrubController ) runScrub (mountpoint string , opts []string ) error {
160157 args := []string {"/usr/sbin/xfs_scrub" , "-T" , "-v" }
161158 args = append (args , opts ... )
162159 args = append (args , mountpoint )
0 commit comments