Skip to content

Commit e53010a

Browse files
committed
Allow disabling state reconciliation (and make that the default)
1 parent b7bf384 commit e53010a

6 files changed

Lines changed: 110 additions & 38 deletions

File tree

.github/workflows/lint-test.yml

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,33 @@ jobs:
5353
env:
5454
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
5555

56-
integration-test:
56+
integration-test-latest:
5757
needs: [run]
5858
permissions:
5959
contents: read
6060
runs-on: ubuntu-24.04
61+
strategy:
62+
matrix:
63+
traefik: [latest]
64+
steps:
65+
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
66+
67+
- name: run
68+
run: go run test.go
69+
working-directory: ./ci
70+
env:
71+
TRAEFIK_TAG: ${{ matrix.traefik }}
72+
73+
- name: cleanup
74+
if: ${{ always() }}
75+
run: docker compose logs --tail 100 nginx nginx2 traefik && docker compose down
76+
working-directory: ./ci
77+
78+
integration-test-backwards-compatibility:
79+
needs: [integration-test-latest]
80+
permissions:
81+
contents: read
82+
runs-on: ubuntu-24.04
6183
strategy:
6284
matrix:
6385
traefik: [v2.11, v3.0, v3.1, v3.2, v3.3, v3.4, v3.5]

.traefik.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@ testData:
1111
CaptchaProvider: turnstile
1212
SiteKey: 1x00000000000000000000AA
1313
SecretKey: 1x0000000000000000000000000000000AA
14+
EnableStateReconciliation: "false"

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ services:
119119
| `enableStatsPage` | `string` | `"false"` | Allows `exemptIps` to access `/captcha-protect/stats` to monitor the rate limiter. |
120120
| `logLevel` | `string` | `"INFO"` | Log level for the middleware. Options: `ERROR`, `WARNING`, `INFO`, or `DEBUG`. |
121121
| `persistentStateFile` | `string` | `""` | File path to persist rate limiter state across Traefik restarts. In Docker, mount this file from the host. |
122+
| `enableStateReconciliation` | `string` | `"false"` | When `"true"`, reads the state file from disk and merges it with the in-memory state before each save, so that multiple instances sharing the same `persistentStateFile` do not overwrite each other's data. Adds extra I/O overhead on every save. Only enable for multi-instance deployments sharing state. |
122123

123124

124125
### Good Bots

ci/docker-compose.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ services:
4949
traefik.http.middlewares.captcha-protect.plugin.captcha-protect.goodBots: ""
5050
traefik.http.middlewares.captcha-protect.plugin.captcha-protect.protectRoutes: "/"
5151
traefik.http.middlewares.captcha-protect.plugin.captcha-protect.persistentStateFile: "/tmp/state.json"
52+
traefik.http.middlewares.captcha-protect.plugin.captcha-protect.enableStateReconciliation: "true"
5253
healthcheck:
5354
test: curl -fs http://localhost/healthz | grep -q OK || exit 1
5455
volumes:

ci/test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ func main() {
5757

5858
fmt.Println("\nTesting state sharing between nginx instances...")
5959
fmt.Println("Waiting for state to save to disk...")
60-
time.Sleep(2 * time.Second)
60+
time.Sleep(cp.StateSaveInterval + (5 * time.Second))
6161
testStateSharing(ips)
6262

6363
fmt.Println("Sleeping for 2m")
@@ -69,7 +69,7 @@ func main() {
6969
// make sure the state has time to save
7070
fmt.Println("Waiting for state to save")
7171
runCommand("jq", ".", "tmp/state.json")
72-
time.Sleep(cp.StateSaveInterval + (2 * time.Second))
72+
time.Sleep(cp.StateSaveInterval + (5 * time.Second))
7373
runCommand("jq", ".", "tmp/state.json")
7474

7575
runCommand("docker", "container", "stats", "--no-stream")

main.go

Lines changed: 82 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"encoding/json"
66
"fmt"
77
"log/slog"
8+
"math/rand"
89
"net"
910
"net/http"
1011
"net/url"
@@ -26,6 +27,8 @@ import (
2627
const (
2728
// StateSaveInterval is how often the persistent state file is written to disk
2829
StateSaveInterval = 5 * time.Second
30+
// StateSaveJitter is the upper bound on the random jitter added to the save interval, so that multiple instances do not all save at the same moment
31+
StateSaveJitter = 2 * time.Second
2932
)
3033

3134
type Config struct {
@@ -54,7 +57,12 @@ type Config struct {
5457
EnableStatsPage string `json:"enableStatsPage"`
5558
LogLevel string `json:"loglevel,omitempty"`
5659
PersistentStateFile string `json:"persistentStateFile"`
57-
Mode string `json:"mode"`
60+
// EnableStateReconciliation is a string instead of bool due to Traefik's label parsing limitations
61+
// When enabled, the plugin will read and merge state from disk before each save to prevent
62+
// multiple instances from overwriting each other's data. This adds extra I/O overhead.
63+
// Only enable this if running multiple plugin instances sharing the same state file.
64+
EnableStateReconciliation string `json:"enableStateReconciliation"`
65+
Mode string `json:"mode"`
5866
}
5967

6068
type CaptchaProtect struct {
@@ -87,27 +95,28 @@ type captchaResponse struct {
8795

8896
func CreateConfig() *Config {
8997
return &Config{
90-
RateLimit: 20,
91-
Window: 86400,
92-
IPv4SubnetMask: 16,
93-
IPv6SubnetMask: 64,
94-
IPForwardedHeader: "",
95-
ProtectParameters: "false",
96-
ProtectRoutes: []string{},
97-
ExcludeRoutes: []string{},
98-
ProtectHttpMethods: []string{},
99-
ProtectFileExtensions: []string{},
100-
GoodBots: []string{},
101-
ExemptIPs: []string{},
102-
ExemptUserAgents: []string{},
103-
ChallengeURL: "/challenge",
104-
ChallengeTmpl: "challenge.tmpl.html",
105-
ChallengeStatusCode: 0,
106-
EnableStatsPage: "false",
107-
LogLevel: "INFO",
108-
IPDepth: 0,
109-
CaptchaProvider: "turnstile",
110-
Mode: "prefix",
98+
RateLimit: 20,
99+
Window: 86400,
100+
IPv4SubnetMask: 16,
101+
IPv6SubnetMask: 64,
102+
IPForwardedHeader: "",
103+
ProtectParameters: "false",
104+
ProtectRoutes: []string{},
105+
ExcludeRoutes: []string{},
106+
ProtectHttpMethods: []string{},
107+
ProtectFileExtensions: []string{},
108+
GoodBots: []string{},
109+
ExemptIPs: []string{},
110+
ExemptUserAgents: []string{},
111+
ChallengeURL: "/challenge",
112+
ChallengeTmpl: "challenge.tmpl.html",
113+
ChallengeStatusCode: 0,
114+
EnableStatsPage: "false",
115+
LogLevel: "INFO",
116+
IPDepth: 0,
117+
CaptchaProvider: "turnstile",
118+
Mode: "prefix",
119+
EnableStateReconciliation: "false",
111120
}
112121
}
113122

@@ -705,7 +714,13 @@ func (c *Config) ParseHttpMethods(log *slog.Logger) {
705714
}
706715

707716
func (bc *CaptchaProtect) saveState(ctx context.Context) {
708-
ticker := time.NewTicker(StateSaveInterval)
717+
// Add random jitter to prevent multiple instances from trying to save simultaneously
718+
jitter := time.Duration(rand.Intn(int(StateSaveJitter.Milliseconds()))) * time.Millisecond
719+
interval := StateSaveInterval + jitter
720+
721+
bc.log.Debug("State save configured", "baseInterval", StateSaveInterval, "jitter", jitter, "actualInterval", interval)
722+
723+
ticker := time.NewTicker(interval)
709724
defer ticker.Stop()
710725

711726
file, err := os.OpenFile(bc.config.PersistentStateFile, os.O_CREATE|os.O_WRONLY, 0644)
@@ -730,9 +745,12 @@ func (bc *CaptchaProtect) saveState(ctx context.Context) {
730745
}
731746
}
732747

733-
// saveStateNow performs an immediate state save with file locking and reconciliation.
734-
// This prevents multiple plugin instances from overwriting each other's state.
748+
// saveStateNow performs an immediate state save with file locking and optional reconciliation.
749+
// When reconciliation is enabled, it reads and merges state from disk before saving to prevent
750+
// multiple plugin instances from overwriting each other's data (at the cost of extra I/O).
735751
func (bc *CaptchaProtect) saveStateNow() {
752+
startTime := time.Now()
753+
736754
lock, err := state.NewFileLock(bc.config.PersistentStateFile + ".lock")
737755
if err != nil {
738756
bc.log.Error("failed to create file lock for saving", "err", err)
@@ -744,33 +762,62 @@ func (bc *CaptchaProtect) saveStateNow() {
744762
bc.log.Error("failed to acquire lock for saving state", "err", err)
745763
return
746764
}
765+
lockDuration := time.Since(startTime)
747766

748-
// First, load and reconcile with existing file state
749-
// This ensures we don't overwrite newer data from other instances
750-
fileContent, err := os.ReadFile(bc.config.PersistentStateFile)
751-
if err == nil && len(fileContent) > 0 {
752-
var fileState state.State
753-
if err := json.Unmarshal(fileContent, &fileState); err == nil {
754-
bc.log.Debug("Reconciling state before save")
755-
state.ReconcileState(fileState, bc.rateCache, bc.botCache, bc.verifiedCache)
767+
var readDuration, reconcileDuration, marshalDuration, writeDuration time.Duration
768+
769+
// Reconcile with existing file state if enabled
770+
// This prevents multiple instances from overwriting each other's data
771+
if bc.config.EnableStateReconciliation == "true" {
772+
readStart := time.Now()
773+
fileContent, err := os.ReadFile(bc.config.PersistentStateFile)
774+
readDuration = time.Since(readStart)
775+
776+
if err == nil && len(fileContent) > 0 {
777+
reconcileStart := time.Now()
778+
var fileState state.State
779+
if err := json.Unmarshal(fileContent, &fileState); err == nil {
780+
bc.log.Debug("Reconciling state before save", "fileBytes", len(fileContent))
781+
state.ReconcileState(fileState, bc.rateCache, bc.botCache, bc.verifiedCache)
782+
}
783+
reconcileDuration = time.Since(reconcileStart)
756784
}
757785
}
758786

759-
// Now save our current state
787+
// Marshal current state
788+
marshalStart := time.Now()
760789
currentState := state.GetState(bc.rateCache.Items(), bc.botCache.Items(), bc.verifiedCache.Items())
761790
jsonData, err := json.Marshal(currentState)
791+
marshalDuration = time.Since(marshalStart)
792+
762793
if err != nil {
763794
bc.log.Error("failed to marshal state data", "err", err)
764795
return
765796
}
766797

798+
// Write to disk
799+
writeStart := time.Now()
767800
err = os.WriteFile(bc.config.PersistentStateFile, jsonData, 0644)
801+
writeDuration = time.Since(writeStart)
802+
768803
if err != nil {
769804
bc.log.Error("failed to save state data", "err", err)
770805
return
771806
}
772807

773-
bc.log.Debug("State saved successfully")
808+
totalDuration := time.Since(startTime)
809+
bc.log.Debug("State saved successfully",
810+
"bytes", len(jsonData),
811+
"rateEntries", len(currentState.Rate),
812+
"botEntries", len(currentState.Bots),
813+
"verifiedEntries", len(currentState.Verified),
814+
"lockMs", lockDuration.Milliseconds(),
815+
"readMs", readDuration.Milliseconds(),
816+
"reconcileMs", reconcileDuration.Milliseconds(),
817+
"marshalMs", marshalDuration.Milliseconds(),
818+
"writeMs", writeDuration.Milliseconds(),
819+
"totalMs", totalDuration.Milliseconds(),
820+
)
774821
}
775822

776823
func (bc *CaptchaProtect) loadState() {

0 commit comments

Comments
 (0)