Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions acceptance-tests/bosh_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,3 +322,13 @@ func crashHAProxy(haproxyInfo haproxyInfo) {
_, _, err := runOnRemote(haproxyInfo.SSHUser, haproxyInfo.PublicIP, haproxyInfo.SSHPrivateKey, "sudo pkill -9 -x haproxy")
Expect(err).NotTo(HaveOccurred())
}

// runHAProxySocketCommand pipes command into socat, which forwards it to the
// HAProxy stats socket (the HAProxy Runtime API) on the remote HAProxy host.
// The remote invocation goes through runOnRemote using the SSH user, public IP
// and private key from haproxyInfo. A failing remote call fails the running
// spec via Expect. The second value returned by runOnRemote is ignored.
// Returns the remote stdout with leading and trailing whitespace removed.
func runHAProxySocketCommand(haproxyInfo haproxyInfo, command string) string {
	// command is embedded inside double quotes of a shell `echo`, so it must
	// not itself contain characters the remote shell would interpret there.
	const socatPipeline = `echo "%s" | sudo socat stdio /var/vcap/sys/run/haproxy/stats.sock`

	output, _, runErr := runOnRemote(
		haproxyInfo.SSHUser,
		haproxyInfo.PublicIP,
		haproxyInfo.SSHPrivateKey,
		fmt.Sprintf(socatPipeline, command),
	)
	Expect(runErr).NotTo(HaveOccurred())

	return strings.TrimSpace(output)
}

105 changes: 100 additions & 5 deletions acceptance-tests/rate_limit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,16 @@ var _ = Describe("Rate-Limiting", func() {
value: 10s
- type: replace
path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/requests_rate_limit/table_size?
value: 100
value: 1k
- type: replace
path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit?/connections
value: %d
- type: replace
path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit/window_size?
value: 100s
value: 10s
- type: replace
path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit/table_size?
value: 100
value: 1k
`, rateLimit, rateLimit)
haproxyBackendPort := 12000
haproxyInfo, _ := deployHAProxy(baseManifestVars{
Expand Down Expand Up @@ -118,10 +118,10 @@ var _ = Describe("Rate-Limiting", func() {
value: %d
- type: replace
path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit/window_size?
value: 100s
value: 10s
- type: replace
path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit/table_size?
value: 100
value: 1k
- type: replace
path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit/block?
value: true
Expand Down Expand Up @@ -285,4 +285,99 @@ var _ = Describe("Rate-Limiting", func() {
}
}
})

// Spec: the connection rate limit configured in the manifest is exposed as
// HAProxy process variables, is enforced for incoming connections, and can be
// raised at runtime through the stats socket without redeploying.
It("Connection Based Limiting works via manifest and can be overridden at runtime via socket", func() {
	connLimit := 5
	opsfileConnectionsRateLimit := fmt.Sprintf(`---
- type: replace
path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit?/connections
value: %d
- type: replace
path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit/window_size?
value: 10s
- type: replace
path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit/table_size?
value: 100
- type: replace
path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit/block?
value: true
`, connLimit)
	haproxyBackendPort := 12000
	haproxyInfo, _ := deployHAProxy(baseManifestVars{
		haproxyBackendPort:    haproxyBackendPort,
		haproxyBackendServers: []string{"127.0.0.1"},
		deploymentName:        deploymentNameForTestNode(),
	}, []string{opsfileConnectionsRateLimit}, map[string]interface{}{}, true)

	closeLocalServer, localPort := startDefaultTestServer()
	defer closeLocalServer()

	closeTunnel := setupTunnelFromHaproxyToTestServer(haproxyInfo, haproxyBackendPort, localPort)
	defer closeTunnel()

	// sendRequests issues requestCount sequential GET requests against HAProxy.
	// Keep-alives are disabled so every request opens its own TCP connection,
	// which is what the connection rate limit counts.
	// It returns the zero-based index of the first request that did not yield
	// an HTTP 200 (-1 if every request succeeded) and the number of 200s.
	sendRequests := func(requestCount int) (firstFailure int, successfulRequestCount int) {
		firstFailure = -1
		for i := 0; i < requestCount; i++ {
			rt := &http.Transport{DisableKeepAlives: true}
			client := &http.Client{Transport: rt}
			resp, err := client.Get(fmt.Sprintf("http://%s/foo", haproxyInfo.PublicIP))
			if err == nil {
				// Always release the body, whatever the status code was.
				resp.Body.Close()
				if resp.StatusCode == 200 {
					successfulRequestCount++
					continue
				}
			}
			if firstFailure == -1 {
				firstFailure = i
			}
		}
		return firstFailure, successfulRequestCount
	}

	// `get var` answers in the form `<name>: type=<type> value=<<value>>`.
	// Matching on `value=<N>` instead of a bare digit keeps the assertion from
	// passing on unrelated digits elsewhere in the output.
	By("Verifying proc.connections_rate_limit_connections is initialised from manifest value")
	output := runHAProxySocketCommand(haproxyInfo, "get var proc.connections_rate_limit_connections")
	Expect(output).To(ContainSubstring(fmt.Sprintf("value=<%d>", connLimit)))

	By("Verifying proc.connections_rate_limit_block is initialised as true from manifest block: true")
	output = runHAProxySocketCommand(haproxyInfo, "get var proc.connections_rate_limit_block")
	Expect(output).To(ContainSubstring("value=<1>"))

	By("Verifying connections are blocked after exceeding the manifest-configured limit")
	// Send 1.5x the limit so that requests beyond the threshold are attempted.
	firstFailure, successfulRequestCount := sendRequests(int(float64(connLimit) * 1.5))
	Expect(firstFailure).To(Equal(connLimit))
	Expect(successfulRequestCount).To(Equal(connLimit))

	By("Clearing stick table before overriding limit")
	// Reset the tracked connection rate so the second phase starts from zero.
	runHAProxySocketCommand(haproxyInfo, "clear table st_tcp_conn_rate")

	By("Overriding the limit at runtime via socket to a higher value")
	newLimit := connLimit * 3
	runHAProxySocketCommand(haproxyInfo, fmt.Sprintf("experimental-mode on; set var proc.connections_rate_limit_connections int(%d)", newLimit))

	By("Verifying the override is reflected via get var")
	output = runHAProxySocketCommand(haproxyInfo, "get var proc.connections_rate_limit_connections")
	Expect(output).To(ContainSubstring(fmt.Sprintf("value=<%d>", newLimit)))

	By("Verifying connections are allowed up to the new higher socket-configured limit")
	firstFailure, successfulRequestCount = sendRequests(int(float64(newLimit) * 1.5))
	Expect(firstFailure).To(Equal(newLimit))
	Expect(successfulRequestCount).To(Equal(newLimit))
})
})
66 changes: 64 additions & 2 deletions docs/rate_limiting.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,74 @@ frontend http-in
```

## Querying Current Stick-Table Status
To get more insight into what is going on inside HAProxy regarding its rate limits, you can query the stats socket to get the raw table data:
To get more insight into what is going on inside HAProxy regarding its rate limits, you can query the stats socket at `/var/vcap/sys/run/haproxy/stats.sock` to get the raw table data:

```bash
$ echo "show table st_http_req_rate" | socat /var/vcap/sys/run/haproxy/stats.sock -
# table: st_http_req_rate, type: ip, size:10485760, used:1
0x56495f3dc3d0: key=172.18.0.1 use=0 exp=7618 http_req_rate(10000)=10
0x...: key=:ffff:172.18.0.1 use=0 exp=7618 http_req_rate(10000)=10

echo "show table st_tcp_conn_rate" | socat stdio /var/vcap/sys/run/haproxy/stats.sock
# => # table: st_tcp_conn_rate, type: ipv6, size:1048576, used:2
# => 0x...: key=::ffff:203.0.113.42 use=0 exp=8123 shard=0 conn_rate(10000)=5
```

To find the IP with the highest connection rate, use:

```bash
echo "show table st_tcp_conn_rate" | socat stdio /var/vcap/sys/run/haproxy/stats.sock | awk -F= '/key=/ {print $NF, $0}' | sort -rn | head -1
```

> Note: You will likely need `sudo` permission to run socat.

## Control Connection Rate Limiting via HAProxy Runtime API

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe make the two new proc. variables more prominent, by introducing them in an unsorted list, similar to the variables and options listed in the section "Configuration Options". It was first a bit hard for me to understand why we have these proc variables now.

One point about the naming:
connections_rate_limit.block -> proc.conn_rate_block
connections_rate_limit.connections -> proc.conn_rate_limit

This is inconsistent. If we can still change it and accept that it is a breaking change, I'd prefer the following variable naming:

connection_rate_limit.enabled -> proc.connection_rate_limit_enabled
connection_rate_limit.connections -> proc.connection_rate_limit_connections

This would be more consistent, and having enabled instead of block indicates that this is a boolean switch.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renamed variables:
connections_rate_limit.block -> proc.connections_rate_limit_block
connections_rate_limit.connections -> proc.connections_rate_limit_connections

Enhanced the documentation


Normally, changing rate-limit settings requires updating the manifest and reloading HAProxy. Using the HAProxy Runtime API, blocking can be enabled or disabled, and the connection threshold can be tightened or loosened while HAProxy continues running and serving traffic. This is particularly useful during an active incident, when a rapid reaction is needed.

### Prerequisites

- `ha_proxy.master_cli_enable: true` or `ha_proxy.stats_enable: true` must be set in the manifest to enable the HAProxy Runtime API.
- `ha_proxy.connections_rate_limit.table_size` and `ha_proxy.connections_rate_limit.window_size` must be defined in the manifest to create the stick table and enable connection tracking.
- `root` permissions are required to write to the socket.

### How Runtime Control Works

When HAProxy starts, it reads `connections_rate_limit.block` and `connections_rate_limit.connections` from the manifest and stores them as process-level variables inside the running HAProxy process. Updating a variable instantly changes the behavior for all subsequent connections, as every new TCP connection is evaluated against these variables in real time.

These variables are updated by sending plain-text commands to the HAProxy stats socket. The socket is available as long as HAProxy is running. A change made this way is not written back to the configuration: it persists only until HAProxy is next restarted or reloaded (for example by a redeploy), at which point the values from the manifest are restored.

> Note: the connections threshold is applied per the defined window_size, which is also used for counting connections. For example, if `window_size` is set to `10s` and `connections` is set to `100`, then the threshold of 100 connections applies to every 10-second window.

### Inspect Current Variable Values

```bash
echo "get var proc.connections_rate_limit_connections" | sudo socat stdio /var/vcap/sys/run/haproxy/stats.sock
# => proc.connections_rate_limit_connections: type=sint value=<600>

echo "get var proc.connections_rate_limit_block" | sudo socat stdio /var/vcap/sys/run/haproxy/stats.sock
# => proc.connections_rate_limit_block: type=bool value=<1>
```

### Enable or Disable Blocking at Runtime

```bash
# Enable blocking (equivalent to setting block: true in the manifest)
echo "experimental-mode on; set var proc.connections_rate_limit_block bool(true)" | sudo socat stdio /var/vcap/sys/run/haproxy/stats.sock

# Disable blocking without reloading (equivalent to setting block: false in the manifest)
echo "experimental-mode on; set var proc.connections_rate_limit_block bool(false)" | sudo socat stdio /var/vcap/sys/run/haproxy/stats.sock
```

### Adjust the Connections Threshold at Runtime

```bash
# Allow up to 100 connections per window (equivalent to setting connections: 100 in the manifest)
echo "experimental-mode on; set var proc.connections_rate_limit_connections int(100)" | sudo socat stdio /var/vcap/sys/run/haproxy/stats.sock
```

### Enable Rate Limiting and Set Threshold in One Step

```bash
echo "experimental-mode on; set var proc.connections_rate_limit_connections int(100); set var proc.connections_rate_limit_block bool(true)" | sudo socat stdio /var/vcap/sys/run/haproxy/stats.sock
```

29 changes: 19 additions & 10 deletions jobs/haproxy/templates/haproxy.config.erb
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,15 @@ end
abort "Conflicting configuration: enable_redispatch works only with retries > 0"
end

# Safety guard: the reject rule in the frontends only fires while
# proc.connections_rate_limit_connections is greater than 0. With block=true but no
# connections value in the manifest, that variable is never initialised, so rate
# limiting would be silently inactive. Abort rendering instead of deploying that state.
if_p("ha_proxy.connections_rate_limit.table_size", "ha_proxy.connections_rate_limit.window_size") do
  blocking_requested = p("ha_proxy.connections_rate_limit.block", false)
  # The threshold is only looked up when blocking was requested (short-circuit &&).
  if blocking_requested && !p("ha_proxy.connections_rate_limit.connections", nil)
    abort "connections_rate_limit.connections must be set in the manifest as the initial threshold when block is true; otherwise rate-limiting will be silently disabled until a value is set via the runtime API."
  end
end

backend_servers = []
backend_servers_local = []
backend_port = nil
Expand Down Expand Up @@ -324,6 +333,12 @@ global
<%- if backend_match_http_protocol && backends.length == 2 -%>
set-var proc.h2_alpn_tag str(h2)
<%- end -%>
<%- if_p("ha_proxy.connections_rate_limit.table_size", "ha_proxy.connections_rate_limit.window_size") do -%>
<%- if_p("ha_proxy.connections_rate_limit.connections") do |conn_rate_connections| -%>
set-var proc.connections_rate_limit_connections int(<%= conn_rate_connections %>)
<%- end -%>
Comment on lines +337 to +339

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did I understand correctly that the existence of connections_rate_limit.connections in manifest controls if haproxy config will have a process variable, and when it is not set in manifest, the rate-limit control is not possible on the fly? If so, it does not look straightforward to me.
Can we set the variable to zero if the parameter is not set in the manifest? Similar to what we do for proc.conn_rate_block.

Suggested change
<%- if_p("ha_proxy.connections_rate_limit.connections") do |conn_rate_connections| -%>
set-var proc.conn_rate_limit int(<%= conn_rate_connections %>)
<%- end -%>
set-var proc.conn_rate_limit int(<%= p("ha_proxy.connections_rate_limit.connections", 0) %>)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We store the value from the manifest in a process-level variable if defined. If not, it can be set via the socket later. We cannot set the default value to 0, as 0 is a very wrong threshold.

set-var proc.connections_rate_limit_block bool(<%= p("ha_proxy.connections_rate_limit.block", false) %>)
<%- end -%>
<%- if p("ha_proxy.always_allow_body_http10") %>
h1-accept-payload-with-any-method
<%- end %>
Expand Down Expand Up @@ -437,11 +452,8 @@ frontend http-in
tcp-request <%= tcp_request_phase %> reject if layer4_block
<%- if_p("ha_proxy.connections_rate_limit.table_size", "ha_proxy.connections_rate_limit.window_size") do -%>
tcp-request <%= tcp_request_phase %> track-sc0 src table st_tcp_conn_rate
<%- if_p("ha_proxy.connections_rate_limit.block", "ha_proxy.connections_rate_limit.connections") do |block, connections| -%>
<%-if block -%>
tcp-request <%= tcp_request_phase %> reject if { sc_conn_rate(0) gt <%= connections %> }
<%- end -%>
<%- end -%>
# use sub() converter as variable references are only accepted as arguments to converters
tcp-request <%= tcp_request_phase %> reject if { var(proc.connections_rate_limit_block) -m bool } { var(proc.connections_rate_limit_connections) -m int gt 0 } { sc_conn_rate(0),sub(proc.connections_rate_limit_connections) gt 0 }
<%- end -%>
<%- if_p("ha_proxy.requests_rate_limit.table_size", "ha_proxy.requests_rate_limit.window_size") do -%>
http-request track-sc1 src table st_http_req_rate
Expand Down Expand Up @@ -571,11 +583,8 @@ frontend https-in
tcp-request <%= tcp_request_phase %> reject if layer4_block
<%- if_p("ha_proxy.connections_rate_limit.table_size", "ha_proxy.connections_rate_limit.window_size") do -%>
tcp-request <%= tcp_request_phase %> track-sc0 src table st_tcp_conn_rate
<%- if_p("ha_proxy.connections_rate_limit.block", "ha_proxy.connections_rate_limit.connections") do |block, connections| -%>
<%-if block -%>
tcp-request <%= tcp_request_phase %> reject if { sc_conn_rate(0) gt <%= connections %> }
<%- end -%>
<%- end -%>
# use sub() converter as variable references are only accepted as arguments to converters
tcp-request <%= tcp_request_phase %> reject if { var(proc.connections_rate_limit_block) -m bool } { var(proc.connections_rate_limit_connections) -m int gt 0 } { sc_conn_rate(0),sub(proc.connections_rate_limit_connections) gt 0 }
<%- end -%>
<%- if_p("ha_proxy.requests_rate_limit.table_size", "ha_proxy.requests_rate_limit.window_size") do -%>
http-request track-sc1 src table st_http_req_rate
Expand Down
Loading