diff --git a/acceptance-tests/bosh_helpers.go b/acceptance-tests/bosh_helpers.go index 8197b47c..415ee0e4 100644 --- a/acceptance-tests/bosh_helpers.go +++ b/acceptance-tests/bosh_helpers.go @@ -322,3 +322,13 @@ func crashHAProxy(haproxyInfo haproxyInfo) { _, _, err := runOnRemote(haproxyInfo.SSHUser, haproxyInfo.PublicIP, haproxyInfo.SSHPrivateKey, "sudo pkill -9 -x haproxy") Expect(err).NotTo(HaveOccurred()) } + +// runHAProxySocketCommand sends a command to the HAProxy Runtime API via the stats socket using socat. +// Returns the trimmed stdout output. +func runHAProxySocketCommand(haproxyInfo haproxyInfo, command string) string { + cmd := fmt.Sprintf(`echo "%s" | sudo socat stdio /var/vcap/sys/run/haproxy/stats.sock`, command) + stdout, _, err := runOnRemote(haproxyInfo.SSHUser, haproxyInfo.PublicIP, haproxyInfo.SSHPrivateKey, cmd) + Expect(err).NotTo(HaveOccurred()) + return strings.TrimSpace(stdout) +} + diff --git a/acceptance-tests/rate_limit_test.go b/acceptance-tests/rate_limit_test.go index 3415d967..2400d173 100644 --- a/acceptance-tests/rate_limit_test.go +++ b/acceptance-tests/rate_limit_test.go @@ -20,16 +20,16 @@ var _ = Describe("Rate-Limiting", func() { value: 10s - type: replace path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/requests_rate_limit/table_size? - value: 100 + value: 1k - type: replace path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit?/connections value: %d - type: replace path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit/window_size? - value: 100s + value: 10s - type: replace path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit/table_size? 
- value: 100 + value: 1k `, rateLimit, rateLimit) haproxyBackendPort := 12000 haproxyInfo, _ := deployHAProxy(baseManifestVars{ @@ -118,10 +118,10 @@ var _ = Describe("Rate-Limiting", func() { value: %d - type: replace path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit/window_size? - value: 100s + value: 10s - type: replace path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit/table_size? - value: 100 + value: 1k - type: replace path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit/block? value: true @@ -285,4 +285,99 @@ var _ = Describe("Rate-Limiting", func() { } } }) + + It("Connection Based Limiting works via manifest and can be overridden at runtime via socket", func() { + connLimit := 5 + opsfileConnectionsRateLimit := fmt.Sprintf(`--- +- type: replace + path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit?/connections + value: %d +- type: replace + path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit/window_size? + value: 10s +- type: replace + path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit/table_size? + value: 100 +- type: replace + path: /instance_groups/name=haproxy/jobs/name=haproxy/properties/ha_proxy/connections_rate_limit/block? 
+ value: true +`, connLimit) + haproxyBackendPort := 12000 + haproxyInfo, _ := deployHAProxy(baseManifestVars{ + haproxyBackendPort: haproxyBackendPort, + haproxyBackendServers: []string{"127.0.0.1"}, + deploymentName: deploymentNameForTestNode(), + }, []string{opsfileConnectionsRateLimit}, map[string]interface{}{}, true) + + closeLocalServer, localPort := startDefaultTestServer() + defer closeLocalServer() + + closeTunnel := setupTunnelFromHaproxyToTestServer(haproxyInfo, haproxyBackendPort, localPort) + defer closeTunnel() + + By("Verifying proc.connections_rate_limit_connections is initialised from manifest value") + output := runHAProxySocketCommand(haproxyInfo, "get var proc.connections_rate_limit_connections") + Expect(output).To(ContainSubstring(fmt.Sprintf("%d", connLimit))) + + By("Verifying proc.connections_rate_limit_block is initialised as true from manifest block: true") + output = runHAProxySocketCommand(haproxyInfo, "get var proc.connections_rate_limit_block") + Expect(output).To(ContainSubstring("1")) + + By("Verifying connections are blocked after exceeding the manifest-configured limit") + testRequestCount := int(float64(connLimit) * 1.5) + firstFailure := -1 + successfulRequestCount := 0 + for i := 0; i < testRequestCount; i++ { + rt := &http.Transport{DisableKeepAlives: true} + client := &http.Client{Transport: rt} + resp, err := client.Get(fmt.Sprintf("http://%s/foo", haproxyInfo.PublicIP)) + if err == nil && resp.StatusCode == 200 { + resp.Body.Close() + successfulRequestCount++ + continue + } + if err == nil { + resp.Body.Close() + } + if firstFailure == -1 { + firstFailure = i + } + } + Expect(firstFailure).To(Equal(connLimit)) + Expect(successfulRequestCount).To(Equal(connLimit)) + + By("Clearing stick table before overriding limit") + runHAProxySocketCommand(haproxyInfo, "clear table st_tcp_conn_rate") + + By("Overriding the limit at runtime via socket to a higher value") + newLimit := connLimit * 3 + runHAProxySocketCommand(haproxyInfo, 
fmt.Sprintf("experimental-mode on; set var proc.connections_rate_limit_connections int(%d)", newLimit)) + + By("Verifying the override is reflected via get var") + output = runHAProxySocketCommand(haproxyInfo, "get var proc.connections_rate_limit_connections") + Expect(output).To(ContainSubstring(fmt.Sprintf("%d", newLimit))) + + By("Verifying connections are allowed up to the new higher socket-configured limit") + testRequestCount = int(float64(newLimit) * 1.5) + firstFailure = -1 + successfulRequestCount = 0 + for i := 0; i < testRequestCount; i++ { + rt := &http.Transport{DisableKeepAlives: true} + client := &http.Client{Transport: rt} + resp, err := client.Get(fmt.Sprintf("http://%s/foo", haproxyInfo.PublicIP)) + if err == nil && resp.StatusCode == 200 { + resp.Body.Close() + successfulRequestCount++ + continue + } + if err == nil { + resp.Body.Close() + } + if firstFailure == -1 { + firstFailure = i + } + } + Expect(firstFailure).To(Equal(newLimit)) + Expect(successfulRequestCount).To(Equal(newLimit)) + }) }) diff --git a/docs/rate_limiting.md b/docs/rate_limiting.md index b6e63ea6..2dd4265d 100644 --- a/docs/rate_limiting.md +++ b/docs/rate_limiting.md @@ -110,12 +110,74 @@ frontend http-in ``` ## Querying Current Stick-Table Status -To get more insight into what is going on inside HAProxy regarding its rate limits, you can query the stats socket to get the raw table data: +To get more insight into what is going on inside HAProxy regarding its rate limits, you can query the stats socket at `/var/vcap/sys/run/haproxy/stats.sock` to get the raw table data: ```bash $ echo "show table st_http_req_rate" | socat /var/vcap/sys/run/haproxy/stats.sock - # table: st_http_req_rate, type: ip, size:10485760, used:1 -0x56495f3dc3d0: key=172.18.0.1 use=0 exp=7618 http_req_rate(10000)=10 +0x...: key=:ffff:172.18.0.1 use=0 exp=7618 http_req_rate(10000)=10 + +echo "show table st_tcp_conn_rate" | socat stdio /var/vcap/sys/run/haproxy/stats.sock +# => # table: st_tcp_conn_rate, 
type: ipv6, size:1048576, used:2 +# => 0x...: key=::ffff:203.0.113.42 use=0 exp=8123 shard=0 conn_rate(10000)=5 +``` + +To find the IP with the highest connection rate, use: + +```bash +echo "show table st_tcp_conn_rate" | socat stdio /var/vcap/sys/run/haproxy/stats.sock | sort -t= -k6 -rn | head -1 ``` > Note: You will likely need `sudo` permission to run socat. + +## Control Connection Rate Limiting via HAProxy Runtime API + +Normally, changing rate-limit settings requires updating the manifest and reloading HAProxy. Using the HAProxy Runtime API, blocking can be enabled or disabled, and the connection threshold can be tightened or loosened while HAProxy continues running and serving traffic. This is particularly useful during an active incident, when a rapid reaction is needed. + +### Prerequisites + +- `ha_proxy.master_cli_enable: true` or `ha_proxy.stats_enable: true` must be set in the manifest to enable the HAProxy Runtime API. +- `ha_proxy.connections_rate_limit.table_size` and `ha_proxy.connections_rate_limit.window_size` must be defined in the manifest to create the stick table and enable connection tracking. +- `root` permissions are required to write to the socket. + +### How Runtime Control Works + +When HAProxy starts, it reads `connections_rate_limit.block` and `connections_rate_limit.connections` from the manifest and stores them as process-level variables inside the running HAProxy process. Updating a variable instantly changes the behavior for all subsequent connections, as every new TCP connection is evaluated against these variables in real time. + +These variables are updated by sending plain-text commands to the HAProxy stats socket. The socket is available as long as HAProxy is running, and any change persists until the next redeploy, at which point the manifest values are restored. + +> Note: the connections threshold is applied per the defined window_size, which is also used for counting connections.
For example, if `window_size` is set to `10s` and `connections` is set to `100`, then the threshold of 100 connections applies to every 10-second window. + +### Inspect Current Variable Values + +```bash +echo "get var proc.connections_rate_limit_connections" | sudo socat stdio /var/vcap/sys/run/haproxy/stats.sock +# => proc.connections_rate_limit_connections: type=sint value=<600> + +echo "get var proc.connections_rate_limit_block" | sudo socat stdio /var/vcap/sys/run/haproxy/stats.sock +# => proc.connections_rate_limit_block: type=bool value=<1> +``` + +### Enable or Disable Blocking at Runtime + +```bash +# Enable blocking (equivalent to setting block: true in the manifest) +echo "experimental-mode on; set var proc.connections_rate_limit_block bool(true)" | sudo socat stdio /var/vcap/sys/run/haproxy/stats.sock + +# Disable blocking without reloading (equivalent to setting block: false in the manifest) +echo "experimental-mode on; set var proc.connections_rate_limit_block bool(false)" | sudo socat stdio /var/vcap/sys/run/haproxy/stats.sock +``` + +### Adjust the Connections Threshold at Runtime + +```bash +# Allow up to 100 connections per window (equivalent to setting connections: 100 in the manifest) +echo "experimental-mode on; set var proc.connections_rate_limit_connections int(100)" | sudo socat stdio /var/vcap/sys/run/haproxy/stats.sock +``` + +### Enable Rate Limiting and Set Threshold in One Step + +```bash +echo "experimental-mode on; set var proc.connections_rate_limit_connections int(100); set var proc.connections_rate_limit_block bool(true)" | sudo socat stdio /var/vcap/sys/run/haproxy/stats.sock +``` + diff --git a/jobs/haproxy/templates/haproxy.config.erb b/jobs/haproxy/templates/haproxy.config.erb index 16ef574c..12ef57d7 100644 --- a/jobs/haproxy/templates/haproxy.config.erb +++ b/jobs/haproxy/templates/haproxy.config.erb @@ -230,6 +230,15 @@ end abort "Conflicting configuration: enable_redispatch works only with retries > 0" end + # Safety guard: 
block=true without connections would leave proc.connections_rate_limit_connections unset, so blocking would be silently disabled until a threshold is set via the runtime API + if_p("ha_proxy.connections_rate_limit.table_size", "ha_proxy.connections_rate_limit.window_size") do + if p("ha_proxy.connections_rate_limit.block", false) + if !p("ha_proxy.connections_rate_limit.connections", nil) + abort "connections_rate_limit.connections must be set in the manifest as the initial threshold when block is true; otherwise rate-limiting will be silently disabled until a value is set via the runtime API." + end + end + end + backend_servers = [] backend_servers_local = [] backend_port = nil @@ -324,6 +333,12 @@ global <%- if backend_match_http_protocol && backends.length == 2 -%> set-var proc.h2_alpn_tag str(h2) <%- end -%> + <%- if_p("ha_proxy.connections_rate_limit.table_size", "ha_proxy.connections_rate_limit.window_size") do -%> + <%- if_p("ha_proxy.connections_rate_limit.connections") do |conn_rate_connections| -%> + set-var proc.connections_rate_limit_connections int(<%= conn_rate_connections %>) + <%- end -%> + set-var proc.connections_rate_limit_block bool(<%= p("ha_proxy.connections_rate_limit.block", false) %>) + <%- end -%> <%- if p("ha_proxy.always_allow_body_http10") %> h1-accept-payload-with-any-method <%- end %> @@ -437,11 +452,8 @@ frontend http-in tcp-request <%= tcp_request_phase %> reject if layer4_block <%- if_p("ha_proxy.connections_rate_limit.table_size", "ha_proxy.connections_rate_limit.window_size") do -%> tcp-request <%= tcp_request_phase %> track-sc0 src table st_tcp_conn_rate - <%- if_p("ha_proxy.connections_rate_limit.block", "ha_proxy.connections_rate_limit.connections") do |block, connections| -%> - <%-if block -%> - tcp-request <%= tcp_request_phase %> reject if { sc_conn_rate(0) gt <%= connections %> } - <%- end -%> - <%- end -%> + # use sub() converter as variable references are only accepted as arguments to converters + tcp-request <%= tcp_request_phase %> reject if { var(proc.connections_rate_limit_block)
-m bool } { var(proc.connections_rate_limit_connections) -m int gt 0 } { sc_conn_rate(0),sub(proc.connections_rate_limit_connections) gt 0 } <%- end -%> <%- if_p("ha_proxy.requests_rate_limit.table_size", "ha_proxy.requests_rate_limit.window_size") do -%> http-request track-sc1 src table st_http_req_rate @@ -571,11 +583,8 @@ frontend https-in tcp-request <%= tcp_request_phase %> reject if layer4_block <%- if_p("ha_proxy.connections_rate_limit.table_size", "ha_proxy.connections_rate_limit.window_size") do -%> tcp-request <%= tcp_request_phase %> track-sc0 src table st_tcp_conn_rate - <%- if_p("ha_proxy.connections_rate_limit.block", "ha_proxy.connections_rate_limit.connections") do |block, connections| -%> - <%-if block -%> - tcp-request <%= tcp_request_phase %> reject if { sc_conn_rate(0) gt <%= connections %> } - <%- end -%> - <%- end -%> + # use sub() converter as variable references are only accepted as arguments to converters + tcp-request <%= tcp_request_phase %> reject if { var(proc.connections_rate_limit_block) -m bool } { var(proc.connections_rate_limit_connections) -m int gt 0 } { sc_conn_rate(0),sub(proc.connections_rate_limit_connections) gt 0 } <%- end -%> <%- if_p("ha_proxy.requests_rate_limit.table_size", "ha_proxy.requests_rate_limit.window_size") do -%> http-request track-sc1 src table st_http_req_rate diff --git a/spec/haproxy/templates/haproxy_config/rate_limit_spec.rb b/spec/haproxy/templates/haproxy_config/rate_limit_spec.rb index a3bce227..f041cf90 100644 --- a/spec/haproxy/templates/haproxy_config/rate_limit_spec.rb +++ b/spec/haproxy/templates/haproxy_config/rate_limit_spec.rb @@ -61,6 +61,21 @@ end end + context 'when ha_proxy.connections_rate_limit "window_size" and "table_size" are NOT provided' do + context 'when "connections" and "block" are set in manifest' do + let(:properties) do + default_properties.deep_merge({ + 'connections_rate_limit' => { 'connections' => '5', 'block' => true } + }) + end + + it 'does not set 
proc.connections_rate_limit_connections or proc.connections_rate_limit_block in global section' do + expect(haproxy_conf['global']).not_to include('set-var proc.connections_rate_limit_connections') + expect(haproxy_conf['global']).not_to include('set-var proc.connections_rate_limit_block') + end + end + end + context 'when ha_proxy.connections_rate_limit properties "window_size", "table_size" are provided' do let(:backend_conn_rate) { haproxy_conf['backend st_tcp_conn_rate'] } @@ -88,6 +103,16 @@ expect(frontend_https).to include('tcp-request connection track-sc0 src table st_tcp_conn_rate') end + it 'always emits the reject rule (even without connections or block set in manifest)' do + expect(frontend_http).to include('tcp-request connection reject if { var(proc.connections_rate_limit_block) -m bool } { var(proc.connections_rate_limit_connections) -m int gt 0 } { sc_conn_rate(0),sub(proc.connections_rate_limit_connections) gt 0 }') + expect(frontend_https).to include('tcp-request connection reject if { var(proc.connections_rate_limit_block) -m bool } { var(proc.connections_rate_limit_connections) -m int gt 0 } { sc_conn_rate(0),sub(proc.connections_rate_limit_connections) gt 0 }') + end + + it 'always sets proc.connections_rate_limit_block to false in global when block is not configured in manifest' do + expect(haproxy_conf['global']).to include('set-var proc.connections_rate_limit_block bool(false)') + expect(haproxy_conf['global']).not_to include('set-var proc.connections_rate_limit_connections') + end + context 'when proxy protocol used' do let(:properties) do temp_properties.deep_merge({ 'accept_proxy' => true }) @@ -104,23 +129,49 @@ temp_properties.deep_merge({ 'connections_rate_limit' => { 'connections' => '5', 'block' => 'true' } }) end - it 'adds tcp-request connection reject to http-in and https-in frontends' do - expect(frontend_http).to include('tcp-request connection reject if { sc_conn_rate(0) gt 5 }') + it 'adds tcp-request connection reject using 
process variables to http-in and https-in frontends' do + expect(frontend_http).to include('tcp-request connection reject if { var(proc.connections_rate_limit_block) -m bool } { var(proc.connections_rate_limit_connections) -m int gt 0 } { sc_conn_rate(0),sub(proc.connections_rate_limit_connections) gt 0 }') expect(frontend_http).to include('tcp-request connection track-sc0 src table st_tcp_conn_rate') - expect(frontend_https).to include('tcp-request connection reject if { sc_conn_rate(0) gt 5 }') + expect(frontend_https).to include('tcp-request connection reject if { var(proc.connections_rate_limit_block) -m bool } { var(proc.connections_rate_limit_connections) -m int gt 0 } { sc_conn_rate(0),sub(proc.connections_rate_limit_connections) gt 0 }') expect(frontend_https).to include('tcp-request connection track-sc0 src table st_tcp_conn_rate') end end + context 'when "connections" is provided but "block" is false' do + let(:properties) do + temp_properties.deep_merge({ 'connections_rate_limit' => { 'connections' => '10', 'block' => false } }) + end + + it 'sets proc.connections_rate_limit_connections and proc.connections_rate_limit_block process variables in global section' do + expect(haproxy_conf['global']).to include('set-var proc.connections_rate_limit_connections int(10)') + expect(haproxy_conf['global']).to include('set-var proc.connections_rate_limit_block bool(false)') + end + + it 'still emits reject rule (rejection controlled at runtime via proc.connections_rate_limit_block variable)' do + expect(frontend_http).to include('tcp-request connection reject if { var(proc.connections_rate_limit_block) -m bool } { var(proc.connections_rate_limit_connections) -m int gt 0 } { sc_conn_rate(0),sub(proc.connections_rate_limit_connections) gt 0 }') + expect(frontend_https).to include('tcp-request connection reject if { var(proc.connections_rate_limit_block) -m bool } { var(proc.connections_rate_limit_connections) -m int gt 0 } { 
sc_conn_rate(0),sub(proc.connections_rate_limit_connections) gt 0 }') + end + end + + context 'when only "block" is true but "connections" is not set in manifest' do + let(:properties) do + temp_properties.deep_merge({ 'connections_rate_limit' => { 'block' => true } }) + end + + it 'raises a validation error to prevent total lockout (every client with >= 1 connection would be blocked)' do + expect { haproxy_conf }.to raise_error(/connections_rate_limit.connections must be set in the manifest as the initial threshold when block is true/) + end + end + context 'when proxy protocol used and "connections" and "block" are also provided' do let(:properties) do - temp_properties.deep_merge({ 'accept_proxy' => true, 'connections_rate_limit' => { 'connections' => '5', 'block' => 'true' } }) + temp_properties.deep_merge({ 'accept_proxy' => true, 'connections_rate_limit' => { 'connections' => '5', 'block' => true } }) end - it 'adds tcp-request session reject to http-in and https-in frontends' do - expect(frontend_http).to include('tcp-request session reject if { sc_conn_rate(0) gt 5 }') + it 'adds tcp-request session reject using process variables to http-in and https-in frontends' do + expect(frontend_http).to include('tcp-request session reject if { var(proc.connections_rate_limit_block) -m bool } { var(proc.connections_rate_limit_connections) -m int gt 0 } { sc_conn_rate(0),sub(proc.connections_rate_limit_connections) gt 0 }') expect(frontend_http).to include('tcp-request session track-sc0 src table st_tcp_conn_rate') - expect(frontend_https).to include('tcp-request session reject if { sc_conn_rate(0) gt 5 }') + expect(frontend_https).to include('tcp-request session reject if { var(proc.connections_rate_limit_block) -m bool } { var(proc.connections_rate_limit_connections) -m int gt 0 } { sc_conn_rate(0),sub(proc.connections_rate_limit_connections) gt 0 }') expect(frontend_https).to include('tcp-request session track-sc0 src table st_tcp_conn_rate') end end