From 77a8ab414be7b3a0c2f454b5ef54cd3053327360 Mon Sep 17 00:00:00 2001 From: esrakartalOpt Date: Tue, 13 Jan 2026 16:06:38 -0600 Subject: [PATCH] [FSSDK-12150] [Ruby] Add Event Retries --- lib/optimizely/event_dispatcher.rb | 85 +++++++++---- lib/optimizely/helpers/constants.rb | 9 +- lib/optimizely/odp/odp_event_manager.rb | 18 ++- spec/event_dispatcher_spec.rb | 151 +++++++++++++++++++++--- spec/odp/odp_event_manager_spec.rb | 67 ++++++++++- 5 files changed, 285 insertions(+), 45 deletions(-) diff --git a/lib/optimizely/event_dispatcher.rb b/lib/optimizely/event_dispatcher.rb index 874a43db..1669f6ec 100644 --- a/lib/optimizely/event_dispatcher.rb +++ b/lib/optimizely/event_dispatcher.rb @@ -37,33 +37,74 @@ def initialize(logger: nil, error_handler: nil, proxy_config: nil) # # @param event - Event object def dispatch_event(event) - response = Helpers::HttpUtils.make_request( - event.url, event.http_verb, event.params.to_json, event.headers, Helpers::Constants::EVENT_DISPATCH_CONFIG[:REQUEST_TIMEOUT], @proxy_config - ) + retry_count = 0 + max_retries = Helpers::Constants::EVENT_DISPATCH_CONFIG[:MAX_RETRIES] - error_msg = "Event failed to dispatch with response code: #{response.code}" + while retry_count < max_retries + begin + response = Helpers::HttpUtils.make_request( + event.url, event.http_verb, event.params.to_json, event.headers, Helpers::Constants::EVENT_DISPATCH_CONFIG[:REQUEST_TIMEOUT], @proxy_config + ) - case response.code.to_i - when 400...500 - @logger.log(Logger::ERROR, error_msg) - @error_handler.handle_error(HTTPCallError.new("HTTP Client Error: #{response.code}")) + error_msg = "Event failed to dispatch with response code: #{response.code}" - when 500...600 - @logger.log(Logger::ERROR, error_msg) - @error_handler.handle_error(HTTPCallError.new("HTTP Server Error: #{response.code}")) - else - @logger.log(Logger::DEBUG, "event successfully sent with response code #{response.code}") + case response.code.to_i + when 400...500 + @logger.log(Logger::ERROR, error_msg) + @error_handler.handle_error(HTTPCallError.new("HTTP Client Error: #{response.code}")) + # Don't retry on 4xx client errors + return + + when 500...600 + @logger.log(Logger::ERROR, error_msg) + @error_handler.handle_error(HTTPCallError.new("HTTP Server Error: #{response.code}")) + # Retry on 5xx server errors + retry_count += 1 + if retry_count < max_retries + delay = calculate_retry_interval(retry_count - 1) + @logger.log(Logger::DEBUG, "Retrying event dispatch (attempt #{retry_count} of #{max_retries - 1}) after #{delay}s") + sleep(delay) + end + else + @logger.log(Logger::DEBUG, "event successfully sent with response code #{response.code}") + return + end + rescue Timeout::Error => e + @logger.log(Logger::ERROR, "Request Timed out. Error: #{e}") + @error_handler.handle_error(e) + + retry_count += 1 + # Returning Timeout error to retain existing behavior. + return e unless retry_count < max_retries + + delay = calculate_retry_interval(retry_count - 1) + @logger.log(Logger::DEBUG, "Retrying event dispatch (attempt #{retry_count} of #{max_retries - 1}) after #{delay}s") + sleep(delay) + rescue StandardError => e + @logger.log(Logger::ERROR, "Event failed to dispatch. Error: #{e}") + @error_handler.handle_error(e) + + retry_count += 1 + return nil unless retry_count < max_retries + + delay = calculate_retry_interval(retry_count - 1) + @logger.log(Logger::DEBUG, "Retrying event dispatch (attempt #{retry_count} of #{max_retries - 1}) after #{delay}s") + sleep(delay) + end end - rescue Timeout::Error => e - @logger.log(Logger::ERROR, "Request Timed out. Error: #{e}") - @error_handler.handle_error(e) + end - # Returning Timeout error to retain existing behavior. - e - rescue StandardError => e - @logger.log(Logger::ERROR, "Event failed to dispatch. Error: #{e}") - @error_handler.handle_error(e) - nil + private + + # Calculate exponential backoff interval: 200ms, 400ms, 800ms, ... capped at 1s + # + # @param retry_count - Zero-based retry count + # @return [Float] - Delay in seconds + def calculate_retry_interval(retry_count) + initial_interval = Helpers::Constants::EVENT_DISPATCH_CONFIG[:INITIAL_RETRY_INTERVAL] + max_interval = Helpers::Constants::EVENT_DISPATCH_CONFIG[:MAX_RETRY_INTERVAL] + interval = initial_interval * (2**retry_count) + [interval, max_interval].min end end end diff --git a/lib/optimizely/helpers/constants.rb b/lib/optimizely/helpers/constants.rb index db042acd..4334f56d 100644 --- a/lib/optimizely/helpers/constants.rb +++ b/lib/optimizely/helpers/constants.rb @@ -459,7 +459,10 @@ module Constants }.freeze EVENT_DISPATCH_CONFIG = { - REQUEST_TIMEOUT: 10 + REQUEST_TIMEOUT: 10, + MAX_RETRIES: 3, + INITIAL_RETRY_INTERVAL: 0.2, # 200ms in seconds + MAX_RETRY_INTERVAL: 1.0 # 1 second }.freeze ODP_GRAPHQL_API_CONFIG = { @@ -490,7 +493,9 @@ module Constants DEFAULT_QUEUE_CAPACITY: 10_000, DEFAULT_BATCH_SIZE: 10, DEFAULT_FLUSH_INTERVAL_SECONDS: 1, - DEFAULT_RETRY_COUNT: 3 + DEFAULT_RETRY_COUNT: 3, + INITIAL_RETRY_INTERVAL: 0.2, # 200ms in seconds + MAX_RETRY_INTERVAL: 1.0 # 1 second }.freeze HTTP_HEADERS = { diff --git a/lib/optimizely/odp/odp_event_manager.rb b/lib/optimizely/odp/odp_event_manager.rb index fc9084a1..e44ebd0d 100644 --- a/lib/optimizely/odp/odp_event_manager.rb +++ b/lib/optimizely/odp/odp_event_manager.rb @@ -239,7 +239,12 @@ def flush_batch! end break unless should_retry - @logger.log(Logger::DEBUG, 'Error dispatching ODP events, scheduled to retry.') if i < @retry_count + if i < @retry_count - 1 + # Exponential backoff: 200ms, 400ms, 800ms, ... capped at 1s + delay = calculate_retry_interval(i) + @logger.log(Logger::DEBUG, "Error dispatching ODP events, retrying (attempt #{i + 1} of #{@retry_count - 1}) after #{delay}s") + sleep(delay) + end i += 1 end @@ -282,5 +287,16 @@ def process_config_update @api_key = @odp_config&.api_key @api_host = @odp_config&.api_host end + + # Calculate exponential backoff interval: 200ms, 400ms, 800ms, ... capped at 1s + # + # @param retry_count - Zero-based retry count + # @return [Float] - Delay in seconds + def calculate_retry_interval(retry_count) + initial_interval = Helpers::Constants::ODP_EVENT_MANAGER[:INITIAL_RETRY_INTERVAL] + max_interval = Helpers::Constants::ODP_EVENT_MANAGER[:MAX_RETRY_INTERVAL] + interval = initial_interval * (2**retry_count) + [interval, max_interval].min + end end end diff --git a/spec/event_dispatcher_spec.rb b/spec/event_dispatcher_spec.rb index 193f584d..6d4fa2b0 100644 --- a/spec/event_dispatcher_spec.rb +++ b/spec/event_dispatcher_spec.rb @@ -47,16 +47,27 @@ it 'should pass the proxy_config to the HttpUtils helper class' do event = Optimizely::Event.new(:post, @url, @params, @post_headers) - expect(Optimizely::Helpers::HttpUtils).to receive(:make_request).with( + # Allow the method to be called (potentially multiple times due to retries) + allow(Optimizely::Helpers::HttpUtils).to receive(:make_request).with( event.url, event.http_verb, event.params.to_json, event.headers, Optimizely::Helpers::Constants::EVENT_DISPATCH_CONFIG[:REQUEST_TIMEOUT], proxy_config - ) + ).and_return(double(code: '200')) @customized_event_dispatcher.dispatch_event(event) + + # Verify it was called at least once with the correct parameters + expect(Optimizely::Helpers::HttpUtils).to have_received(:make_request).with( + event.url, + event.http_verb, + event.params.to_json, + event.headers, + Optimizely::Helpers::Constants::EVENT_DISPATCH_CONFIG[:REQUEST_TIMEOUT], + proxy_config + ).at_least(:once) end end @@ -117,11 +128,12 @@ result = @customized_event_dispatcher.dispatch_event(event) expect(result).to eq(timeout_error) + # With retries, this will be logged 3 times (once per attempt) expect(spy_logger).to have_received(:log).with( Logger::ERROR, 'Request Timed out. Error: Timeout::Error' - ).once + ).exactly(3).times - expect(error_handler).to have_received(:handle_error).once.with(Timeout::Error) + expect(error_handler).to have_received(:handle_error).exactly(3).times.with(Timeout::Error) end it 'should log and handle any standard error' do @@ -132,11 +144,12 @@ result = @customized_event_dispatcher.dispatch_event(event) expect(result).to eq(nil) + # With retries, this will be logged 3 times (once per attempt) expect(spy_logger).to have_received(:log).with( Logger::ERROR, 'Event failed to dispatch. Error: ArgumentError' - ).once + ).exactly(3).times - expect(error_handler).to have_received(:handle_error).once.with(ArgumentError) + expect(error_handler).to have_received(:handle_error).exactly(3).times.with(ArgumentError) end it 'should log and handle any response with status code 4xx' do @@ -159,22 +172,22 @@ @customized_event_dispatcher.dispatch_event(event) + # With retries, this will be logged 3 times (once per attempt) expect(spy_logger).to have_received(:log).with( Logger::ERROR, 'Event failed to dispatch with response code: 500' - ).once + ).exactly(3).times error = Optimizely::HTTPCallError.new('HTTP Server Error: 500') - expect(error_handler).to have_received(:handle_error).once.with(error) + expect(error_handler).to have_received(:handle_error).exactly(3).times.with(error) end it 'should do nothing on response with status code 3xx' do stub_request(:post, @url).to_return(status: 399) event = Optimizely::Event.new(:post, @url, @params, @post_headers) - response = @customized_event_dispatcher.dispatch_event(event) + @customized_event_dispatcher.dispatch_event(event) - expect(response).to have_received(:log) - expect(spy_logger).to have_received(:log) + expect(spy_logger).to have_received(:log).with(Logger::DEBUG, 'event successfully sent with response code 399') expect(error_handler).to_not have_received(:handle_error) end @@ -182,10 +195,120 @@ stub_request(:post, @url).to_return(status: 600) event = Optimizely::Event.new(:post, @url, @params, @post_headers) - response = @customized_event_dispatcher.dispatch_event(event) + @customized_event_dispatcher.dispatch_event(event) - expect(response).to have_received(:log) - expect(spy_logger).to have_received(:log) + expect(spy_logger).to have_received(:log).with(Logger::DEBUG, 'event successfully sent with response code 600') expect(error_handler).not_to have_received(:handle_error) end + + context 'retry logic with exponential backoff' do + it 'should retry on 5xx errors with exponential backoff' do + stub_request(:post, @url).to_return(status: 500).times(2).then.to_return(status: 200) + event = Optimizely::Event.new(:post, @url, @params, @post_headers) + + start_time = Time.now + @customized_event_dispatcher.dispatch_event(event) + elapsed_time = Time.now - start_time + + # Should make 3 requests total (1 initial + 2 retries) + expect(a_request(:post, @url)).to have_been_made.times(3) + + # Should have delays: 200ms + 400ms = 600ms minimum + expect(elapsed_time).to be >= 0.5 # Allow some tolerance + + # Should log retry attempts + expect(spy_logger).to have_received(:log).with( + Logger::DEBUG, /Retrying event dispatch/ + ).at_least(:twice) + end + + it 'should not retry on 4xx client errors' do + stub_request(:post, @url).to_return(status: 400) + event = Optimizely::Event.new(:post, @url, @params, @post_headers) + + @customized_event_dispatcher.dispatch_event(event) + + # Should only make 1 request (no retries) + expect(a_request(:post, @url)).to have_been_made.once + + # Should not log retry attempts + expect(spy_logger).not_to have_received(:log).with( + Logger::DEBUG, /Retrying event dispatch/ + ) + end + + it 'should retry on Timeout errors with exponential backoff' do + stub_request(:post, @url).to_timeout.times(2).then.to_return(status: 200) + event = Optimizely::Event.new(:post, @url, @params, @post_headers) + + start_time = Time.now + @customized_event_dispatcher.dispatch_event(event) + elapsed_time = Time.now - start_time + + # Should make 3 requests total (1 initial + 2 retries) + expect(a_request(:post, @url)).to have_been_made.times(3) + + # Should have delays: 200ms + 400ms = 600ms minimum + expect(elapsed_time).to be >= 0.5 + + # Should log retry attempts + expect(spy_logger).to have_received(:log).with( + Logger::DEBUG, /Retrying event dispatch/ + ).at_least(:twice) + end + + it 'should retry on standard errors with exponential backoff' do + stub_request(:post, @url).to_raise(StandardError.new('Network error')).times(2).then.to_return(status: 200) + event = Optimizely::Event.new(:post, @url, @params, @post_headers) + + start_time = Time.now + @customized_event_dispatcher.dispatch_event(event) + elapsed_time = Time.now - start_time + + # Should make 3 requests total (1 initial + 2 retries) + expect(a_request(:post, @url)).to have_been_made.times(3) + + # Should have delays: 200ms + 400ms = 600ms minimum + expect(elapsed_time).to be >= 0.5 + + # Should log retry attempts + expect(spy_logger).to have_received(:log).with( + Logger::DEBUG, /Retrying event dispatch/ + ).at_least(:twice) + end + + it 'should give up after max retries' do + stub_request(:post, @url).to_return(status: 500) + event = Optimizely::Event.new(:post, @url, @params, @post_headers) + + @customized_event_dispatcher.dispatch_event(event) + + # Should make max_retries requests (3) + expect(a_request(:post, @url)).to have_been_made.times(3) + + # Should log error for each retry + expect(spy_logger).to have_received(:log).with( + Logger::ERROR, 'Event failed to dispatch with response code: 500' + ).exactly(3).times + end + + it 'should calculate correct exponential backoff intervals' do + dispatcher = Optimizely::EventDispatcher.new + + # First retry: 200ms + expect(dispatcher.send(:calculate_retry_interval, 0)).to eq(0.2) + + # Second retry: 400ms + expect(dispatcher.send(:calculate_retry_interval, 1)).to eq(0.4) + + # Third retry: 800ms + expect(dispatcher.send(:calculate_retry_interval, 2)).to eq(0.8) + + # Fourth retry: capped at 1s + expect(dispatcher.send(:calculate_retry_interval, 3)).to eq(1.0) + + # Fifth retry: still capped at 1s + expect(dispatcher.send(:calculate_retry_interval, 4)).to eq(1.0) + end + end end diff --git a/spec/odp/odp_event_manager_spec.rb b/spec/odp/odp_event_manager_spec.rb index 57402887..6729bea5 100644 --- a/spec/odp/odp_event_manager_spec.rb +++ b/spec/odp/odp_event_manager_spec.rb @@ -260,16 +260,20 @@ allow(SecureRandom).to receive(:uuid).and_return(test_uuid) event_manager = Optimizely::OdpEventManager.new(logger: spy_logger) retry_count = event_manager.instance_variable_get('@retry_count') - allow(event_manager.api_manager).to receive(:send_odp_events).exactly(retry_count + 1).times.with(api_key, api_host, odp_events).and_return(true) + allow(event_manager.api_manager).to receive(:send_odp_events).exactly(retry_count).times.with(api_key, api_host, odp_events).and_return(true) event_manager.start!(odp_config) event_manager.send_event(**events[0]) event_manager.send_event(**events[1]) event_manager.flush - sleep(0.1) until event_manager.instance_variable_get('@event_queue').empty? + # Need to wait longer for retries with exponential backoff (200ms + 400ms = 600ms) + sleep(1) until event_manager.instance_variable_get('@event_queue').empty? expect(event_manager.instance_variable_get('@current_batch').length).to eq 0 - expect(spy_logger).to have_received(:log).exactly(retry_count).times.with(Logger::DEBUG, 'Error dispatching ODP events, scheduled to retry.') + # Updated log message includes retry attempt and delay info + expect(spy_logger).to have_received(:log).with( + Logger::DEBUG, /Error dispatching ODP events, retrying/ + ).exactly(retry_count - 1).times expect(spy_logger).to have_received(:log).once.with(Logger::ERROR, "ODP event send failed (Failed after 3 retries: #{processed_events.to_json}).") event_manager.stop! @@ -278,16 +282,20 @@ it 'should retry on network failure' do allow(SecureRandom).to receive(:uuid).and_return(test_uuid) event_manager = Optimizely::OdpEventManager.new(logger: spy_logger) - allow(event_manager.api_manager).to receive(:send_odp_events).once.with(api_key, api_host, odp_events).and_return(true, true, false) + allow(event_manager.api_manager).to receive(:send_odp_events).with(api_key, api_host, odp_events).and_return(true, true, false) event_manager.start!(odp_config) event_manager.send_event(**events[0]) event_manager.send_event(**events[1]) event_manager.flush - sleep(0.1) until event_manager.instance_variable_get('@event_queue').empty? + # Need to wait longer for retries with exponential backoff (200ms + 400ms = 600ms) + sleep(1) until event_manager.instance_variable_get('@event_queue').empty? expect(event_manager.instance_variable_get('@current_batch').length).to eq 0 - expect(spy_logger).to have_received(:log).twice.with(Logger::DEBUG, 'Error dispatching ODP events, scheduled to retry.') + # Updated log message includes retry attempt and delay info + expect(spy_logger).to have_received(:log).with( + Logger::DEBUG, /Error dispatching ODP events, retrying/ + ).twice expect(spy_logger).not_to have_received(:log).with(Logger::ERROR, anything) expect(event_manager.running?).to be true event_manager.stop! @@ -539,5 +547,52 @@ expect(spy_logger).to have_received(:log).once.with(Logger::DEBUG, 'ODP event queue: cannot send before config has been set.') expect(spy_logger).not_to have_received(:log).with(Logger::ERROR, anything) end + + it 'should use exponential backoff between retries' do + allow(SecureRandom).to receive(:uuid).and_return(test_uuid) + event_manager = Optimizely::OdpEventManager.new(logger: spy_logger) + + # All requests fail to trigger retries + allow(event_manager.api_manager).to receive(:send_odp_events).with(api_key, api_host, odp_events).and_return(true) + event_manager.start!(odp_config) + + start_time = Time.now + event_manager.send_event(**events[0]) + event_manager.send_event(**events[1]) + event_manager.flush + + # Wait for all retries to complete (need at least 600ms for 200ms + 400ms delays) + sleep(1) until event_manager.instance_variable_get('@event_queue').empty? + elapsed_time = Time.now - start_time + + # Should have delays: 200ms + 400ms = 600ms minimum for 3 total attempts + expect(elapsed_time).to be >= 0.5 # Allow some tolerance + + # Should log retry attempts with delay info + expect(spy_logger).to have_received(:log).with( + Logger::DEBUG, /retrying \(attempt \d+ of \d+\) after/ + ).at_least(:once) + + event_manager.stop! + end + + it 'should calculate correct exponential backoff intervals' do + event_manager = Optimizely::OdpEventManager.new + + # First retry: 200ms + expect(event_manager.send(:calculate_retry_interval, 0)).to eq(0.2) + + # Second retry: 400ms + expect(event_manager.send(:calculate_retry_interval, 1)).to eq(0.4) + + # Third retry: 800ms + expect(event_manager.send(:calculate_retry_interval, 2)).to eq(0.8) + + # Fourth retry: capped at 1s + expect(event_manager.send(:calculate_retry_interval, 3)).to eq(1.0) + + # Fifth retry: still capped at 1s + expect(event_manager.send(:calculate_retry_interval, 4)).to eq(1.0) + end end end