diff --git a/README.md b/README.md index 54da7c75..cc8e48ef 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,8 @@ may have unexpected consequences if applied to other TIMDEX UI apps. - `PLATFORM_NAME`: The value set is added to the header after the MIT Libraries logo. The logic and CSS for this comes from our theme gem. - `PRIMO_NDE_VID`: The Primo view ID for NDE Only required if `FEATURE_PRIMO_NDE_LINKS` is enabled. Ask Enterprise Systems for value. - `PRIMO_TIMEOUT`: The number of seconds before a Primo request times out (default 6). +- `TIMDEX_TIMEOUT`: The number of seconds before a TIMDEX GraphQL request times out (default 20). Set below `RACK_TIMEOUT_SERVICE_TIMEOUT` to allow graceful partial results when TIMDEX is slow. +- `RACK_TIMEOUT_SERVICE_TIMEOUT`: The number of seconds before a request times out at the Rack middleware level. On Heroku, the hard limit is 30 seconds, so this should be set to less than 30 (recommended 25 seconds). When both Primo and TIMDEX fetch threads are active, ensure this is greater than `MERGED_SEARCH_THREAD_TIMEOUT` to allow graceful timeout handling. This is typically configured via Heroku environment variables. - `REQUESTS_PER_PERIOD` - number of requests that can be made for general throttles per `REQUEST_PERIOD` - `REQUEST_PERIOD` - time in minutes used along with `REQUESTS_PER_PERIOD` - `REDIRECT_REQUESTS_PER_PERIOD`- number of requests that can be made that the query string starts with our legacy redirect parameter to throttle per `REQUEST_PERIOD` diff --git a/app/controllers/search_controller.rb b/app/controllers/search_controller.rb index ff804b55..78ef3797 100644 --- a/app/controllers/search_controller.rb +++ b/app/controllers/search_controller.rb @@ -37,7 +37,10 @@ def results # Render the response in HTML or JSON format respond_to do |format| - format.json { render json: { results: @results, pagination: @pagination, errors: @errors } } + format.json do + render json: { results: @results, pagination: @pagination, errors: @errors, + incomplete_results: @incomplete_results } + end format.html { render :results } end end @@ -70,6 +73,7 @@ def load_geodata_results # Handle errors @errors = extract_errors(response) + @incomplete_results = nil return unless @errors.nil? hits = response.dig(:data, 'search', 'hits') || 0 @@ -85,6 +89,7 @@ def load_primo_results @pagination = data[:pagination] @errors = data[:errors] @show_primo_continuation = data[:show_continuation] + @incomplete_results = nil end def load_timdex_results @@ -92,6 +97,7 @@ def load_timdex_results @results = data[:results] @pagination = data[:pagination] @errors = data[:errors] + @incomplete_results = nil end def load_all_results @@ -114,6 +120,7 @@ def load_all_results @errors = data[:errors] @pagination = data[:pagination] @show_primo_continuation = data[:show_primo_continuation] + @incomplete_results = data[:incomplete_results] end def fetch_primo_data(offset: nil, per_page: nil) @@ -152,6 +159,8 @@ def fetch_primo_data(offset: nil, per_page: nil) { results: results, pagination: pagination, errors: errors, show_continuation: show_continuation, hits: hits } + rescue HTTP::TimeoutError + { results: [], pagination: {}, errors: nil, show_continuation: false, hits: 0, timed_out: true } rescue StandardError => e { results: [], pagination: {}, errors: handle_primo_errors(e), show_continuation: false, hits: 0 } end @@ -174,6 +183,8 @@ def fetch_timdex_data(offset: nil, per_page: nil) else { results: [], pagination: {}, errors: errors, hits: 0 } end + rescue Net::ReadTimeout + { results: [], pagination: {}, errors: nil, hits: 0, timed_out: true } end def active_filters @@ -391,7 +402,8 @@ def handle_primo_errors(error) 'message' => 'Hmm, we seem to be having difficulties...', 'description' => 'In the meantime, try searching these tools directly.', 'links' => [ - { 'label' => "MIT's WorldCat", 'description' => 'Books and media', 'url' => 'https://libraries.mit.edu/worldcat' }, + { 'label' => "MIT's WorldCat", 'description' => 'Books and media', + 'url' => 'https://libraries.mit.edu/worldcat' }, { 'label' => 'Google Scholar', 'description' => 'Articles', 'url' => 'https://scholar.google.com/' }, { 'label' => 'ArchivesSpace', 'description' => 'MIT archives', 'url' => 'https://archivesspace.mit.edu/' }, { 'label' => 'DSpace@MIT', 'description' => 'MIT research', 'url' => 'https://dspace.mit.edu/' } diff --git a/app/models/merged_search_service.rb b/app/models/merged_search_service.rb index 880c0b7d..9cc19cc9 100644 --- a/app/models/merged_search_service.rb +++ b/app/models/merged_search_service.rb @@ -115,6 +115,10 @@ def write_cached_totals(totals) # fetchers. Each fetcher should return the usual response hash including # `:results` and `:hits`. # + # Timeouts are handled at the HTTP level by each fetcher (PRIMO_TIMEOUT and + # TIMDEX_TIMEOUT), which raise catchable exceptions that the fetchers convert + # to `timed_out: true` responses for graceful partial-results display. + # # WARNING: exceptions raised inside these threads will not automatically # propagate to the caller; callers/tests should account for this. # @@ -124,6 +128,7 @@ def write_cached_totals(totals) def parallel_fetch(offset:, per_page:) primo = nil timdex = nil + threads = [] threads << Thread.new { primo = @primo_fetcher.call(offset: offset, per_page: per_page, query: @enhanced_query) } threads << Thread.new { timdex = @timdex_fetcher.call(offset: offset, per_page: per_page, query: @enhanced_query) } @@ -175,7 +180,7 @@ def fetch_all_tab_page_chunks(paginator) # @param current_page [Integer] # @param per_page [Integer] # @param deeper [Boolean] whether this was a deeper-page flow - # @return [Hash] response with :results, :errors, :pagination, :show_primo_continuation + # @return [Hash] response with :results, :errors, :pagination, :show_primo_continuation, :incomplete_results def assemble_all_tab_result(paginator, primo_data, timdex_data, current_page, per_page, deeper: false) primo_total = primo_data[:hits] || 0 timdex_total = timdex_data[:hits] || 0 @@ -184,6 +189,9 @@ def assemble_all_tab_result(paginator, primo_data, timdex_data, current_page, pe errors = combine_errors(primo_data[:errors], timdex_data[:errors]) pagination = Analyzer.new(@enhanced_query, timdex_total, :all, primo_total, per_page).pagination + # Detect if results are incomplete due to timeouts + incomplete_results = detect_incomplete_results(primo_data, timdex_data) + show_primo_continuation = if deeper # Use the Primo-specific API offset (calculated from the paginator) # when deciding whether to show a Primo continuation. @@ -201,7 +209,24 @@ def assemble_all_tab_result(paginator, primo_data, timdex_data, current_page, pe primo_data[:show_continuation] end - { results: merged, errors: errors, pagination: pagination, show_primo_continuation: show_primo_continuation } + { results: merged, errors: errors, pagination: pagination, show_primo_continuation: show_primo_continuation, + incomplete_results: incomplete_results } + end + + # Detect if search results are incomplete due to source timeouts. + # + # When a fetcher times out, it returns timed_out: true in the response. + # This method identifies which sources timed out and returns an indicator for the UI. + # + # @param primo_data [Hash] response from Primo fetcher + # @param timdex_data [Hash] response from TIMDEX fetcher + # @return [Hash, nil] { sources: Array } e.g., { sources: ['Primo'] } or nil if complete + def detect_incomplete_results(primo_data, timdex_data) + timed_out_sources = [] + timed_out_sources << 'Primo' if primo_data[:timed_out] + timed_out_sources << 'TIMDEX' if timdex_data[:timed_out] + + timed_out_sources.any? ? { sources: timed_out_sources } : nil end # Merge multiple error arrays into a single array or nil when empty. @@ -221,7 +246,7 @@ def build_paginator_from_totals(totals, current_page, per_page) current_page: current_page, per_page: per_page) end - # Note: default fetcher implementations were removed to enforce explicit + # NOTE: default fetcher implementations were removed to enforce explicit # dependency injection. Callers must provide `primo_fetcher` and # `timdex_fetcher` when constructing `MergedSearchService`. diff --git a/app/models/timdex_base.rb b/app/models/timdex_base.rb index 22688802..9f8fa3ed 100644 --- a/app/models/timdex_base.rb +++ b/app/models/timdex_base.rb @@ -6,6 +6,12 @@ class TimdexBase def headers(*) { 'User-Agent': 'MIT Libraries Client' } end + + def connection + conn = super + conn.read_timeout = ENV.fetch('TIMDEX_TIMEOUT', '20').to_i + conn + end end Schema = GraphQL::Client.load_schema('config/schema/schema.json') diff --git a/app/views/search/_incomplete_results_warning.html.erb b/app/views/search/_incomplete_results_warning.html.erb new file mode 100644 index 00000000..de9eb080 --- /dev/null +++ b/app/views/search/_incomplete_results_warning.html.erb @@ -0,0 +1,16 @@ +<% return unless incomplete_results.present? %> +<% sources = incomplete_results[:sources] %> + + diff --git a/app/views/search/results.html.erb b/app/views/search/results.html.erb index b0d3f0a1..6f42c30e 100644 --- a/app/views/search/results.html.erb +++ b/app/views/search/results.html.erb @@ -7,6 +7,8 @@
<%= render(partial: 'shared/error', collection: @errors) %> + <%= render(partial: 'search/incomplete_results_warning', locals: { incomplete_results: @incomplete_results }) %> + <%= render(partial: 'trigger_tacos') if tacos_enabled? %> <%= turbo_frame_tag "search-results", data: { turbo_action: "advance" } do %> diff --git a/app/views/search/results_geo.html.erb b/app/views/search/results_geo.html.erb index 1d025023..f1494e43 100644 --- a/app/views/search/results_geo.html.erb +++ b/app/views/search/results_geo.html.erb @@ -13,6 +13,8 @@ <%= render(partial: 'shared/error', collection: @errors) %> + <%= render(partial: 'search/incomplete_results_warning', locals: { incomplete_results: @incomplete_results }) %> +
<% if @filters.present? %>