From 5d5f88cb57cd85ec196d69a32b56fe7b17a63b98 Mon Sep 17 00:00:00 2001 From: jazairi <16103405+jazairi@users.noreply.github.com> Date: Tue, 28 Apr 2026 14:11:59 -0700 Subject: [PATCH 1/2] Only calculate aggregations when requested Why these changes are being introduced: Aggregations are currently calculated even when they are not requested in the GraphQL query. It would be more efficient to calculate them only when they are needed. Relevant ticket(s): - [USE-491](https://mitlibraries.atlassian.net/browse/USE-491) How this addresses that need: This adds a `requested_aggregations` method to Query Type that evaluates which aggregations are requested in the query. This information is then used in the Aggregations and Opensearch models to calculate only the requested aggregations. Side effects of this change: None. --- app/graphql/types/query_type.rb | 31 +++++++++++++-------- app/models/aggregations.rb | 12 ++++++++ app/models/opensearch.rb | 9 ++++-- test/models/aggregations_test.rb | 48 ++++++++++++++++++++++++++++++++ test/models/opensearch_test.rb | 41 +++++++++++++++++++++++++++ 5 files changed, 128 insertions(+), 13 deletions(-) create mode 100644 test/models/aggregations_test.rb diff --git a/app/graphql/types/query_type.rb b/app/graphql/types/query_type.rb index 91859d9f..d946f626 100644 --- a/app/graphql/types/query_type.rb +++ b/app/graphql/types/query_type.rb @@ -109,7 +109,8 @@ def search(searchterm:, citation:, contributors:, funding_information:, geodista query = construct_query(searchterm, citation, contributors, funding_information, geodistance, geobox, identifiers, locations, subjects, title, source, boolean_type, filters, per_page, query_mode) - results = Opensearch.new.search(from, query, Timdex::OSClient, highlight: highlight_requested?, index: index, fulltext: fulltext, query_mode: query_mode) + results = Opensearch.new.search(from, query, Timdex::OSClient, highlight: highlight_requested?, index: index, + fulltext: fulltext, query_mode: query_mode, requested_aggregations: requested_aggregations) response = {} response[:hits] = results['hits']['total']['value'] @@ -122,6 +123,12 @@ def highlight_requested? context[:tracers].first.log_data[:used_fields].include?('Record.highlight') end + def requested_aggregations + used_fields = context[:tracers].first.log_data[:used_fields] + used_fields.select { |field| field.start_with?('Aggregations.') } + .map { |field| field.sub('Aggregations.', '').to_sym } + end + # Long-term, we will probably want to define these fields discretely in RecordType. However, this might end up # adding confusion while we are maintaining deprecated fields in that class. We should refactor this either soon # after removing GraphQL V1 or as part of that work. @@ -174,17 +181,19 @@ def source_deprecation_handler(query, new_source, old_source) end def collapse_buckets(es_aggs) + return nil if es_aggs.nil? || es_aggs.empty? + { - access_to_files: es_aggs['access_to_files']['only_file_access']['access_types']['buckets'], - contributors: es_aggs['contributors']['contributor_names']['buckets'], - source: es_aggs['source']['buckets'], - subjects: es_aggs['subjects']['subject_names']['buckets'], - places: es_aggs['places']['only_spatial']['place_names']['buckets'], - languages: es_aggs['languages']['buckets'], - literary_form: es_aggs['literary_form']['buckets'], - format: es_aggs['content_format']['buckets'], - content_type: es_aggs['content_type']['buckets'] - } + access_to_files: es_aggs.dig('access_to_files', 'only_file_access', 'access_types', 'buckets'), + contributors: es_aggs.dig('contributors', 'contributor_names', 'buckets'), + source: es_aggs.dig('source', 'buckets'), + subjects: es_aggs.dig('subjects', 'subject_names', 'buckets'), + places: es_aggs.dig('places', 'only_spatial', 'place_names', 'buckets'), + languages: es_aggs.dig('languages', 'buckets'), + literary_form: es_aggs.dig('literary_form', 'buckets'), + format: es_aggs.dig('content_format', 'buckets'), + content_type: es_aggs.dig('content_type', 'buckets') + }.compact end end end diff --git a/app/models/aggregations.rb b/app/models/aggregations.rb index 9bfdf3fa..aa6e65f7 100644 --- a/app/models/aggregations.rb +++ b/app/models/aggregations.rb @@ -16,6 +16,18 @@ def self.all } end + # Return only the aggregations that were requested + # @param requested_names [Array] Array of aggregation names to include (e.g., [:source, :contributors]) + # @return [Hash] Filtered aggregations hash with only requested aggregations + def self.for_request(requested_names) + return {} if requested_names.nil? || requested_names.empty? + + all_aggs = all + requested_names.each_with_object({}) do |name, result| + result[name] = all_aggs[name] if all_aggs.key?(name) + end + end + def self.access_to_files { nested: { diff --git a/app/models/opensearch.rb b/app/models/opensearch.rb index d378b219..37ee33f7 100644 --- a/app/models/opensearch.rb +++ b/app/models/opensearch.rb @@ -3,11 +3,13 @@ class Opensearch SIZE = 20 MAX_SIZE = 200 - def search(from, params, client, highlight: false, index: nil, fulltext: false, query_mode: 'keyword') + def search(from, params, client, highlight: false, index: nil, fulltext: false, query_mode: 'keyword', + requested_aggregations: []) @params = params @highlight = highlight @fulltext = fulltext?(fulltext) @query_mode = query_mode + @requested_aggregations = requested_aggregations index = default_index unless index.present? client.search(index:, body: build_query(from)) @@ -39,10 +41,13 @@ def build_query(from) from:, size: calculate_size, query:, - aggregations: Aggregations.all, sort: sort_builder.build } + # Only include aggregations if any were requested + aggregations = Aggregations.for_request(@requested_aggregations) + query_hash[:aggregations] = aggregations if aggregations.present? + source = source_builder.build query_hash[:_source] = source if source.present? diff --git a/test/models/aggregations_test.rb b/test/models/aggregations_test.rb new file mode 100644 index 00000000..cde8fc4f --- /dev/null +++ b/test/models/aggregations_test.rb @@ -0,0 +1,48 @@ +require 'test_helper' + +class AggregationsTest < ActiveSupport::TestCase + test 'for_request returns all aggregations when all are requested' do + requested = %i[access_to_files contributors content_type content_format languages literary_form places source + subjects] + result = Aggregations.for_request(requested) + + assert_equal requested.size, result.size + requested.each do |agg_name| + assert result.key?(agg_name) + end + end + + test 'for_request returns only requested aggregations' do + requested = %i[source contributors] + result = Aggregations.for_request(requested) + + assert_equal 2, result.size + assert result.key?(:source) + assert result.key?(:contributors) + end + + test 'for_request returns empty hash when given empty array' do + result = Aggregations.for_request([]) + + assert_empty result + assert result.is_a?(Hash) + end + + test 'for_request returns empty hash when given nil' do + result = Aggregations.for_request(nil) + + assert_empty result + assert result.is_a?(Hash) + end + + test 'for_request ignores invalid aggregation names' do + requested = %i[source invalid_agg contributors another_invalid] + result = Aggregations.for_request(requested) + + assert_equal 2, result.size + assert result.key?(:source) + assert result.key?(:contributors) + assert_not result.key?(:invalid_agg) + assert_not result.key?(:another_invalid) + end +end diff --git a/test/models/opensearch_test.rb b/test/models/opensearch_test.rb index 45235435..1b297063 100644 --- a/test/models/opensearch_test.rb +++ b/test/models/opensearch_test.rb @@ -161,4 +161,45 @@ class OpensearchTest < ActiveSupport::TestCase result = os.query assert_equal(mock_response, result) end + + test 'build_query includes aggregations when requested' do + os = Opensearch.new + os.instance_variable_set(:@params, { q: 'test' }) + os.instance_variable_set(:@requested_aggregations, %i[source contributors]) + + json = JSON.parse(os.build_query(0)) + assert json.key?('aggregations') + assert json['aggregations'].key?('source') + assert json['aggregations'].key?('contributors') + assert_not json['aggregations'].key?('languages') + end + + test 'build_query excludes aggregations when none requested' do + os = Opensearch.new + os.instance_variable_set(:@params, { q: 'test' }) + os.instance_variable_set(:@requested_aggregations, []) + + json = JSON.parse(os.build_query(0)) + assert_not json.key?('aggregations') + end + + test 'build_query excludes aggregations when requested_aggregations is nil' do + os = Opensearch.new + os.instance_variable_set(:@params, { q: 'test' }) + os.instance_variable_set(:@requested_aggregations, nil) + + json = JSON.parse(os.build_query(0)) + assert_not json.key?('aggregations') + end + + test 'build_query with aggregations ignores invalid aggregation names' do + os = Opensearch.new + os.instance_variable_set(:@params, { q: 'test' }) + os.instance_variable_set(:@requested_aggregations, %i[source invalid_agg]) + + json = JSON.parse(os.build_query(0)) + assert json.key?('aggregations') + assert json['aggregations'].key?('source') + assert_not json['aggregations'].key?('invalid_agg') + end end From 96b20dfef767234462f032cb0d2befa02ed14299 Mon Sep 17 00:00:00 2001 From: jazairi <16103405+jazairi@users.noreply.github.com> Date: Mon, 4 May 2026 13:24:49 -0700 Subject: [PATCH 2/2] Convert aggregation fields to format expected by aggregations model --- app/graphql/types/query_type.rb | 12 +++- test/controllers/graphql_controller_test.rb | 78 +++++++++++++++++++++ 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/app/graphql/types/query_type.rb b/app/graphql/types/query_type.rb index d946f626..ebdc8966 100644 --- a/app/graphql/types/query_type.rb +++ b/app/graphql/types/query_type.rb @@ -123,10 +123,20 @@ def highlight_requested? context[:tracers].first.log_data[:used_fields].include?('Record.highlight') end + # Convert aggregation fields to format expected by aggregations model. + # We believe format was set to `content_format` because when TIMDEX was a rest REST API, the + # format parameter would likely have triggered the format feature (i.e. format html, format + # json, format xml, etc). We are retaining this out of caution. + def requested_aggregation_field(field_name) + return :content_format if field_name == 'format' + + field_name.underscore.to_sym + end + def requested_aggregations used_fields = context[:tracers].first.log_data[:used_fields] used_fields.select { |field| field.start_with?('Aggregations.') } - .map { |field| field.sub('Aggregations.', '').to_sym } + .map { |field| requested_aggregation_field(field.sub('Aggregations.', '')) } end # Long-term, we will probably want to define these fields discretely in RecordType. However, this might end up diff --git a/test/controllers/graphql_controller_test.rb b/test/controllers/graphql_controller_test.rb index ece7c64d..10f93236 100644 --- a/test/controllers/graphql_controller_test.rb +++ b/test/controllers/graphql_controller_test.rb @@ -1062,4 +1062,82 @@ class GraphqlControllerTest < ActionDispatch::IntegrationTest end end end + + test 'graphql search with camelCase aggregation fields (accessToFiles, contentType)' do + VCR.use_cassette('opensearch init') do + VCR.use_cassette('graphql search data analytics') do + post '/graphql', params: { query: '{ + search(searchterm: "data analytics") { + aggregations { + accessToFiles { + key + docCount + } + contentType { + key + docCount + } + } + } + }' } + assert_equal(200, response.status) + json = JSON.parse(response.body) + aggs = json['data']['search']['aggregations'] + + # Verify aggregations are present when requested + assert_not_nil aggs + assert_not_nil aggs['accessToFiles'] + assert_not_nil aggs['contentType'] + + # Verify structure + assert aggs['accessToFiles'].is_a?(Array) + assert aggs['contentType'].is_a?(Array) + end + end + end + + test 'graphql search with format aggregation (should map to content_format)' do + VCR.use_cassette('opensearch init') do + VCR.use_cassette('graphql search data analytics') do + post '/graphql', params: { query: '{ + search(searchterm: "data analytics") { + aggregations { + format { + key + docCount + } + } + } + }' } + assert_equal(200, response.status) + json = JSON.parse(response.body) + aggs = json['data']['search']['aggregations'] + + # Verify format aggregation is present and populated + assert_not_nil aggs + assert_not_nil aggs['format'] + assert aggs['format'].is_a?(Array) + end + end + end + + test 'graphql search without aggregations excludes them from OpenSearch query' do + VCR.use_cassette('opensearch init') do + VCR.use_cassette('graphql search data analytics') do + post '/graphql', params: { query: '{ + search(searchterm: "data analytics") { + hits + records { + title + } + } + }' } + assert_equal(200, response.status) + json = JSON.parse(response.body) + + # Verify aggregations field is null when not requested + assert_nil json['data']['search']['aggregations'] + end + end + end end