diff --git a/app/graphql/types/query_type.rb b/app/graphql/types/query_type.rb index 91859d9..ebdc896 100644 --- a/app/graphql/types/query_type.rb +++ b/app/graphql/types/query_type.rb @@ -109,7 +109,8 @@ def search(searchterm:, citation:, contributors:, funding_information:, geodista query = construct_query(searchterm, citation, contributors, funding_information, geodistance, geobox, identifiers, locations, subjects, title, source, boolean_type, filters, per_page, query_mode) - results = Opensearch.new.search(from, query, Timdex::OSClient, highlight: highlight_requested?, index: index, fulltext: fulltext, query_mode: query_mode) + results = Opensearch.new.search(from, query, Timdex::OSClient, highlight: highlight_requested?, index: index, + fulltext: fulltext, query_mode: query_mode, requested_aggregations: requested_aggregations) response = {} response[:hits] = results['hits']['total']['value'] @@ -122,6 +123,22 @@ def highlight_requested? context[:tracers].first.log_data[:used_fields].include?('Record.highlight') end + # Convert aggregation fields to format expected by aggregations model. + # We believe format was set to `content_format` because when TIMDEX was a rest REST API, the + # format parameter would likely have triggered the format feature (i.e. format html, format + # json, format xml, etc). We are retaining this out of caution. + def requested_aggregation_field(field_name) + return :content_format if field_name == 'format' + + field_name.underscore.to_sym + end + + def requested_aggregations + used_fields = context[:tracers].first.log_data[:used_fields] + used_fields.select { |field| field.start_with?('Aggregations.') } + .map { |field| requested_aggregation_field(field.sub('Aggregations.', '')) } + end + # Long-term, we will probably want to define these fields discretely in RecordType. However, this might end up # adding confusion while we are maintaining deprecated fields in that class. We should refactor this either soon # after removing GraphQL V1 or as part of that work. @@ -174,17 +191,19 @@ def source_deprecation_handler(query, new_source, old_source) end def collapse_buckets(es_aggs) + return nil if es_aggs.nil? || es_aggs.empty? + { - access_to_files: es_aggs['access_to_files']['only_file_access']['access_types']['buckets'], - contributors: es_aggs['contributors']['contributor_names']['buckets'], - source: es_aggs['source']['buckets'], - subjects: es_aggs['subjects']['subject_names']['buckets'], - places: es_aggs['places']['only_spatial']['place_names']['buckets'], - languages: es_aggs['languages']['buckets'], - literary_form: es_aggs['literary_form']['buckets'], - format: es_aggs['content_format']['buckets'], - content_type: es_aggs['content_type']['buckets'] - } + access_to_files: es_aggs.dig('access_to_files', 'only_file_access', 'access_types', 'buckets'), + contributors: es_aggs.dig('contributors', 'contributor_names', 'buckets'), + source: es_aggs.dig('source', 'buckets'), + subjects: es_aggs.dig('subjects', 'subject_names', 'buckets'), + places: es_aggs.dig('places', 'only_spatial', 'place_names', 'buckets'), + languages: es_aggs.dig('languages', 'buckets'), + literary_form: es_aggs.dig('literary_form', 'buckets'), + format: es_aggs.dig('content_format', 'buckets'), + content_type: es_aggs.dig('content_type', 'buckets') + }.compact end end end diff --git a/app/models/aggregations.rb b/app/models/aggregations.rb index 9bfdf3f..aa6e65f 100644 --- a/app/models/aggregations.rb +++ b/app/models/aggregations.rb @@ -16,6 +16,18 @@ def self.all } end + # Return only the aggregations that were requested + # @param requested_names [Array] Array of aggregation names to include (e.g., [:source, :contributors]) + # @return [Hash] Filtered aggregations hash with only requested aggregations + def self.for_request(requested_names) + return {} if requested_names.nil? || requested_names.empty? + + all_aggs = all + requested_names.each_with_object({}) do |name, result| + result[name] = all_aggs[name] if all_aggs.key?(name) + end + end + def self.access_to_files { nested: { diff --git a/app/models/opensearch.rb b/app/models/opensearch.rb index d378b21..37ee33f 100644 --- a/app/models/opensearch.rb +++ b/app/models/opensearch.rb @@ -3,11 +3,13 @@ class Opensearch SIZE = 20 MAX_SIZE = 200 - def search(from, params, client, highlight: false, index: nil, fulltext: false, query_mode: 'keyword') + def search(from, params, client, highlight: false, index: nil, fulltext: false, query_mode: 'keyword', + requested_aggregations: []) @params = params @highlight = highlight @fulltext = fulltext?(fulltext) @query_mode = query_mode + @requested_aggregations = requested_aggregations index = default_index unless index.present? client.search(index:, body: build_query(from)) @@ -39,10 +41,13 @@ def build_query(from) from:, size: calculate_size, query:, - aggregations: Aggregations.all, sort: sort_builder.build } + # Only include aggregations if any were requested + aggregations = Aggregations.for_request(@requested_aggregations) + query_hash[:aggregations] = aggregations if aggregations.present? + source = source_builder.build query_hash[:_source] = source if source.present? diff --git a/test/controllers/graphql_controller_test.rb b/test/controllers/graphql_controller_test.rb index ece7c64..10f9323 100644 --- a/test/controllers/graphql_controller_test.rb +++ b/test/controllers/graphql_controller_test.rb @@ -1062,4 +1062,82 @@ class GraphqlControllerTest < ActionDispatch::IntegrationTest end end end + + test 'graphql search with camelCase aggregation fields (accessToFiles, contentType)' do + VCR.use_cassette('opensearch init') do + VCR.use_cassette('graphql search data analytics') do + post '/graphql', params: { query: '{ + search(searchterm: "data analytics") { + aggregations { + accessToFiles { + key + docCount + } + contentType { + key + docCount + } + } + } + }' } + assert_equal(200, response.status) + json = JSON.parse(response.body) + aggs = json['data']['search']['aggregations'] + + # Verify aggregations are present when requested + assert_not_nil aggs + assert_not_nil aggs['accessToFiles'] + assert_not_nil aggs['contentType'] + + # Verify structure + assert aggs['accessToFiles'].is_a?(Array) + assert aggs['contentType'].is_a?(Array) + end + end + end + + test 'graphql search with format aggregation (should map to content_format)' do + VCR.use_cassette('opensearch init') do + VCR.use_cassette('graphql search data analytics') do + post '/graphql', params: { query: '{ + search(searchterm: "data analytics") { + aggregations { + format { + key + docCount + } + } + } + }' } + assert_equal(200, response.status) + json = JSON.parse(response.body) + aggs = json['data']['search']['aggregations'] + + # Verify format aggregation is present and populated + assert_not_nil aggs + assert_not_nil aggs['format'] + assert aggs['format'].is_a?(Array) + end + end + end + + test 'graphql search without aggregations excludes them from OpenSearch query' do + VCR.use_cassette('opensearch init') do + VCR.use_cassette('graphql search data analytics') do + post '/graphql', params: { query: '{ + search(searchterm: "data analytics") { + hits + records { + title + } + } + }' } + assert_equal(200, response.status) + json = JSON.parse(response.body) + + # Verify aggregations field is null when not requested + assert_nil json['data']['search']['aggregations'] + end + end + end end diff --git a/test/models/aggregations_test.rb b/test/models/aggregations_test.rb new file mode 100644 index 0000000..cde8fc4 --- /dev/null +++ b/test/models/aggregations_test.rb @@ -0,0 +1,48 @@ +require 'test_helper' + +class AggregationsTest < ActiveSupport::TestCase + test 'for_request returns all aggregations when all are requested' do + requested = %i[access_to_files contributors content_type content_format languages literary_form places source + subjects] + result = Aggregations.for_request(requested) + + assert_equal requested.size, result.size + requested.each do |agg_name| + assert result.key?(agg_name) + end + end + + test 'for_request returns only requested aggregations' do + requested = %i[source contributors] + result = Aggregations.for_request(requested) + + assert_equal 2, result.size + assert result.key?(:source) + assert result.key?(:contributors) + end + + test 'for_request returns empty hash when given empty array' do + result = Aggregations.for_request([]) + + assert_empty result + assert result.is_a?(Hash) + end + + test 'for_request returns empty hash when given nil' do + result = Aggregations.for_request(nil) + + assert_empty result + assert result.is_a?(Hash) + end + + test 'for_request ignores invalid aggregation names' do + requested = %i[source invalid_agg contributors another_invalid] + result = Aggregations.for_request(requested) + + assert_equal 2, result.size + assert result.key?(:source) + assert result.key?(:contributors) + assert_not result.key?(:invalid_agg) + assert_not result.key?(:another_invalid) + end +end diff --git a/test/models/opensearch_test.rb b/test/models/opensearch_test.rb index 4523543..1b29706 100644 --- a/test/models/opensearch_test.rb +++ b/test/models/opensearch_test.rb @@ -161,4 +161,45 @@ class OpensearchTest < ActiveSupport::TestCase result = os.query assert_equal(mock_response, result) end + + test 'build_query includes aggregations when requested' do + os = Opensearch.new + os.instance_variable_set(:@params, { q: 'test' }) + os.instance_variable_set(:@requested_aggregations, %i[source contributors]) + + json = JSON.parse(os.build_query(0)) + assert json.key?('aggregations') + assert json['aggregations'].key?('source') + assert json['aggregations'].key?('contributors') + assert_not json['aggregations'].key?('languages') + end + + test 'build_query excludes aggregations when none requested' do + os = Opensearch.new + os.instance_variable_set(:@params, { q: 'test' }) + os.instance_variable_set(:@requested_aggregations, []) + + json = JSON.parse(os.build_query(0)) + assert_not json.key?('aggregations') + end + + test 'build_query excludes aggregations when requested_aggregations is nil' do + os = Opensearch.new + os.instance_variable_set(:@params, { q: 'test' }) + os.instance_variable_set(:@requested_aggregations, nil) + + json = JSON.parse(os.build_query(0)) + assert_not json.key?('aggregations') + end + + test 'build_query with aggregations ignores invalid aggregation names' do + os = Opensearch.new + os.instance_variable_set(:@params, { q: 'test' }) + os.instance_variable_set(:@requested_aggregations, %i[source invalid_agg]) + + json = JSON.parse(os.build_query(0)) + assert json.key?('aggregations') + assert json['aggregations'].key?('source') + assert_not json['aggregations'].key?('invalid_agg') + end end