From 33eb80f10bc4f5698da4d8f8444568c826ca5a13 Mon Sep 17 00:00:00 2001 From: Josh Wilson Date: Wed, 20 May 2026 13:02:39 -0500 Subject: [PATCH 1/6] Add json ingestion gem shell --- CODEBASE_OVERVIEW.md | 6 ++- Gemfile.lock | 8 ++++ elasticgraph-json_ingestion/.rspec | 1 + elasticgraph-json_ingestion/.yardopts | 1 + elasticgraph-json_ingestion/Gemfile | 1 + elasticgraph-json_ingestion/LICENSE.txt | 21 +++++++++ elasticgraph-json_ingestion/README.md | 20 +++++++++ .../elasticgraph-json_ingestion.gemspec | 43 +++++++++++++++++++ elasticgraph-support/README.md | 3 ++ 9 files changed, 103 insertions(+), 1 deletion(-) create mode 120000 elasticgraph-json_ingestion/.rspec create mode 120000 elasticgraph-json_ingestion/.yardopts create mode 120000 elasticgraph-json_ingestion/Gemfile create mode 100644 elasticgraph-json_ingestion/LICENSE.txt create mode 100644 elasticgraph-json_ingestion/README.md create mode 100644 elasticgraph-json_ingestion/elasticgraph-json_ingestion.gemspec diff --git a/CODEBASE_OVERVIEW.md b/CODEBASE_OVERVIEW.md index d6e62ebfd..a7f4b58ea 100644 --- a/CODEBASE_OVERVIEW.md +++ b/CODEBASE_OVERVIEW.md @@ -192,12 +192,13 @@ graph LR; click opensearch-ruby href "https://rubygems.org/gems/opensearch-ruby" "Open on RubyGems.org" _blank; ``` -### Extensions (5 gems) +### Extensions (6 gems) These libraries extend ElasticGraph to provide optional but commonly needed functionality. * [elasticgraph-apollo](elasticgraph-apollo/README.md): Transforms an ElasticGraph project into an Apollo subgraph. * [elasticgraph-health_check](elasticgraph-health_check/README.md): Provides a health check for high availability ElasticGraph deployments. +* [elasticgraph-json_ingestion](elasticgraph-json_ingestion/README.md): JSON Schema ingestion support for ElasticGraph. * [elasticgraph-query_interceptor](elasticgraph-query_interceptor/README.md): Intercepts ElasticGraph datastore queries. * [elasticgraph-query_registry](elasticgraph-query_registry/README.md): Provides a source-controlled query registry for ElasticGraph applications. * [elasticgraph-warehouse](elasticgraph-warehouse/README.md): Extends ElasticGraph to support ingestion into a data warehouse. @@ -216,6 +217,7 @@ graph LR; apollo-federation["apollo-federation"]; elasticgraph-health_check["eg-health_check"]; elasticgraph-datastore_core["eg-datastore_core"]; + elasticgraph-json_ingestion["eg-json_ingestion"]; elasticgraph-query_interceptor["eg-query_interceptor"]; elasticgraph-schema_artifacts["eg-schema_artifacts"]; elasticgraph-query_registry["eg-query_registry"]; @@ -228,6 +230,7 @@ graph LR; elasticgraph-health_check --> elasticgraph-datastore_core; elasticgraph-health_check --> elasticgraph-graphql; elasticgraph-health_check --> elasticgraph-support; + elasticgraph-json_ingestion --> elasticgraph-support; elasticgraph-query_interceptor --> elasticgraph-graphql; elasticgraph-query_interceptor --> elasticgraph-schema_artifacts; elasticgraph-query_registry --> elasticgraph-graphql; @@ -242,6 +245,7 @@ graph LR; class apollo-federation externalGemCatStyle; class elasticgraph-health_check targetGemStyle; class elasticgraph-datastore_core otherEgGemStyle; + class elasticgraph-json_ingestion targetGemStyle; class elasticgraph-query_interceptor targetGemStyle; class elasticgraph-schema_artifacts otherEgGemStyle; class elasticgraph-query_registry targetGemStyle; diff --git a/Gemfile.lock b/Gemfile.lock index f7e347019..9e18f46ec 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -128,6 +128,12 @@ PATH elasticgraph-support (= 1.2.1.pre) hashdiff (~> 1.2, >= 1.2.1) +PATH + remote: elasticgraph-json_ingestion + specs: + elasticgraph-json_ingestion (1.2.1.pre) + elasticgraph-support (= 1.2.1.pre) + PATH remote: elasticgraph-lambda_support specs: @@ -698,6 +704,7 @@ DEPENDENCIES elasticgraph-indexer (= 1.2.1.pre)! elasticgraph-indexer_autoscaler_lambda (= 1.2.1.pre)! elasticgraph-indexer_lambda (= 1.2.1.pre)! + elasticgraph-json_ingestion (= 1.2.1.pre)! elasticgraph-lambda_support (= 1.2.1.pre)! elasticgraph-local (= 1.2.1.pre)! elasticgraph-opensearch (= 1.2.1.pre)! @@ -789,6 +796,7 @@ CHECKSUMS elasticgraph-indexer (1.2.1.pre) elasticgraph-indexer_autoscaler_lambda (1.2.1.pre) elasticgraph-indexer_lambda (1.2.1.pre) + elasticgraph-json_ingestion (1.2.1.pre) elasticgraph-lambda_support (1.2.1.pre) elasticgraph-local (1.2.1.pre) elasticgraph-opensearch (1.2.1.pre) diff --git a/elasticgraph-json_ingestion/.rspec b/elasticgraph-json_ingestion/.rspec new file mode 120000 index 000000000..67e6e21b3 --- /dev/null +++ b/elasticgraph-json_ingestion/.rspec @@ -0,0 +1 @@ +../spec_support/subdir_dot_rspec \ No newline at end of file diff --git a/elasticgraph-json_ingestion/.yardopts b/elasticgraph-json_ingestion/.yardopts new file mode 120000 index 000000000..e11a2057f --- /dev/null +++ b/elasticgraph-json_ingestion/.yardopts @@ -0,0 +1 @@ +../config/site/yardopts \ No newline at end of file diff --git a/elasticgraph-json_ingestion/Gemfile b/elasticgraph-json_ingestion/Gemfile new file mode 120000 index 000000000..26cb2ad91 --- /dev/null +++ b/elasticgraph-json_ingestion/Gemfile @@ -0,0 +1 @@ +../Gemfile \ No newline at end of file diff --git a/elasticgraph-json_ingestion/LICENSE.txt b/elasticgraph-json_ingestion/LICENSE.txt new file mode 100644 index 000000000..aa18b5db8 --- /dev/null +++ b/elasticgraph-json_ingestion/LICENSE.txt @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2024 - 2026 Block, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/elasticgraph-json_ingestion/README.md b/elasticgraph-json_ingestion/README.md new file mode 100644 index 000000000..db1eb3988 --- /dev/null +++ b/elasticgraph-json_ingestion/README.md @@ -0,0 +1,20 @@ +# ElasticGraph::JSONIngestion + +JSON Schema ingestion support for ElasticGraph. + +This gem provides the schema-definition extension that generates JSON Schema artifacts for indexing +events and validates JSON-ingestion-specific schema options. + +## Dependency Diagram + +```mermaid +graph LR; + classDef targetGemStyle fill:#FADBD8,stroke:#EC7063,color:#000,stroke-width:2px; + classDef otherEgGemStyle fill:#A9DFBF,stroke:#2ECC71,color:#000; + classDef externalGemStyle fill:#E0EFFF,stroke:#70A1D7,color:#2980B9; + elasticgraph-json_ingestion["elasticgraph-json_ingestion"]; + class elasticgraph-json_ingestion targetGemStyle; + elasticgraph-support["elasticgraph-support"]; + elasticgraph-json_ingestion --> elasticgraph-support; + class elasticgraph-support otherEgGemStyle; +``` diff --git a/elasticgraph-json_ingestion/elasticgraph-json_ingestion.gemspec b/elasticgraph-json_ingestion/elasticgraph-json_ingestion.gemspec new file mode 100644 index 000000000..cdade27c0 --- /dev/null +++ b/elasticgraph-json_ingestion/elasticgraph-json_ingestion.gemspec @@ -0,0 +1,43 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require_relative "../elasticgraph-support/lib/elastic_graph/version" + +Gem::Specification.new do |spec| + spec.name = "elasticgraph-json_ingestion" + spec.version = ElasticGraph::VERSION + spec.authors = ["Josh Wilson", "Myron Marston", "Block Engineering"] + spec.email = ["joshuaw@squareup.com"] + spec.homepage = "https://block.github.io/elasticgraph/" + spec.license = "MIT" + spec.summary = "JSON Schema ingestion support for ElasticGraph." + + spec.metadata = { + "bug_tracker_uri" => "https://github.com/block/elasticgraph/issues", + "changelog_uri" => "https://github.com/block/elasticgraph/releases/tag/v#{ElasticGraph::VERSION}", + "documentation_uri" => "https://block.github.io/elasticgraph/api-docs/v#{ElasticGraph::VERSION}/", + "homepage_uri" => "https://block.github.io/elasticgraph/", + "source_code_uri" => "https://github.com/block/elasticgraph/tree/v#{ElasticGraph::VERSION}/#{spec.name}", + "gem_category" => "extension" + } + + spec.files = Dir.chdir(File.expand_path(__dir__)) do + `git ls-files -z`.split("\x0").reject do |f| + (f == __FILE__) || f.match(%r{\A(?:(?:test|spec|features|sig)/|\.(?:git|travis|circleci)|appveyor)}) + end - [".rspec", "Gemfile", ".yardopts"] + end + + spec.required_ruby_version = [">= 3.4", "< 4.1"] + + spec.add_dependency "elasticgraph-support", ElasticGraph::VERSION + + # This gem's schema-definition extension code references `elasticgraph-schema_definition`, but + # applications load it through schema-definition tasks after `elasticgraph-schema_definition` is already + # available. Keeping this as a development dependency avoids a runtime dependency cycle. + spec.add_development_dependency "elasticgraph-schema_definition", ElasticGraph::VERSION +end diff --git a/elasticgraph-support/README.md b/elasticgraph-support/README.md index 91f3096dc..8464cbc88 100644 --- a/elasticgraph-support/README.md +++ b/elasticgraph-support/README.md @@ -45,6 +45,9 @@ graph LR; elasticgraph-indexer["elasticgraph-indexer"]; elasticgraph-indexer --> elasticgraph-support; class elasticgraph-indexer otherEgGemStyle; + elasticgraph-json_ingestion["elasticgraph-json_ingestion"]; + elasticgraph-json_ingestion --> elasticgraph-support; + class elasticgraph-json_ingestion otherEgGemStyle; elasticgraph-opensearch["elasticgraph-opensearch"]; elasticgraph-opensearch --> elasticgraph-support; class elasticgraph-opensearch otherEgGemStyle; From ba63e0fb98aa5e55b9695a3d0b5ad18847155323 Mon Sep 17 00:00:00 2001 From: Josh Wilson Date: Wed, 20 May 2026 13:08:48 -0500 Subject: [PATCH 2/6] Add schema definition extension plumbing --- Rakefile | 1 - config/schema.rb | 1 + config/site/Rakefile | 1 - .../site/examples/custom_resolver/schema.rb | 1 + config/site/examples/music/schema.rb | 1 + .../site/examples/music_simplified/schema.rb | 1 + config/site/support/doctest_helper.rb | 1 - .../for_index_template_spec.rb | 1 - elasticgraph-apollo/README.md | 16 +++++---- .../apollo_tests_implementation/Rakefile | 3 +- .../config/products_schema.rb | 1 + .../spec/acceptance/schema_evolution_spec.rb | 1 - .../spec/acceptance/schema_evolution_spec.rb | 1 - .../lib/elastic_graph/local/rake_tasks.rb | 29 ---------------- .../sig/elastic_graph/local/rake_tasks.rbs | 1 - .../elastic_graph/schema_definition/api.rb | 34 +++++++++++++++++-- .../schema_definition/factory.rb | 6 ++-- .../schema_definition/rake_tasks.rb | 9 ----- .../schema_artifact_manager.rb | 11 +++--- .../elastic_graph/schema_definition/state.rb | 2 ++ .../schema_definition/test_support.rb | 5 +-- .../elastic_graph/schema_definition/api.rbs | 1 + .../schema_definition/factory.rbs | 2 +- .../schema_definition/rake_tasks.rbs | 2 -- .../schema_artifact_manager.rbs | 2 -- .../elastic_graph/schema_definition/state.rbs | 2 ++ .../schema_definition/rake_tasks_spec.rb | 21 ++++++------ .../schema_definition/json_schema_spec.rb | 16 +++++++++ elasticgraph-warehouse/README.md | 7 ++-- .../schema_definition/rake_tasks_spec.rb | 10 +++--- .../project_template/Rakefile.tt | 3 -- .../project_template/config/schema.rb.tt | 3 ++ 32 files changed, 104 insertions(+), 92 deletions(-) diff --git a/Rakefile b/Rakefile index 31c2d71ad..831dc68b3 100644 --- a/Rakefile +++ b/Rakefile @@ -46,7 +46,6 @@ end configure_local_rake_tasks = ->(tasks) do tasks.schema_element_name_form = :snake_case - tasks.enforce_json_schema_version = false tasks.index_document_sizes = true tasks.env_port_mapping = {test: test_port} tasks.output = schema_def_output diff --git a/config/schema.rb b/config/schema.rb index 4d2206af7..743c82add 100644 --- a/config/schema.rb +++ b/config/schema.rb @@ -8,6 +8,7 @@ ElasticGraph.define_schema do |schema| schema.json_schema_version 1 + schema.enforce_json_schema_version false end # Note: anytime you add a file to load here, you'll also have to update the list here: diff --git a/config/site/Rakefile b/config/site/Rakefile index 9110b85ec..c3eec0216 100644 --- a/config/site/Rakefile +++ b/config/site/Rakefile @@ -461,7 +461,6 @@ module ElasticGraph # to demonstrate any `Rakefile` APIs. ::ElasticGraph::Local::RakeTasks.new(local_config_yaml: settings_file, path_to_schema: schema_file) do |tasks| tasks.opensearch_versions = [] - tasks.enforce_json_schema_version = false end end diff --git a/config/site/examples/custom_resolver/schema.rb b/config/site/examples/custom_resolver/schema.rb index 4c64bcf48..bd1914489 100644 --- a/config/site/examples/custom_resolver/schema.rb +++ b/config/site/examples/custom_resolver/schema.rb @@ -10,6 +10,7 @@ ElasticGraph.define_schema do |schema| schema.json_schema_version 1 + schema.enforce_json_schema_version false # :snippet-start: register_graphql_resolver require(require_path = "roll_dice_resolver") diff --git a/config/site/examples/music/schema.rb b/config/site/examples/music/schema.rb index 51a149472..0ca817eca 100644 --- a/config/site/examples/music/schema.rb +++ b/config/site/examples/music/schema.rb @@ -10,6 +10,7 @@ ElasticGraph.define_schema do |schema| schema.json_schema_version 1 + schema.enforce_json_schema_version false end Dir["#{__dir__}/schema/**/*.rb"].each do |schema_def_file| diff --git a/config/site/examples/music_simplified/schema.rb b/config/site/examples/music_simplified/schema.rb index 49086b627..bc24e2187 100644 --- a/config/site/examples/music_simplified/schema.rb +++ b/config/site/examples/music_simplified/schema.rb @@ -1,5 +1,6 @@ ElasticGraph.define_schema do |schema| schema.json_schema_version 1 + schema.enforce_json_schema_version false schema.object_type "Artist" do |t| t.field "id", "ID" diff --git a/config/site/support/doctest_helper.rb b/config/site/support/doctest_helper.rb index 6f6d256d3..f322eb599 100644 --- a/config/site/support/doctest_helper.rb +++ b/config/site/support/doctest_helper.rb @@ -82,7 +82,6 @@ module ElasticGraph artifacts_manager = @api.factory.new_schema_artifact_manager( schema_definition_results: @api.results, schema_artifacts_directory: "#{@tmp_dir}/schema_artifacts", - enforce_json_schema_version: true, output: ::StringIO.new ) diff --git a/elasticgraph-admin/spec/integration/elastic_graph/admin/index_definition_configurator/for_index_template_spec.rb b/elasticgraph-admin/spec/integration/elastic_graph/admin/index_definition_configurator/for_index_template_spec.rb index 88fab0b68..2af2dd7cb 100644 --- a/elasticgraph-admin/spec/integration/elastic_graph/admin/index_definition_configurator/for_index_template_spec.rb +++ b/elasticgraph-admin/spec/integration/elastic_graph/admin/index_definition_configurator/for_index_template_spec.rb @@ -167,7 +167,6 @@ def fetch_artifact_configuration(schema_artifacts, index_def_name) factory.new_schema_artifact_manager( schema_definition_results: schema_def_results, schema_artifacts_directory: Dir.pwd, - enforce_json_schema_version: true, output: output_io ).dump_artifacts diff --git a/elasticgraph-apollo/README.md b/elasticgraph-apollo/README.md index a47027663..2503b0377 100644 --- a/elasticgraph-apollo/README.md +++ b/elasticgraph-apollo/README.md @@ -64,15 +64,14 @@ index 2943335..26633c3 100644 require "elastic_graph/local/rake_tasks" require "elastic_graph/query_registry/rake_tasks" require "rspec/core/rake_task" -@@ -12,6 +13,8 @@ ElasticGraph::Local::RakeTasks.new( +@@ -12,5 +13,7 @@ ElasticGraph::Local::RakeTasks.new( local_config_yaml: settings_file, path_to_schema: "#{project_root}/config/schema.rb" ) do |tasks| + tasks.schema_definition_extension_modules << ElasticGraph::Apollo::SchemaDefinition::APIExtension + - # Set this to true once you're beyond the prototyping stage. - tasks.enforce_json_schema_version = false - + # Determines casing of field names. Can be either `:camelCase` or `:snake_case`. + tasks.schema_element_name_form = :camelCase ``` That's it! @@ -94,11 +93,16 @@ diff --git a/config/schema.rb b/config/schema.rb index 015c5fa..362cdcb 100644 --- a/config/schema.rb +++ b/config/schema.rb -@@ -4,6 +4,8 @@ ElasticGraph.define_schema do |schema| +@@ -4,7 +4,9 @@ ElasticGraph.define_schema do |schema| # ElasticGraph will tell you when you need to bump this. schema.json_schema_version 1 - +- ++ + schema.target_apollo_federation_version "2.3" ++ + # Set this to true once you're beyond the prototyping stage. + schema.enforce_json_schema_version false +- + # This registers the elasticgraph-query_registry extension, which can be used to reject queries that # clients have not registered (and to reject queries that differ from what a client has registered). diff --git a/elasticgraph-apollo/apollo_tests_implementation/Rakefile b/elasticgraph-apollo/apollo_tests_implementation/Rakefile index 6c688c0db..befade088 100644 --- a/elasticgraph-apollo/apollo_tests_implementation/Rakefile +++ b/elasticgraph-apollo/apollo_tests_implementation/Rakefile @@ -17,6 +17,5 @@ ElasticGraph::SchemaDefinition::RakeTasks.new( index_document_sizes: false, path_to_schema: project_root / "config/products_schema.rb", schema_artifacts_directory: project_root / "config/schema/artifacts", - extension_modules: [ElasticGraph::Apollo::SchemaDefinition::APIExtension], - enforce_json_schema_version: false + extension_modules: [ElasticGraph::Apollo::SchemaDefinition::APIExtension] ) diff --git a/elasticgraph-apollo/apollo_tests_implementation/config/products_schema.rb b/elasticgraph-apollo/apollo_tests_implementation/config/products_schema.rb index 201e4057b..2530d7dc6 100644 --- a/elasticgraph-apollo/apollo_tests_implementation/config/products_schema.rb +++ b/elasticgraph-apollo/apollo_tests_implementation/config/products_schema.rb @@ -18,6 +18,7 @@ module ApolloTestImpl # https://github.com/apollographql/apollo-federation-subgraph-compatibility/blob/2.0.0/COMPATIBILITY.md#products-schema-to-be-implemented-by-library-maintainers ElasticGraph.define_schema do |schema| schema.json_schema_version 1 + schema.enforce_json_schema_version false schema.target_apollo_federation_version(federation_version) if federation_version unless federation_version == "2.0" diff --git a/elasticgraph-graphql/spec/acceptance/schema_evolution_spec.rb b/elasticgraph-graphql/spec/acceptance/schema_evolution_spec.rb index c055f4f5e..494152ebf 100644 --- a/elasticgraph-graphql/spec/acceptance/schema_evolution_spec.rb +++ b/elasticgraph-graphql/spec/acceptance/schema_evolution_spec.rb @@ -68,7 +68,6 @@ def dump_schema_artifacts(json_schema_version:, team_extras: "") index_document_sizes: true, path_to_schema: path_to_schema, schema_artifacts_directory: "config/schema/artifacts", - enforce_json_schema_version: true, output: output ) end diff --git a/elasticgraph-indexer/spec/acceptance/schema_evolution_spec.rb b/elasticgraph-indexer/spec/acceptance/schema_evolution_spec.rb index 286b58f5d..a7f241102 100644 --- a/elasticgraph-indexer/spec/acceptance/schema_evolution_spec.rb +++ b/elasticgraph-indexer/spec/acceptance/schema_evolution_spec.rb @@ -404,7 +404,6 @@ def dump_artifacts index_document_sizes: true, path_to_schema: path_to_schema, schema_artifacts_directory: "config/schema/artifacts", - enforce_json_schema_version: true, output: output ) end diff --git a/elasticgraph-local/lib/elastic_graph/local/rake_tasks.rb b/elasticgraph-local/lib/elastic_graph/local/rake_tasks.rb index d2ac91820..f33ab3b2d 100644 --- a/elasticgraph-local/lib/elastic_graph/local/rake_tasks.rb +++ b/elasticgraph-local/lib/elastic_graph/local/rake_tasks.rb @@ -213,33 +213,6 @@ class RakeTasks < ::Rake::TaskLib # @dynamic schema_definition_extension_modules, schema_definition_extension_modules= attr_accessor :schema_definition_extension_modules - # Whether or not to enforce the requirement that the JSON schema version is incremented every time - # dumping the JSON schemas results in a changed artifact. Defaults to `true`. - # - # @note Generally speaking, you will want this to be `true` for any ElasticGraph application that is in - # production as the versioning of JSON schemas is what supports safe schema evolution as it allows - # ElasticGraph to identify which version of the JSON schema the publishing system was operating on - # when it published an event. - # - # It can be useful to set it to `false` before your application is in production, as you do not want - # to be forced to bump the version after every single schema change while you are building an initial - # prototype. - # - # @return [Boolean] whether to require `json_schema_version` to be incremented on changes that impact `json_schemas.yaml` - # @see SchemaDefinition::API#json_schema_version - # - # @example Disable enforcement during initial prototyping - # ElasticGraph::Local::RakeTasks.new( - # local_config_yaml: "config/settings/local.yaml", - # path_to_schema: "config/schema.rb" - # ) do |tasks| - # # TODO: remove this once we're past the prototyping stage - # tasks.enforce_json_schema_version = false - # end - # - # @dynamic enforce_json_schema_version, enforce_json_schema_version= - attr_accessor :enforce_json_schema_version - # List of Elasticsearch versions you want to be able to boot. Rake tasks will be defined for each version to support booting and # halting Elasticsearch locally. If the configuration of `local_config_yaml` only configures `opensearch` as a cluster backend, # will default to an empty array. Otherwise, defaults to the versions of Elasticsearch that are exercised by the ElasticGraph test suite, as @@ -362,7 +335,6 @@ def initialize(local_config_yaml:, path_to_schema:) self.type_name_overrides = {} self.enum_value_overrides_by_type = {} self.schema_definition_extension_modules = [] - self.enforce_json_schema_version = true self.env_port_mapping = {} self.output = $stdout self.daemon_timeout = 300 @@ -394,7 +366,6 @@ def initialize(local_config_yaml:, path_to_schema:) type_name_overrides: type_name_overrides, enum_value_overrides_by_type: enum_value_overrides_by_type, extension_modules: schema_definition_extension_modules, - enforce_json_schema_version: enforce_json_schema_version, output: output ) diff --git a/elasticgraph-local/sig/elastic_graph/local/rake_tasks.rbs b/elasticgraph-local/sig/elastic_graph/local/rake_tasks.rbs index c984430a5..0d780ffc5 100644 --- a/elasticgraph-local/sig/elastic_graph/local/rake_tasks.rbs +++ b/elasticgraph-local/sig/elastic_graph/local/rake_tasks.rbs @@ -8,7 +8,6 @@ module ElasticGraph attr_accessor type_name_overrides: ::Hash[::Symbol, ::String] attr_accessor enum_value_overrides_by_type: ::Hash[::Symbol, ::Hash[::Symbol, ::String]] attr_accessor schema_definition_extension_modules: ::Array[::Module] - attr_accessor enforce_json_schema_version: bool attr_accessor elasticsearch_versions: ::Array[::String] attr_accessor opensearch_versions: ::Array[::String] attr_accessor env_port_mapping: ::Hash[::String, ::Integer] diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/api.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/api.rb index 87cdfb709..80306e237 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/api.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/api.rb @@ -460,11 +460,11 @@ def results # # @note While this is an important part of how ElasticGraph is designed to support schema evolution, it can be annoying constantly # have to increment this while rapidly changing the schema during prototyping. You can disable the requirement to increment this - # on every JSON schema change by setting `enforce_json_schema_version` to `false` in your `Rakefile`. + # on every JSON schema change with {#enforce_json_schema_version}. # # @param version [Integer] current version number of the JSON schema artifact # @return [void] - # @see Local::RakeTasks#enforce_json_schema_version + # @see #enforce_json_schema_version # # @example Set the JSON schema version to 1 # ElasticGraph.define_schema do |schema| @@ -484,6 +484,36 @@ def json_schema_version(version) nil end + # Configures whether {SchemaArtifactManager} enforces the requirement that the JSON schema version is incremented every time + # dumping the JSON schemas results in a changed artifact. Defaults to `true`. + # + # @note Generally speaking, you will want this to be `true` for any ElasticGraph application that is in + # production as the versioning of JSON schemas is what supports safe schema evolution as it allows + # ElasticGraph to identify which version of the JSON schema the publishing system was operating on + # when it published an event. + # + # It can be useful to set it to `false` before your application is in production, as you do not want + # to be forced to bump the version after every single schema change while you are building an initial + # prototype. + # + # @param value [Boolean] whether to require `json_schema_version` to be incremented on changes that impact `json_schemas.yaml` + # @return [void] + # @see #json_schema_version + # + # @example Disable enforcement during initial prototyping + # ElasticGraph.define_schema do |schema| + # # TODO: remove this once we're past the prototyping stage + # schema.enforce_json_schema_version false + # end + def enforce_json_schema_version(value) + unless value == true || value == false + raise Errors::SchemaError, "`enforce_json_schema_version` must be a boolean. Specified value: #{value.inspect}" + end + + @state.enforce_json_schema_version = value + nil + end + # Defines strictness of the JSON schema validation. By default, the JSON schema will require all fields to be provided by the # publisher (but they can be nullable) and will ignore extra fields that are not defined in the schema. Use this method to # configure this behavior. diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb index 786fbf82a..062eb2255 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb @@ -63,6 +63,10 @@ module SchemaDefinition class Factory include Mixins::HasReadableToSAndInspect.new + # @dynamic state + # @return [State] schema definition state shared with the factory's API + attr_reader :state + def initialize(state) @state = state end @@ -328,14 +332,12 @@ def new_results def new_schema_artifact_manager( schema_definition_results:, schema_artifacts_directory:, - enforce_json_schema_version:, output:, max_diff_lines: 50 ) @@schema_artifact_manager_new.call( schema_definition_results:, schema_artifacts_directory:, - enforce_json_schema_version:, output:, max_diff_lines: ) diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/rake_tasks.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/rake_tasks.rb index 58fbe7517..8ab08390e 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/rake_tasks.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/rake_tasks.rb @@ -41,12 +41,6 @@ class RakeTasks < ::Rake::TaskLib # specific enum types. For example, to rename the `DayOfWeek.MONDAY` enum to `DayOfWeek.MON`, pass `{DayOfWeek: {MONDAY: "MON"}}`. # @param extension_modules [Array] List of Ruby modules to extend onto the `SchemaDefinition::API` instance. Designed to # support ElasticGraph extension gems (such as `elasticgraph-apollo`). - # @param enforce_json_schema_version [Boolean] Whether or not to enforce the requirement that the JSON schema version is incremented - # every time dumping the JSON schemas results in a changed artifact. Generally speaking, you will want this to be `true` for any - # ElasticGraph application that is in production as the versioning of JSON schemas is what supports safe schema evolution as it - # allows ElasticGraph to identify which version of the JSON schema the publishing system was operating on when it published an - # event. It can be useful to set it to `false` before your application is in production, as you do not want to be forced to bump - # the version after every single schema change while you are building an initial prototype. # @param output [IO] used for printing task output # # @example Minimal setup with defaults @@ -117,7 +111,6 @@ def initialize( type_name_overrides: {}, enum_value_overrides_by_type: {}, extension_modules: [], - enforce_json_schema_version: true, output: $stdout ) @schema_element_names = SchemaArtifacts::RuntimeMetadata::SchemaElementNames.new( @@ -131,7 +124,6 @@ def initialize( @index_document_sizes = index_document_sizes @path_to_schema = path_to_schema @schema_artifacts_directory = schema_artifacts_directory - @enforce_json_schema_version = enforce_json_schema_version @extension_modules = extension_modules @output = output @@ -164,7 +156,6 @@ def schema_artifact_manager schema_def_api.factory.new_schema_artifact_manager( schema_definition_results: schema_def_api.results, schema_artifacts_directory: @schema_artifacts_directory.to_s, - enforce_json_schema_version: @enforce_json_schema_version, output: @output, max_diff_lines: max_diff_lines ) diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb index 56d288fae..d3e0a9a81 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb @@ -31,10 +31,9 @@ class SchemaArtifactManager # @dynamic schema_definition_results attr_reader :schema_definition_results - def initialize(schema_definition_results:, schema_artifacts_directory:, enforce_json_schema_version:, output:, max_diff_lines: 50) + def initialize(schema_definition_results:, schema_artifacts_directory:, output:, max_diff_lines: 50) @schema_definition_results = schema_definition_results @schema_artifacts_directory = schema_artifacts_directory - @enforce_json_schema_version = enforce_json_schema_version @output = output @max_diff_lines = max_diff_lines @@ -51,7 +50,7 @@ def initialize(schema_definition_results:, schema_artifacts_directory:, enforce_ # Dumps all the schema artifacts to disk. def dump_artifacts check_if_needs_json_schema_version_bump do |recommended_json_schema_version| - if @enforce_json_schema_version + if schema_definition_results.state.enforce_json_schema_version # @type var setter_location: ::Thread::Backtrace::Location # We use `_ =` because while `json_schema_version_setter_location` can be nil, # it'll never be nil if we get here and we want the type to be non-nilable. @@ -62,12 +61,12 @@ def dump_artifacts "increase the schema's version, and then run the `bundle exec rake schema_artifacts:dump` command again.\n\n" \ "To update the schema version to the expected version, change line #{setter_location.lineno} at `#{setter_location_path}` to:\n" \ " `schema.json_schema_version #{recommended_json_schema_version}`\n\n" \ - "Alternately, pass `enforce_json_schema_version: false` to `ElasticGraph::SchemaDefinition::RakeTasks.new` to allow the JSON schemas " \ - "file to change without requiring a version bump, but that is only recommended for non-production applications during initial schema prototyping." + "Alternately, call `schema.enforce_json_schema_version false` in your schema definition to allow the JSON schemas file " \ + "to change without requiring a version bump, but that is only recommended for non-production applications during initial schema prototyping." else @output.puts <<~EOS WARNING: the `json_schemas.yaml` artifact is being updated without the `json_schema_version` being correspondingly incremented. - This is not recommended for production applications, but is currently allowed because you have set `enforce_json_schema_version: false`. + This is not recommended for production applications, but is currently allowed because you have called `schema.enforce_json_schema_version false`. EOS end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/state.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/state.rb index ed1994f25..ccc4e76c5 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/state.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/state.rb @@ -43,6 +43,7 @@ class State < Struct.new( :deleted_fields_by_type_name_and_old_field_name, :json_schema_version, :json_schema_version_setter_location, + :enforce_json_schema_version, :graphql_extension_modules, :graphql_resolvers_by_name, :built_in_graphql_resolvers, @@ -93,6 +94,7 @@ def self.with( deleted_fields_by_type_name_and_old_field_name: ::Hash.new { |h, k| h[k] = {} }, json_schema_version_setter_location: nil, json_schema_version: nil, + enforce_json_schema_version: true, graphql_extension_modules: [], graphql_resolvers_by_name: {}, built_in_graphql_resolvers: ::Set.new, diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/test_support.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/test_support.rb index 7084db978..c4c10fb89 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/test_support.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/test_support.rb @@ -7,6 +7,7 @@ # frozen_string_literal: true require "elastic_graph/errors" +require "elastic_graph/schema_artifacts/from_disk" require "elastic_graph/schema_artifacts/runtime_metadata/schema_element_names" require "elastic_graph/schema_definition/api" require "elastic_graph/schema_definition/schema_artifact_manager" @@ -76,7 +77,7 @@ def define_schema_with_schema_elements( # Set the json_schema_version to the provided value, if needed. if !json_schema_version.nil? && api.state.json_schema_version.nil? - api.json_schema_version json_schema_version + api.json_schema_version(json_schema_version) end # :nocov: -- the else branch and code past this aren't used by tests in elasticgraph-schema_definition. @@ -84,11 +85,11 @@ def define_schema_with_schema_elements( # Reloading the schema artifacts takes extra time that we don't usually want to spend (so it's opt-in) # but it can be useful in some cases because there is a bit of extra pruning/validation that it applies. + api.enforce_json_schema_version false tmp_dir = ::Dir.mktmpdir artifacts_manager = api.factory.new_schema_artifact_manager( schema_definition_results: api.results, schema_artifacts_directory: tmp_dir, - enforce_json_schema_version: false, output: ::StringIO.new ) diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/api.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/api.rbs index dbd1c818b..797db12cb 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/api.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/api.rbs @@ -78,6 +78,7 @@ module ElasticGraph @results: Results? def results: () -> Results def json_schema_version: (::Integer) -> void + def enforce_json_schema_version: (bool) -> void def register_graphql_extension: (::Module, defined_at: ::String, **untyped) -> void def register_graphql_resolver: (::Symbol, ::Class, defined_at: ::String, ?built_in: bool, **untyped) -> void def on_built_in_types: () { (SchemaElements::graphQLType) -> void } -> void diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/factory.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/factory.rbs index 978085dbb..0ef401056 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/factory.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/factory.rbs @@ -2,6 +2,7 @@ module ElasticGraph module SchemaDefinition class Factory @state: State + attr_reader state: State def initialize: (State) -> void def self.prevent_non_factory_instantiation_of: (::Class) -> ::Method @@ -147,7 +148,6 @@ module ElasticGraph def new_schema_artifact_manager: ( schema_definition_results: Results, schema_artifacts_directory: ::String, - enforce_json_schema_version: bool, output: io, ?max_diff_lines: ::Integer ) -> SchemaArtifactManager diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/rake_tasks.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/rake_tasks.rbs index a6cde8232..61d7e76ea 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/rake_tasks.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/rake_tasks.rbs @@ -11,7 +11,6 @@ module ElasticGraph ?type_name_overrides: ::Hash[::Symbol, ::String], ?enum_value_overrides_by_type: ::Hash[::Symbol, ::Hash[::Symbol, ::String]], ?extension_modules: ::Array[::Module], - ?enforce_json_schema_version: bool, ?output: io ) -> void @@ -24,7 +23,6 @@ module ElasticGraph @index_document_sizes: bool @path_to_schema: ::String | ::Pathname @schema_artifacts_directory: ::String | ::Pathname - @enforce_json_schema_version: bool @extension_modules: ::Array[::Module] @output: io diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs index b4b079de0..7c34a8824 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs @@ -6,7 +6,6 @@ module ElasticGraph def initialize: ( schema_definition_results: Results, schema_artifacts_directory: ::String, - enforce_json_schema_version: bool, output: io, ?max_diff_lines: ::Integer ) -> void @@ -18,7 +17,6 @@ module ElasticGraph @schema_definition_results: Results @schema_artifacts_directory: ::String - @enforce_json_schema_version: bool @output: io @max_diff_lines: ::Integer @artifacts: ::Array[SchemaArtifact[untyped]]? diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/state.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/state.rbs index f80f19f24..bb9a73291 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/state.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/state.rbs @@ -20,6 +20,7 @@ module ElasticGraph attr_reader deleted_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, SchemaElements::DeprecatedElement]] attr_accessor json_schema_version: ::Integer? attr_accessor json_schema_version_setter_location: ::Thread::Backtrace::Location? + attr_accessor enforce_json_schema_version: bool attr_reader graphql_extension_modules: ::Array[SchemaArtifacts::RuntimeMetadata::GraphQLExtension] attr_reader graphql_resolvers_by_name: ::Hash[::Symbol, SchemaArtifacts::RuntimeMetadata::GraphQLResolver] attr_reader built_in_graphql_resolvers: ::Set[::Symbol] @@ -56,6 +57,7 @@ module ElasticGraph deleted_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, SchemaElements::DeprecatedElement]], json_schema_version: Integer?, json_schema_version_setter_location: ::Thread::Backtrace::Location?, + enforce_json_schema_version: bool, graphql_extension_modules: ::Array[SchemaArtifacts::RuntimeMetadata::GraphQLExtension], graphql_resolvers_by_name: ::Hash[::Symbol, SchemaArtifacts::RuntimeMetadata::GraphQLResolver], built_in_graphql_resolvers: ::Set[::Symbol], diff --git a/elasticgraph-schema_definition/spec/integration/elastic_graph/schema_definition/rake_tasks_spec.rb b/elasticgraph-schema_definition/spec/integration/elastic_graph/schema_definition/rake_tasks_spec.rb index 709206b5d..54a235ec8 100644 --- a/elasticgraph-schema_definition/spec/integration/elastic_graph/schema_definition/rake_tasks_spec.rb +++ b/elasticgraph-schema_definition/spec/integration/elastic_graph/schema_definition/rake_tasks_spec.rb @@ -216,8 +216,10 @@ module SchemaDefinition "`schema.json_schema_version 4`" ).and matching(json_schema_version_setter_location_regex) + write_elastic_graph_schema_def_code(component_suffix: "3", json_schema_version: 3, component_extras: "t.renamed_from 'Component'", enforce_json_schema_version: false) + expect { - output = run_rake("schema_artifacts:dump", enforce_json_schema_version: false) + output = run_rake("schema_artifacts:dump") expect(output.lines).to include( a_string_including("Dumped", JSON_SCHEMAS_FILE), a_string_including("Dumped", versioned_json_schema_file(3)) @@ -492,10 +494,10 @@ module SchemaDefinition }) ) - # Here we add a different new field (`ordinal: Int!`), without bumping the version (and using `enforce_json_schema_version: false` - # to not have to bump the version)... - write_elastic_graph_schema_def_code(json_schema_version: 2, component_name_extras: "\nt.field 'ordinal', 'Int!'") - run_rake("schema_artifacts:dump", enforce_json_schema_version: false) + # Here we add a different new field (`ordinal: Int!`), without bumping the version, and disable + # enforcement so we do not have to bump the version. + write_elastic_graph_schema_def_code(json_schema_version: 2, component_name_extras: "\nt.field 'ordinal', 'Int!'", enforce_json_schema_version: false) + run_rake("schema_artifacts:dump") # It should not be added to the v1 schema... loaded_v1 = ::YAML.safe_load(read_artifact(versioned_json_schema_file(1))) @@ -859,7 +861,6 @@ module SchemaDefinition index_document_sizes: true, path_to_schema: "\#{project_root}/config/schema.rb", schema_artifacts_directory: "\#{project_root}/config/schema/artifacts", - enforce_json_schema_version: false ) EOS @@ -901,7 +902,7 @@ def expect_successful_run_of(*shell_commands) /line 7 at `(\S*\/?)schema\.rb`/ end - def write_elastic_graph_schema_def_code(json_schema_version:, component_suffix: "", extra_sdl: "", component_name_extras: "", component_extras: "", omit_component_name_field: false) + def write_elastic_graph_schema_def_code(json_schema_version:, enforce_json_schema_version: true, component_suffix: "", extra_sdl: "", component_name_extras: "", component_extras: "", omit_component_name_field: false) code = <<~EOS Thread.current[:eg_schema_load_count] = (Thread.current[:eg_schema_load_count] || 0) + 1 if Thread.current[:eg_schema_load_count] > 1 @@ -910,6 +911,7 @@ def write_elastic_graph_schema_def_code(json_schema_version:, component_suffix: ElasticGraph.define_schema do |schema| schema.json_schema_version #{json_schema_version} + #{"schema.enforce_json_schema_version false" unless enforce_json_schema_version} schema.enum_type "Size" do |t| t.values "SMALL", "MEDIUM", "LAGE" end @@ -966,6 +968,7 @@ def runtime_metadata_for_elastic_graph_schema_def_code(include_date_time_fields: ElasticGraph.define_schema do |schema| schema.json_schema_version 1 + schema.enforce_json_schema_version false schema.object_type "MyType" do |t| t.field "id", "ID!" @@ -976,7 +979,7 @@ def runtime_metadata_for_elastic_graph_schema_def_code(include_date_time_fields: end EOS - run_rake("schema_artifacts:dump", enforce_json_schema_version: false) + run_rake("schema_artifacts:dump") ::YAML.safe_load(read_artifact(RUNTIME_METADATA_FILE)) end @@ -1059,7 +1062,6 @@ def versioned_json_schema_file(version) def run_rake( *args, - enforce_json_schema_version: true, pretend_tty: false, path_to_schema: "schema.rb", include_extension_module: true, @@ -1086,7 +1088,6 @@ def as_active_instance index_document_sizes: true, path_to_schema: path_to_schema, schema_artifacts_directory: "config/schema/artifacts", - enforce_json_schema_version: enforce_json_schema_version, extension_modules: [extension_module].compact, derived_type_name_formats: derived_type_name_formats, type_name_overrides: type_name_overrides, diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_spec.rb index c11a97ae2..90979c46b 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_spec.rb @@ -2942,6 +2942,22 @@ def link_supertype_to_subtypes(interface_type, *subtype_names) expect(result[JSON_SCHEMA_VERSION_KEY]).to eq(1) end + it "allows json_schema_version enforcement to be disabled" do + result = define_schema(schema_element_name_form: "snake_case") do |s| + s.enforce_json_schema_version false + end + + expect(result.state.enforce_json_schema_version).to eq false + end + + it "fails if json_schema_version enforcement is set to a non-boolean value" do + expect { + define_schema(schema_element_name_form: "snake_case") do |s| + s.enforce_json_schema_version nil + end + }.to raise_error(Errors::SchemaError, a_string_including("must be a boolean", "nil")) + end + it "fails if json_schema_version is set to invalid values" do expect { define_schema(schema_element_name_form: "snake_case") do |s| diff --git a/elasticgraph-warehouse/README.md b/elasticgraph-warehouse/README.md index 30a894a1e..6890bbe63 100644 --- a/elasticgraph-warehouse/README.md +++ b/elasticgraph-warehouse/README.md @@ -53,15 +53,14 @@ index 2943335..26633c3 100644 require "elastic_graph/local/rake_tasks" require "elastic_graph/query_registry/rake_tasks" require "rspec/core/rake_task" -@@ -12,6 +13,8 @@ ElasticGraph::Local::RakeTasks.new( +@@ -12,5 +13,7 @@ ElasticGraph::Local::RakeTasks.new( local_config_yaml: settings_file, path_to_schema: "#{project_root}/config/schema.rb" ) do |tasks| + tasks.schema_definition_extension_modules << ElasticGraph::Warehouse::SchemaDefinition::APIExtension + - # Set this to true once you're beyond the prototyping stage. - tasks.enforce_json_schema_version = false - + # Determines casing of field names. Can be either `:camelCase` or `:snake_case`. + tasks.schema_element_name_form = :camelCase ``` After running `bundle exec rake schema_artifacts:dump`, a `data_warehouse.yaml` file will be diff --git a/elasticgraph-warehouse/spec/integration/elastic_graph/warehouse/schema_definition/rake_tasks_spec.rb b/elasticgraph-warehouse/spec/integration/elastic_graph/warehouse/schema_definition/rake_tasks_spec.rb index 2a6e74225..da74eb579 100644 --- a/elasticgraph-warehouse/spec/integration/elastic_graph/warehouse/schema_definition/rake_tasks_spec.rb +++ b/elasticgraph-warehouse/spec/integration/elastic_graph/warehouse/schema_definition/rake_tasks_spec.rb @@ -85,7 +85,7 @@ module SchemaDefinition original_content = read_warehouse_artifact expect(original_content).not_to include("added_field") - write_warehouse_schema(table_defs: <<~EOS) + write_warehouse_schema(enforce_json_schema_version: false, table_defs: <<~EOS) s.object_type "Product" do |t| t.field "id", "ID" t.field "added_field", "String" @@ -94,7 +94,7 @@ module SchemaDefinition EOS expect { - run_rake_with_warehouse("schema_artifacts:dump", enforce_json_schema_version: false) + run_rake_with_warehouse("schema_artifacts:dump") }.to change { read_warehouse_artifact } .from(original_content) .to(a_string_including("added_field")) @@ -157,10 +157,11 @@ module SchemaDefinition end end - def write_warehouse_schema(table_defs:) + def write_warehouse_schema(table_defs:, enforce_json_schema_version: true) ::File.write("schema.rb", <<~EOS) ElasticGraph.define_schema do |s| s.json_schema_version 1 + #{"s.enforce_json_schema_version false" unless enforce_json_schema_version} # Add a dummy indexed type to ensure the Query type has at least one field. # This prevents GraphQL-Ruby warnings about empty Query types in tests. @@ -177,14 +178,13 @@ def write_warehouse_schema(table_defs:) EOS end - def run_rake_with_warehouse(*args, enforce_json_schema_version: true) + def run_rake_with_warehouse(*args) run_rake(*args) do |output| ElasticGraph::SchemaDefinition::RakeTasks.new( schema_element_name_form: :snake_case, index_document_sizes: false, path_to_schema: "schema.rb", schema_artifacts_directory: "config/schema/artifacts", - enforce_json_schema_version: enforce_json_schema_version, extension_modules: [Warehouse::SchemaDefinition::APIExtension], output: output ) diff --git a/elasticgraph/lib/elastic_graph/project_template/Rakefile.tt b/elasticgraph/lib/elastic_graph/project_template/Rakefile.tt index 9ca5c55f7..7507ebfbe 100644 --- a/elasticgraph/lib/elastic_graph/project_template/Rakefile.tt +++ b/elasticgraph/lib/elastic_graph/project_template/Rakefile.tt @@ -12,9 +12,6 @@ ElasticGraph::Local::RakeTasks.new( local_config_yaml: settings_file, path_to_schema: "#{project_root}/config/schema.rb" ) do |tasks| - # Set this to true once you're beyond the prototyping stage. - tasks.enforce_json_schema_version = false - # Determines casing of field names. Can be either `:camelCase` or `:snake_case`. tasks.schema_element_name_form = :camelCase diff --git a/elasticgraph/lib/elastic_graph/project_template/config/schema.rb.tt b/elasticgraph/lib/elastic_graph/project_template/config/schema.rb.tt index fb70616d4..69792447b 100644 --- a/elasticgraph/lib/elastic_graph/project_template/config/schema.rb.tt +++ b/elasticgraph/lib/elastic_graph/project_template/config/schema.rb.tt @@ -2,6 +2,9 @@ ElasticGraph.define_schema do |schema| # ElasticGraph will tell you when you need to bump this. schema.json_schema_version 1 + # Set this to true once you're beyond the prototyping stage. + schema.enforce_json_schema_version false + # This registers the elasticgraph-query_registry extension, which can be used to reject queries that # clients have not registered (and to reject queries that differ from what a client has registered). # In addition, every registered query is validated against the schema in the CI build, giving you From 2bf98127318054bb53669b5cfa5881cb5405e484 Mon Sep 17 00:00:00 2001 From: Josh Wilson Date: Wed, 27 May 2026 11:12:26 -0500 Subject: [PATCH 3/6] Move JSON schema version enforcement into schemas --- config/site/examples/music/Rakefile | 1 - config/site/examples/schema_customization_rake_tasks/Rakefile | 1 - config/site/examples/schema_customization_rake_tasks/schema.rb | 1 + 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/config/site/examples/music/Rakefile b/config/site/examples/music/Rakefile index 52404e6e9..c54dd64f6 100644 --- a/config/site/examples/music/Rakefile +++ b/config/site/examples/music/Rakefile @@ -13,7 +13,6 @@ ElasticGraph::Local::RakeTasks.new( path_to_schema: File.expand_path("schema.rb", __dir__) ) do |tasks| tasks.opensearch_versions = [] - tasks.enforce_json_schema_version = false end # :nocov: -- only used for manual live-server query validation, not during the build diff --git a/config/site/examples/schema_customization_rake_tasks/Rakefile b/config/site/examples/schema_customization_rake_tasks/Rakefile index 5a8baaf22..26fae769b 100644 --- a/config/site/examples/schema_customization_rake_tasks/Rakefile +++ b/config/site/examples/schema_customization_rake_tasks/Rakefile @@ -13,7 +13,6 @@ ElasticGraph::Local::RakeTasks.new( path_to_schema: File.expand_path("schema.rb", __dir__) ) do |tasks| tasks.opensearch_versions = [] - tasks.enforce_json_schema_version = false # :snippet-start: schema_element_name_form # Within `ElasticGraph::Local::RakeTasks.new { ... }` in your `Rakefile`: diff --git a/config/site/examples/schema_customization_rake_tasks/schema.rb b/config/site/examples/schema_customization_rake_tasks/schema.rb index 85981b246..cf40f3276 100644 --- a/config/site/examples/schema_customization_rake_tasks/schema.rb +++ b/config/site/examples/schema_customization_rake_tasks/schema.rb @@ -8,6 +8,7 @@ ElasticGraph.define_schema do |schema| schema.json_schema_version 1 + schema.enforce_json_schema_version false schema.object_type "Widget" do |t| t.field "id", "ID" From 79775125221a783b933ee52c535224db7c193775 Mon Sep 17 00:00:00 2001 From: Josh Wilson Date: Wed, 27 May 2026 13:35:33 -0500 Subject: [PATCH 4/6] Disable schema version enforcement in site examples --- config/site/examples/namespaced_queries/schema.rb | 1 + config/site/examples/nested_namespaced_queries/schema.rb | 1 + config/site/examples/schema_customization/schema.rb | 1 + 3 files changed, 3 insertions(+) diff --git a/config/site/examples/namespaced_queries/schema.rb b/config/site/examples/namespaced_queries/schema.rb index b3c893c6d..1d9150c1a 100644 --- a/config/site/examples/namespaced_queries/schema.rb +++ b/config/site/examples/namespaced_queries/schema.rb @@ -8,6 +8,7 @@ ElasticGraph.define_schema do |schema| schema.json_schema_version 1 + schema.enforce_json_schema_version false end # :snippet-start: namespace_type diff --git a/config/site/examples/nested_namespaced_queries/schema.rb b/config/site/examples/nested_namespaced_queries/schema.rb index fd4ba1472..dccc84cdc 100644 --- a/config/site/examples/nested_namespaced_queries/schema.rb +++ b/config/site/examples/nested_namespaced_queries/schema.rb @@ -8,6 +8,7 @@ ElasticGraph.define_schema do |schema| schema.json_schema_version 1 + schema.enforce_json_schema_version false end # :snippet-start: nested_namespace_type diff --git a/config/site/examples/schema_customization/schema.rb b/config/site/examples/schema_customization/schema.rb index 8b50c579a..4ef07c9d1 100644 --- a/config/site/examples/schema_customization/schema.rb +++ b/config/site/examples/schema_customization/schema.rb @@ -8,6 +8,7 @@ ElasticGraph.define_schema do |schema| schema.json_schema_version 1 + schema.enforce_json_schema_version false # :snippet-start: on_each_generated_schema_element # Within `ElasticGraph.define_schema { ... }` in your schema definition: From c82252d8290820b8c2d73ba30993f3797596c804 Mon Sep 17 00:00:00 2001 From: Josh Wilson Date: Wed, 20 May 2026 14:41:27 -0500 Subject: [PATCH 5/6] Move JSON schema helpers to json ingestion gem --- CODEBASE_OVERVIEW.md | 3 +++ Gemfile.lock | 1 + config/docker_demo/Dockerfile | 1 + elasticgraph-apollo/apollo_tests_implementation/Dockerfile | 1 + elasticgraph-apollo/apollo_tests_implementation/Gemfile | 1 + elasticgraph-json_ingestion/README.md | 3 +++ .../schema_definition/indexing/event_envelope.rb | 0 .../indexing/json_schema_field_metadata.rb | 0 .../schema_definition/indexing/json_schema_with_metadata.rb | 0 .../json_ingestion}/schema_definition/json_schema_pruner.rb | 0 .../schema_definition/indexing/event_envelope.rbs | 0 .../indexing/json_schema_field_metadata.rbs | 0 .../indexing/json_schema_with_metadata.rbs | 0 .../schema_definition/json_schema_pruner.rbs | 0 elasticgraph-schema_definition/README.md | 3 +++ .../elasticgraph-schema_definition.gemspec | 1 + .../lib/elastic_graph/schema_definition/indexing/field.rb | 2 +- .../lib/elastic_graph/schema_definition/results.rb | 6 +++--- .../schema_definition/schema_artifact_manager.rb | 2 +- .../indexing/json_schema_with_metadata_spec.rb | 2 +- .../schema_definition/json_schema_pruner_spec.rb | 2 +- elasticgraph/lib/elastic_graph/project_template/Gemfile.tt | 1 + 22 files changed, 22 insertions(+), 7 deletions(-) rename {elasticgraph-schema_definition/lib/elastic_graph => elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion}/schema_definition/indexing/event_envelope.rb (100%) rename {elasticgraph-schema_definition/lib/elastic_graph => elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion}/schema_definition/indexing/json_schema_field_metadata.rb (100%) rename {elasticgraph-schema_definition/lib/elastic_graph => elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion}/schema_definition/indexing/json_schema_with_metadata.rb (100%) rename {elasticgraph-schema_definition/lib/elastic_graph => elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion}/schema_definition/json_schema_pruner.rb (100%) rename {elasticgraph-schema_definition/sig/elastic_graph => elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion}/schema_definition/indexing/event_envelope.rbs (100%) rename {elasticgraph-schema_definition/sig/elastic_graph => elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion}/schema_definition/indexing/json_schema_field_metadata.rbs (100%) rename {elasticgraph-schema_definition/sig/elastic_graph => elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion}/schema_definition/indexing/json_schema_with_metadata.rbs (100%) rename {elasticgraph-schema_definition/sig/elastic_graph => elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion}/schema_definition/json_schema_pruner.rbs (100%) diff --git a/CODEBASE_OVERVIEW.md b/CODEBASE_OVERVIEW.md index a7f4b58ea..8cc77e3b8 100644 --- a/CODEBASE_OVERVIEW.md +++ b/CODEBASE_OVERVIEW.md @@ -111,6 +111,7 @@ graph LR; rackup["rackup"]; rake["rake"]; webrick["webrick"]; + elasticgraph-json_ingestion["eg-json_ingestion"]; elasticgraph-schema_artifacts["eg-schema_artifacts"]; graphql["graphql"]; elasticgraph --> elasticgraph-support; @@ -125,6 +126,7 @@ graph LR; elasticgraph-local --> webrick; elasticgraph-schema_definition --> elasticgraph-graphql; elasticgraph-schema_definition --> elasticgraph-indexer; + elasticgraph-schema_definition --> elasticgraph-json_ingestion; elasticgraph-schema_definition --> elasticgraph-schema_artifacts; elasticgraph-schema_definition --> elasticgraph-support; elasticgraph-schema_definition --> graphql; @@ -141,6 +143,7 @@ graph LR; class rackup externalGemCatStyle; class rake externalGemCatStyle; class webrick externalGemCatStyle; + class elasticgraph-json_ingestion otherEgGemStyle; class elasticgraph-schema_artifacts otherEgGemStyle; class graphql externalGemCatStyle; click thor href "https://rubygems.org/gems/thor" "Open on RubyGems.org" _blank; diff --git a/Gemfile.lock b/Gemfile.lock index 9e18f46ec..1786c742b 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -198,6 +198,7 @@ PATH elasticgraph-schema_definition (1.2.1.pre) elasticgraph-graphql (= 1.2.1.pre) elasticgraph-indexer (= 1.2.1.pre) + elasticgraph-json_ingestion (= 1.2.1.pre) elasticgraph-schema_artifacts (= 1.2.1.pre) elasticgraph-support (= 1.2.1.pre) graphql (~> 2.6.2) diff --git a/config/docker_demo/Dockerfile b/config/docker_demo/Dockerfile index c67a71782..2110740fd 100644 --- a/config/docker_demo/Dockerfile +++ b/config/docker_demo/Dockerfile @@ -16,6 +16,7 @@ COPY elasticgraph-datastore_core elasticgraph-datastore_core/ COPY elasticgraph-graphiql elasticgraph-graphiql/ COPY elasticgraph-graphql elasticgraph-graphql/ COPY elasticgraph-indexer elasticgraph-indexer/ +COPY elasticgraph-json_ingestion elasticgraph-json_ingestion/ COPY elasticgraph-local elasticgraph-local/ COPY elasticgraph-opensearch elasticgraph-opensearch/ COPY elasticgraph-query_registry elasticgraph-query_registry/ diff --git a/elasticgraph-apollo/apollo_tests_implementation/Dockerfile b/elasticgraph-apollo/apollo_tests_implementation/Dockerfile index 23442ab26..f2cd8c122 100644 --- a/elasticgraph-apollo/apollo_tests_implementation/Dockerfile +++ b/elasticgraph-apollo/apollo_tests_implementation/Dockerfile @@ -14,6 +14,7 @@ COPY elasticgraph-elasticsearch /web/elasticgraph-elasticsearch COPY elasticgraph-graphiql /web/elasticgraph-graphiql COPY elasticgraph-graphql /web/elasticgraph-graphql COPY elasticgraph-indexer /web/elasticgraph-indexer +COPY elasticgraph-json_ingestion /web/elasticgraph-json_ingestion COPY elasticgraph-rack /web/elasticgraph-rack COPY elasticgraph-schema_artifacts /web/elasticgraph-schema_artifacts COPY elasticgraph-schema_definition /web/elasticgraph-schema_definition diff --git a/elasticgraph-apollo/apollo_tests_implementation/Gemfile b/elasticgraph-apollo/apollo_tests_implementation/Gemfile index f082fa258..60b08ec0d 100644 --- a/elasticgraph-apollo/apollo_tests_implementation/Gemfile +++ b/elasticgraph-apollo/apollo_tests_implementation/Gemfile @@ -16,6 +16,7 @@ source "https://rubygems.org" graphiql graphql indexer + json_ingestion rack schema_artifacts schema_definition diff --git a/elasticgraph-json_ingestion/README.md b/elasticgraph-json_ingestion/README.md index db1eb3988..38d29c1d1 100644 --- a/elasticgraph-json_ingestion/README.md +++ b/elasticgraph-json_ingestion/README.md @@ -17,4 +17,7 @@ graph LR; elasticgraph-support["elasticgraph-support"]; elasticgraph-json_ingestion --> elasticgraph-support; class elasticgraph-support otherEgGemStyle; + elasticgraph-schema_definition["elasticgraph-schema_definition"]; + elasticgraph-schema_definition --> elasticgraph-json_ingestion; + class elasticgraph-schema_definition otherEgGemStyle; ``` diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/event_envelope.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rb similarity index 100% rename from elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/event_envelope.rb rename to elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rb diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rb similarity index 100% rename from elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rb rename to elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rb diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rb similarity index 100% rename from elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rb rename to elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rb diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/json_schema_pruner.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rb similarity index 100% rename from elasticgraph-schema_definition/lib/elastic_graph/schema_definition/json_schema_pruner.rb rename to elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rb diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/event_envelope.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rbs similarity index 100% rename from elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/event_envelope.rbs rename to elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rbs diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rbs similarity index 100% rename from elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rbs rename to elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rbs diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rbs similarity index 100% rename from elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rbs rename to elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rbs diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/json_schema_pruner.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rbs similarity index 100% rename from elasticgraph-schema_definition/sig/elastic_graph/schema_definition/json_schema_pruner.rbs rename to elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rbs diff --git a/elasticgraph-schema_definition/README.md b/elasticgraph-schema_definition/README.md index b85f79b9e..5c06af483 100644 --- a/elasticgraph-schema_definition/README.md +++ b/elasticgraph-schema_definition/README.md @@ -21,6 +21,9 @@ graph LR; elasticgraph-indexer["elasticgraph-indexer"]; elasticgraph-schema_definition --> elasticgraph-indexer; class elasticgraph-indexer otherEgGemStyle; + elasticgraph-json_ingestion["elasticgraph-json_ingestion"]; + elasticgraph-schema_definition --> elasticgraph-json_ingestion; + class elasticgraph-json_ingestion otherEgGemStyle; elasticgraph-schema_artifacts["elasticgraph-schema_artifacts"]; elasticgraph-schema_definition --> elasticgraph-schema_artifacts; class elasticgraph-schema_artifacts otherEgGemStyle; diff --git a/elasticgraph-schema_definition/elasticgraph-schema_definition.gemspec b/elasticgraph-schema_definition/elasticgraph-schema_definition.gemspec index 5703f516d..da50e4335 100644 --- a/elasticgraph-schema_definition/elasticgraph-schema_definition.gemspec +++ b/elasticgraph-schema_definition/elasticgraph-schema_definition.gemspec @@ -43,6 +43,7 @@ Gem::Specification.new do |spec| spec.add_dependency "elasticgraph-graphql", ElasticGraph::VERSION # needed since we validate that scalar `coerce_with` options are valid (which loads scalar coercion adapters) spec.add_dependency "elasticgraph-indexer", ElasticGraph::VERSION # needed since we validate that scalar `prepare_for_indexing_with` options are valid (which loads indexing preparer adapters) + spec.add_dependency "elasticgraph-json_ingestion", ElasticGraph::VERSION spec.add_dependency "elasticgraph-schema_artifacts", ElasticGraph::VERSION spec.add_dependency "elasticgraph-support", ElasticGraph::VERSION spec.add_dependency "graphql", "~> 2.6.2" diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field.rb index 5b0c0db1c..fa215a5f1 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field.rb @@ -7,7 +7,7 @@ # frozen_string_literal: true require "elastic_graph/constants" -require "elastic_graph/schema_definition/indexing/json_schema_field_metadata" +require "elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata" require "elastic_graph/schema_definition/indexing/list_counts_mapping" require "elastic_graph/support/hash_util" require "elastic_graph/support/memoizable_data" diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb index 59a8c3891..160e3c854 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb @@ -8,10 +8,10 @@ require "elastic_graph/constants" require "elastic_graph/errors" -require "elastic_graph/schema_artifacts/runtime_metadata/schema" +require "elastic_graph/json_ingestion/schema_definition/indexing/event_envelope" +require "elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata" require "elastic_graph/schema_artifacts/artifacts_helper_methods" -require "elastic_graph/schema_definition/indexing/event_envelope" -require "elastic_graph/schema_definition/indexing/json_schema_with_metadata" +require "elastic_graph/schema_artifacts/runtime_metadata/schema" require "elastic_graph/schema_definition/indexing/relationship_resolver" require "elastic_graph/schema_definition/indexing/update_target_resolver" require "elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect" diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb index d3e0a9a81..b1c3c3f1a 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb @@ -8,7 +8,7 @@ require "did_you_mean" require "elastic_graph/constants" -require "elastic_graph/schema_definition/json_schema_pruner" +require "elastic_graph/json_ingestion/schema_definition/json_schema_pruner" require "elastic_graph/support/graphql_gem_loader" require "elastic_graph/support/memoizable_data" require "fileutils" diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/indexing/json_schema_with_metadata_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/indexing/json_schema_with_metadata_spec.rb index bd24d3c13..22fc317ca 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/indexing/json_schema_with_metadata_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/indexing/json_schema_with_metadata_spec.rb @@ -6,8 +6,8 @@ # # frozen_string_literal: true +require "elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata" require "elastic_graph/spec_support/schema_definition_helpers" -require "elastic_graph/schema_definition/indexing/json_schema_with_metadata" module ElasticGraph module SchemaDefinition diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_pruner_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_pruner_spec.rb index af9298366..66c140101 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_pruner_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_pruner_spec.rb @@ -7,8 +7,8 @@ # frozen_string_literal: true require "elastic_graph/constants" +require "elastic_graph/json_ingestion/schema_definition/json_schema_pruner" require "elastic_graph/spec_support/schema_definition_helpers" -require "elastic_graph/schema_definition/json_schema_pruner" module ElasticGraph module SchemaDefinition diff --git a/elasticgraph/lib/elastic_graph/project_template/Gemfile.tt b/elasticgraph/lib/elastic_graph/project_template/Gemfile.tt index db8476b02..88c057f63 100644 --- a/elasticgraph/lib/elastic_graph/project_template/Gemfile.tt +++ b/elasticgraph/lib/elastic_graph/project_template/Gemfile.tt @@ -7,6 +7,7 @@ gem "graphql-c_parser", "~> 1.1", platforms: :ruby elasticgraph_details = <%= ElasticGraph.setup_env.gemfile_elasticgraph_details_code_snippet %> gem "elasticgraph-local", *elasticgraph_details +gem "elasticgraph-json_ingestion", *elasticgraph_details gem "elasticgraph-query_registry", *elasticgraph_details # Can be elasticgraph-elasticsearch or elasticgraph-opensearch based on the datastore you want to use. From ffec0c42a218747e49358e731bb4d2920aa5a58e Mon Sep 17 00:00:00 2001 From: Josh Wilson Date: Wed, 20 May 2026 14:44:36 -0500 Subject: [PATCH 6/6] Namespace JSON schema helpers under JSON ingestion --- elasticgraph-json_ingestion/README.md | 4 +- .../indexing/event_envelope.rb | 135 +++---- .../indexing/json_schema_field_metadata.rb | 40 +- .../indexing/json_schema_with_metadata.rb | 359 +++++++++--------- .../schema_definition/json_schema_pruner.rb | 80 ++-- .../indexing/event_envelope.rbs | 10 +- .../indexing/json_schema_field_metadata.rbs | 22 +- .../indexing/json_schema_with_metadata.rbs | 163 ++++---- .../schema_definition/json_schema_pruner.rbs | 15 +- .../elastic_graph/schema_definition/api.rb | 2 +- .../schema_definition/indexing/field.rb | 2 +- .../schema_definition/results.rb | 6 +- .../schema_artifact_manager.rb | 6 +- .../schema_definition/indexing/field.rbs | 2 +- .../schema_definition/indexing/field_type.rbs | 2 +- .../schema_definition/results.rbs | 10 +- .../schema_artifact_manager.rbs | 4 +- .../schema_definition/rake_tasks_spec.rb | 6 +- .../json_schema_with_metadata_spec.rb | 4 +- .../json_schema_field_metadata_spec.rb | 3 +- .../json_schema_pruner_spec.rb | 2 +- 21 files changed, 454 insertions(+), 423 deletions(-) diff --git a/elasticgraph-json_ingestion/README.md b/elasticgraph-json_ingestion/README.md index 38d29c1d1..c5f2fae01 100644 --- a/elasticgraph-json_ingestion/README.md +++ b/elasticgraph-json_ingestion/README.md @@ -2,8 +2,8 @@ JSON Schema ingestion support for ElasticGraph. -This gem provides the schema-definition extension that generates JSON Schema artifacts for indexing -events and validates JSON-ingestion-specific schema options. +This gem contains the JSON Schema helper code used by schema definition to generate indexing +event schemas and merge ElasticGraph metadata into versioned schema artifacts. ## Dependency Diagram diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rb index 605024146..3b0803f75 100644 --- a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rb +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rb @@ -9,76 +9,79 @@ require "elastic_graph/constants" module ElasticGraph - module SchemaDefinition - module Indexing - # Contains logic related to "event envelope"--the layer of metadata that wraps all indexing events. - # - # @api private - module EventEnvelope - # @param indexed_type_names [Array] names of the indexed types - # @param json_schema_version [Integer] the version of the JSON schema - # @return [Hash] the JSON schema for the ElasticGraph event envelope for the given `indexed_type_names`. - def self.json_schema(indexed_type_names, json_schema_version) - { - "type" => "object", - "description" => "Required by ElasticGraph to wrap every data event.", - "properties" => { - "op" => { - "description" => "Indicates what type of operation the event represents. For now, only `upsert` is supported, but we plan to support other operations in the future.", - "type" => "string", - "enum" => %w[upsert] - }, - "type" => { - "description" => "The type of object present in `record`.", - "type" => "string", - # Sorting doesn't really matter here, but it's nice for the output in the schema artifact to be consistent. - "enum" => indexed_type_names.sort - }, - "id" => { - "description" => "The unique identifier of the record.", - "type" => "string", - "maxLength" => DEFAULT_MAX_KEYWORD_LENGTH - }, - "version" => { - "description" => 'Used to handle duplicate and out-of-order events. When ElasticGraph ingests multiple events for the same `type` and `id`, the one with the largest `version` will "win".', - "type" => "integer", - "minimum" => 0, - "maximum" => (2**63) - 1 - }, - "record" => { - "description" => "The record of this event. The payload of this field must match the JSON schema of the named `type`.", - "type" => "object" - }, - "latency_timestamps" => { - "description" => "Timestamps from which ElasticGraph measures indexing latency. The `ElasticGraphIndexingLatencies` log message produced for each event will include a measurement from each timestamp included in this map.", - "type" => "object", - "additionalProperties" => false, - "patternProperties" => { - "^\\w+_at$" => { - "description" => "A timestamp from which ElasticGraph will measure indexing latency. The timestamp name must end in `_at`.", - "type" => "string", - "format" => "date-time" + module JSONIngestion + module SchemaDefinition + # Indexing support used while generating JSON ingestion schemas. + module Indexing + # Contains logic related to "event envelope"--the layer of metadata that wraps all indexing events. + # + # @api private + module EventEnvelope + # @param indexed_type_names [Array] names of the indexed types + # @param json_schema_version [Integer] the version of the JSON schema + # @return [Hash] the JSON schema for the ElasticGraph event envelope for the given `indexed_type_names`. + def self.json_schema(indexed_type_names, json_schema_version) + { + "type" => "object", + "description" => "Required by ElasticGraph to wrap every data event.", + "properties" => { + "op" => { + "description" => "Indicates what type of operation the event represents. For now, only `upsert` is supported, but we plan to support other operations in the future.", + "type" => "string", + "enum" => %w[upsert] + }, + "type" => { + "description" => "The type of object present in `record`.", + "type" => "string", + # Sorting doesn't really matter here, but it's nice for the output in the schema artifact to be consistent. + "enum" => indexed_type_names.sort + }, + "id" => { + "description" => "The unique identifier of the record.", + "type" => "string", + "maxLength" => DEFAULT_MAX_KEYWORD_LENGTH + }, + "version" => { + "description" => 'Used to handle duplicate and out-of-order events. When ElasticGraph ingests multiple events for the same `type` and `id`, the one with the largest `version` will "win".', + "type" => "integer", + "minimum" => 0, + "maximum" => (2**63) - 1 + }, + "record" => { + "description" => "The record of this event. The payload of this field must match the JSON schema of the named `type`.", + "type" => "object" + }, + "latency_timestamps" => { + "description" => "Timestamps from which ElasticGraph measures indexing latency. The `ElasticGraphIndexingLatencies` log message produced for each event will include a measurement from each timestamp included in this map.", + "type" => "object", + "additionalProperties" => false, + "patternProperties" => { + "^\\w+_at$" => { + "description" => "A timestamp from which ElasticGraph will measure indexing latency. The timestamp name must end in `_at`.", + "type" => "string", + "format" => "date-time" + } } + }, + JSON_SCHEMA_VERSION_KEY => { + "description" => "The version of the JSON schema the publisher was using when the event was published. ElasticGraph will use the JSON schema matching this version to process the event.", + "const" => json_schema_version + }, + "message_id" => { + "description" => "The optional ID of the message containing this event from whatever messaging system is being used between the publisher and the ElasticGraph indexer.", + "type" => "string" } }, - JSON_SCHEMA_VERSION_KEY => { - "description" => "The version of the JSON schema the publisher was using when the event was published. ElasticGraph will use the JSON schema matching this version to process the event.", - "const" => json_schema_version + "additionalProperties" => false, + "required" => ["op", "type", "id", "version", JSON_SCHEMA_VERSION_KEY], + "if" => { + "properties" => { + "op" => {"const" => "upsert"} + } }, - "message_id" => { - "description" => "The optional ID of the message containing this event from whatever messaging system is being used between the publisher and the ElasticGraph indexer.", - "type" => "string" - } - }, - "additionalProperties" => false, - "required" => ["op", "type", "id", "version", JSON_SCHEMA_VERSION_KEY], - "if" => { - "properties" => { - "op" => {"const" => "upsert"} - } - }, - "then" => {"required" => ["record"]} - } + "then" => {"required" => ["record"]} + } + end end end end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rb index 535d11b2d..83c94b9d4 100644 --- a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rb +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rb @@ -7,27 +7,29 @@ # frozen_string_literal: true module ElasticGraph - module SchemaDefinition - module Indexing - # @!parse class JSONSchemaFieldMetadata; end - JSONSchemaFieldMetadata = ::Data.define(:type, :name_in_index) + module JSONIngestion + module SchemaDefinition + module Indexing + # @!parse class JSONSchemaFieldMetadata; end + JSONSchemaFieldMetadata = ::Data.define(:type, :name_in_index) - # Metadata about an ElasticGraph field that needs to be stored in our versioned JSON schemas - # alongside the JSON schema fields. - # - # @!attribute [r] type - # @return [String] name of the ElasticGraph type for this field - # @!attribute [r] name_in_index - # @return [String] name of the field in the index - # - # @api private - class JSONSchemaFieldMetadata < ::Data - # @return [Hash] hash form of the metadata that can be dumped in JSON schema - def to_dumpable_hash - {"type" => type, "nameInIndex" => name_in_index} - end + # Metadata about an ElasticGraph field that needs to be stored in our versioned JSON schemas + # alongside the JSON schema fields. + # + # @!attribute [r] type + # @return [String] name of the ElasticGraph type for this field + # @!attribute [r] name_in_index + # @return [String] name of the field in the index + # + # @api private + class JSONSchemaFieldMetadata < ::Data + # @return [Hash] hash form of the metadata that can be dumped in JSON schema + def to_dumpable_hash + {"type" => type, "nameInIndex" => name_in_index} + end - # @dynamic initialize, type, name_in_index + # @dynamic initialize, type, name_in_index + end end end end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rb index a56d9c2e7..2f62a32fe 100644 --- a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rb +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rb @@ -9,225 +9,232 @@ require "elastic_graph/constants" module ElasticGraph - module SchemaDefinition - module Indexing - # Represents the result of merging a JSON schema with metadata. The result includes both - # the merged JSON schema and a list of `failed_fields` indicating which fields metadata - # could not be determined for. - # - # @private - class JSONSchemaWithMetadata < ::Data.define( - # The JSON schema. - :json_schema, - # A set of fields (in the form `Type.field`) that were needed but not found. - :missing_fields, - # A set of type names that were needed but not found. - :missing_types, - # A set of `DeprecatedElement` objects that create conflicting definitions. - :definition_conflicts, - # A set of fields that have been deleted but that must be retained (e.g. for custom shard routing or rollover) - :missing_necessary_fields - ) - def json_schema_version - json_schema.fetch(JSON_SCHEMA_VERSION_KEY) - end - - # Responsible for building `JSONSchemaWithMetadata` instances. + module JSONIngestion + module SchemaDefinition + module Indexing + # Represents the result of merging a JSON schema with ElasticGraph metadata. + # The result includes the merged JSON schema plus details about schema elements + # from prior versions that could not be matched to current metadata. + # + # @!attribute [r] json_schema + # @return [Hash] the merged JSON schema + # @!attribute [r] missing_fields + # @return [Set] fields (in the form `Type.field`) that were needed but not found + # @!attribute [r] missing_types + # @return [Set] type names that were needed but not found + # @!attribute [r] definition_conflicts + # @return [Set] deprecated elements that create conflicting definitions + # @!attribute [r] missing_necessary_fields + # @return [Array] deleted fields that must be retained for routing or rollover # # @private - class Merger - # @dynamic unused_deprecated_elements - attr_reader :unused_deprecated_elements - - def initialize(schema_def_results) - @field_metadata_by_type_and_field_name = schema_def_results.json_schema_field_metadata_by_type_and_field_name - @renamed_types_by_old_name = schema_def_results.state.renamed_types_by_old_name - @deleted_types_by_old_name = schema_def_results.state.deleted_types_by_old_name - @renamed_fields_by_type_name_and_old_field_name = schema_def_results.state.renamed_fields_by_type_name_and_old_field_name - @deleted_fields_by_type_name_and_old_field_name = schema_def_results.state.deleted_fields_by_type_name_and_old_field_name - @state = schema_def_results.state - @derived_indexing_type_names = schema_def_results.derived_indexing_type_names - - @unused_deprecated_elements = ( - @renamed_types_by_old_name.values + - @deleted_types_by_old_name.values + - @renamed_fields_by_type_name_and_old_field_name.values.flat_map(&:values) + - @deleted_fields_by_type_name_and_old_field_name.values.flat_map(&:values) - ).to_set + class JSONSchemaWithMetadata < ::Data.define( + # The JSON schema. + :json_schema, + # A set of fields (in the form `Type.field`) that were needed but not found. + :missing_fields, + # A set of type names that were needed but not found. + :missing_types, + # A set of `DeprecatedElement` objects that create conflicting definitions. + :definition_conflicts, + # A set of fields that have been deleted but that must be retained (e.g. for custom shard routing or rollover) + :missing_necessary_fields + ) + def json_schema_version + json_schema.fetch(JSON_SCHEMA_VERSION_KEY) end - def merge_metadata_into(json_schema) - missing_fields = ::Set.new - missing_types = ::Set.new - definition_conflicts = ::Set.new - old_type_name_by_current_name = {} # : ::Hash[String, String] - - defs = json_schema.fetch("$defs").to_h do |type_name, type_def| - if type_name != EVENT_ENVELOPE_JSON_SCHEMA_NAME && (properties = type_def["properties"]) - current_type_name = determine_current_type_name( - type_name, - missing_types: missing_types, - definition_conflicts: definition_conflicts - ) - - if current_type_name - old_type_name_by_current_name[current_type_name] = type_name - end + # Responsible for building `JSONSchemaWithMetadata` instances. + # + # @private + class Merger + # @dynamic unused_deprecated_elements + attr_reader :unused_deprecated_elements + + def initialize(schema_def_results) + @field_metadata_by_type_and_field_name = schema_def_results.json_schema_field_metadata_by_type_and_field_name + @renamed_types_by_old_name = schema_def_results.state.renamed_types_by_old_name + @deleted_types_by_old_name = schema_def_results.state.deleted_types_by_old_name + @renamed_fields_by_type_name_and_old_field_name = schema_def_results.state.renamed_fields_by_type_name_and_old_field_name + @deleted_fields_by_type_name_and_old_field_name = schema_def_results.state.deleted_fields_by_type_name_and_old_field_name + @state = schema_def_results.state + @derived_indexing_type_names = schema_def_results.derived_indexing_type_names + + @unused_deprecated_elements = ( + @renamed_types_by_old_name.values + + @deleted_types_by_old_name.values + + @renamed_fields_by_type_name_and_old_field_name.values.flat_map(&:values) + + @deleted_fields_by_type_name_and_old_field_name.values.flat_map(&:values) + ).to_set + end - properties = properties.to_h do |field_name, prop| - unless field_name == "__typename" - field_metadata = current_type_name&.then do |name| - field_metadata_for( - name, - field_name, - missing_fields: missing_fields, - definition_conflicts: definition_conflicts - ) + def merge_metadata_into(json_schema) + missing_fields = ::Set.new + missing_types = ::Set.new + definition_conflicts = ::Set.new + old_type_name_by_current_name = {} # : ::Hash[::String, ::String] + + defs = json_schema.fetch("$defs").to_h do |type_name, type_def| + if type_name != EVENT_ENVELOPE_JSON_SCHEMA_NAME && (properties = type_def["properties"]) + current_type_name = determine_current_type_name( + type_name, + missing_types: missing_types, + definition_conflicts: definition_conflicts + ) + + if current_type_name + old_type_name_by_current_name[current_type_name] = type_name + end + + properties = properties.to_h do |field_name, prop| + unless field_name == "__typename" + field_metadata = current_type_name&.then do |name| + field_metadata_for( + name, + field_name, + missing_fields: missing_fields, + definition_conflicts: definition_conflicts + ) + end + + prop = prop.merge({"ElasticGraph" => field_metadata&.to_dumpable_hash}) end - prop = prop.merge({"ElasticGraph" => field_metadata&.to_dumpable_hash}) + [field_name, prop] end - [field_name, prop] + type_def = type_def.merge({"properties" => properties}) end - type_def = type_def.merge({"properties" => properties}) + [type_name, type_def] end - [type_name, type_def] + json_schema = json_schema.merge("$defs" => defs) + + JSONSchemaWithMetadata.new( + json_schema: json_schema, + missing_fields: missing_fields, + missing_types: missing_types, + definition_conflicts: definition_conflicts, + missing_necessary_fields: identify_missing_necessary_fields(json_schema, old_type_name_by_current_name) + ) end - json_schema = json_schema.merge("$defs" => defs) + private - JSONSchemaWithMetadata.new( - json_schema: json_schema, - missing_fields: missing_fields, - missing_types: missing_types, - definition_conflicts: definition_conflicts, - missing_necessary_fields: identify_missing_necessary_fields(json_schema, old_type_name_by_current_name) - ) - end + # Given a historical `type_name`, determines (and returns) the current name for that type. + def determine_current_type_name(type_name, missing_types:, definition_conflicts:) + exists_currently = @field_metadata_by_type_and_field_name.key?(type_name) + deleted = @deleted_types_by_old_name[type_name]&.tap { |elem| @unused_deprecated_elements.delete(elem) } + renamed = @renamed_types_by_old_name[type_name]&.tap { |elem| @unused_deprecated_elements.delete(elem) } - private + if [exists_currently, deleted, renamed].count(&:itself) > 1 + definition_conflicts.merge([deleted, renamed].compact) + end - # Given a historical `type_name`, determines (and returns) the current name for that type. - def determine_current_type_name(type_name, missing_types:, definition_conflicts:) - exists_currently = @field_metadata_by_type_and_field_name.key?(type_name) - deleted = @deleted_types_by_old_name[type_name]&.tap { |elem| @unused_deprecated_elements.delete(elem) } - renamed = @renamed_types_by_old_name[type_name]&.tap { |elem| @unused_deprecated_elements.delete(elem) } + return type_name if exists_currently + return nil if deleted + return renamed.name if renamed - if [exists_currently, deleted, renamed].count(&:itself) > 1 - definition_conflicts.merge([deleted, renamed].compact) + missing_types << type_name + nil end - return type_name if exists_currently - return nil if deleted - return renamed.name if renamed + # Given a historical `type_name` and `field_name` determines (and returns) the field metadata for it. + def field_metadata_for(type_name, field_name, missing_fields:, definition_conflicts:) + full_name = "#{type_name}.#{field_name}" - missing_types << type_name - nil - end + current_meta = @field_metadata_by_type_and_field_name.dig(type_name, field_name) + deleted = @deleted_fields_by_type_name_and_old_field_name.dig(type_name, field_name)&.tap do |elem| + @unused_deprecated_elements.delete(elem) + end + renamed = @renamed_fields_by_type_name_and_old_field_name.dig(type_name, field_name)&.tap do |elem| + @unused_deprecated_elements.delete(elem) + end - # Given a historical `type_name` and `field_name` determines (and returns) the field metadata for it. - def field_metadata_for(type_name, field_name, missing_fields:, definition_conflicts:) - full_name = "#{type_name}.#{field_name}" + if [current_meta, deleted, renamed].count(&:itself) > 1 + definition_conflicts.merge([deleted, renamed].compact.map { |elem| elem.with(name: full_name) }) + end - current_meta = @field_metadata_by_type_and_field_name.dig(type_name, field_name) - deleted = @deleted_fields_by_type_name_and_old_field_name.dig(type_name, field_name)&.tap do |elem| - @unused_deprecated_elements.delete(elem) - end - renamed = @renamed_fields_by_type_name_and_old_field_name.dig(type_name, field_name)&.tap do |elem| - @unused_deprecated_elements.delete(elem) - end + return current_meta if current_meta + return nil if deleted + return @field_metadata_by_type_and_field_name.dig(type_name, renamed.name) if renamed - if [current_meta, deleted, renamed].count(&:itself) > 1 - definition_conflicts.merge([deleted, renamed].compact.map { |elem| elem.with(name: full_name) }) + missing_fields << full_name + nil end - return current_meta if current_meta - return nil if deleted - return @field_metadata_by_type_and_field_name.dig(type_name, renamed.name) if renamed + def identify_missing_necessary_fields(json_schema, old_type_name_by_current_name) + json_schema_resolver = JSONSchemaResolver.new(@state, json_schema, old_type_name_by_current_name) - missing_fields << full_name - nil - end - - def identify_missing_necessary_fields(json_schema, old_type_name_by_current_name) - json_schema_resolver = JSONSchemaResolver.new(@state, json_schema, old_type_name_by_current_name) - version = json_schema.fetch(JSON_SCHEMA_VERSION_KEY) - - @state.object_types_by_name.values - .select { |type| type.has_own_index_def? && !@derived_indexing_type_names.include?(type.name) } - .flat_map do |object_type| - identify_missing_necessary_fields_for_index_def( - object_type, - object_type.own_index_def, # : Indexing::Index - json_schema_resolver, version - ) - end - end + @state.object_types_by_name.values + .select { |type| type.has_own_index_def? && !@derived_indexing_type_names.include?(type.name) } + .flat_map do |object_type| + index_def = object_type.own_index_def # : ElasticGraph::SchemaDefinition::Indexing::Index + identify_missing_necessary_fields_for_index_def(index_def, json_schema_resolver) + end + end - def identify_missing_necessary_fields_for_index_def(object_type, index_def, json_schema_resolver, json_schema_version) - { - "routing" => index_def.routing_field_path, - "rollover" => index_def.rollover_config&.timestamp_field_path - }.compact.filter_map do |field_type, field_path| - if json_schema_resolver.necessary_path_missing?(field_path) - # The JSON schema v # {json_schema_version} artifact has no field that maps to the #{field_type} path of `#{field_path.fully_qualified_path_in_index}`. - - MissingNecessaryField.new( - field_type: field_type, - fully_qualified_path: field_path.fully_qualified_path_in_index - ) + def identify_missing_necessary_fields_for_index_def(index_def, json_schema_resolver) + { + "routing" => index_def.routing_field_path, + "rollover" => index_def.rollover_config&.timestamp_field_path + }.compact.filter_map do |field_type, field_path| + if json_schema_resolver.necessary_path_missing?(field_path) + MissingNecessaryField.new( + field_type: field_type, + fully_qualified_path: field_path.fully_qualified_path_in_index + ) + end end end - end - class JSONSchemaResolver - def initialize(state, json_schema, old_type_name_by_current_name) - @state = state - @old_type_name_by_current_name = old_type_name_by_current_name - @meta_by_old_type_and_name_in_index = ::Hash.new do |hash, type_name| - properties = json_schema.fetch("$defs").fetch(type_name).fetch("properties") + class JSONSchemaResolver + def initialize(state, json_schema, old_type_name_by_current_name) + @state = state + @old_type_name_by_current_name = old_type_name_by_current_name + @meta_by_old_type_and_name_in_index = ::Hash.new do |hash, type_name| + properties = json_schema.fetch("$defs").fetch(type_name).fetch("properties") - hash[type_name] = properties.filter_map do |name, prop| - if (metadata = prop["ElasticGraph"]) - [metadata.fetch("nameInIndex"), metadata] - end - end.to_h + hash[type_name] = properties.filter_map do |name, prop| + if (metadata = prop["ElasticGraph"]) + [metadata.fetch("nameInIndex"), metadata] + end + end.to_h + end end - end - # Indicates if the given `field_path` is (1) necessary and (2) missing from the JSON schema, indicating a problem. - # - # - Returns `false` is the given `field_path` is present in the JSON schema. - # - Returns `false` is the parent type of `field_path` has not been retained in this JSON schema version - # (in that case, the field path is not necessary). - # - Otherwise, returns `true` since the field path is both necessary and missing. - def necessary_path_missing?(field_path) - parent_type = field_path.first_part.parent_type.name - - field_path.path_parts.any? do |path_part| - necessary_path_part_missing?(parent_type, path_part.name_in_index) do |meta| - parent_type = @state.type_ref(meta.fetch("type")).fully_unwrapped.name + # Indicates if the given `field_path` is (1) necessary and (2) missing from the JSON schema, indicating a problem. + # + # - Returns `false` if the given `field_path` is present in the JSON schema. + # - Returns `false` if the parent type of `field_path` has not been retained in this JSON schema version + # (in that case, the field path is not necessary). + # - Otherwise, returns `true` since the field path is both necessary and missing. + def necessary_path_missing?(field_path) + parent_type = field_path.first_part.parent_type.name + + field_path.path_parts.any? do |path_part| + necessary_path_part_missing?(parent_type, path_part.name_in_index) do |meta| + parent_type = @state.type_ref(meta.fetch("type")).fully_unwrapped.name + end end end - end - private + private - def necessary_path_part_missing?(parent_type, name_in_index) - old_type_name = @old_type_name_by_current_name[parent_type] - return false unless old_type_name + def necessary_path_part_missing?(parent_type, name_in_index) + old_type_name = @old_type_name_by_current_name[parent_type] + return false unless old_type_name - meta = @meta_by_old_type_and_name_in_index.dig(old_type_name, name_in_index) - yield meta if meta - !meta + meta = @meta_by_old_type_and_name_in_index.dig(old_type_name, name_in_index) + yield meta if meta + !meta + end end end - end - MissingNecessaryField = ::Data.define(:field_type, :fully_qualified_path) + MissingNecessaryField = ::Data.define(:field_type, :fully_qualified_path) + end end end end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rb index 7a8323fa6..e198ec0fa 100644 --- a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rb +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rb @@ -9,52 +9,54 @@ require "elastic_graph/constants" module ElasticGraph - module SchemaDefinition - # Prunes unused type definitions from a given JSON schema. - # - # @private - class JSONSchemaPruner - def self.prune(original_json_schema) - initial_type_names = [EVENT_ENVELOPE_JSON_SCHEMA_NAME] + original_json_schema - .dig("$defs", EVENT_ENVELOPE_JSON_SCHEMA_NAME, "properties", "type", "enum") - - types_to_keep = referenced_type_names(initial_type_names, original_json_schema["$defs"]) - - # The .select will preserve the sort order of the original hash - # standard:disable Style/HashSlice -- https://github.com/soutaro/steep/issues/1503 - pruned_defs = original_json_schema["$defs"].select { |k, _v| types_to_keep.include?(k) } - # standard:enable Style/HashSlice - - original_json_schema.merge("$defs" => pruned_defs) - end + # Provides JSON ingestion support for ElasticGraph. + module JSONIngestion + # Schema definition integration for JSON ingestion artifacts. + module SchemaDefinition + # Prunes unused type definitions from a given JSON schema. + # + # @private + class JSONSchemaPruner + def self.prune(original_json_schema) + initial_type_names = [EVENT_ENVELOPE_JSON_SCHEMA_NAME] + original_json_schema + .dig("$defs", EVENT_ENVELOPE_JSON_SCHEMA_NAME, "properties", "type", "enum") - # Returns a list of type names indicating all types referenced from any type in source_type_names. - private_class_method - def self.referenced_type_names(source_type_names, original_defs) - return Set.new if source_type_names.empty? + types_to_keep = referenced_type_names(initial_type_names, original_json_schema["$defs"]) - referenced_type_defs = original_defs.slice(*source_type_names) - ref_names = collect_ref_names(referenced_type_defs) + # The .select will preserve the sort order of the original hash + # standard:disable Style/HashSlice -- https://github.com/soutaro/steep/issues/1503 + pruned_defs = original_json_schema["$defs"].select { |type_name, _type_def| types_to_keep.include?(type_name) } + # standard:enable Style/HashSlice - referenced_type_names(ref_names, original_defs) + source_type_names - end + original_json_schema.merge("$defs" => pruned_defs) + end + + # Returns a list of type names indicating all types referenced from any type in source_type_names. + private_class_method def self.referenced_type_names(source_type_names, original_defs) + return Set.new if source_type_names.empty? + + referenced_type_defs = original_defs.slice(*source_type_names) + ref_names = collect_ref_names(referenced_type_defs) + + referenced_type_names(ref_names, original_defs) + source_type_names + end - private_class_method - def self.collect_ref_names(hash) - hash.flat_map do |key, value| - case value - when ::Hash - collect_ref_names(value) - when ::Array - value.grep(::Hash).flat_map { |subhash| collect_ref_names(subhash) } - when ::String - if key == "$ref" && (type = value[%r{\A#/\$defs/(.+)\z}, 1]) - [type] + private_class_method def self.collect_ref_names(hash) + hash.flat_map do |key, value| + case value + when ::Hash + collect_ref_names(value) + when ::Array + value.grep(::Hash).flat_map { |subhash| collect_ref_names(subhash) } + when ::String + if key == "$ref" && (type = value[%r{\A#/\$defs/(.+)\z}, 1]) + [type] + else + [] + end else [] end - else - [] end end end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rbs index aa609f3de..bed69cb23 100644 --- a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rbs +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rbs @@ -1,8 +1,10 @@ module ElasticGraph - module SchemaDefinition - module Indexing - module EventEnvelope - def self.json_schema: (::Array[::String], ::Integer) -> ::Hash[::String, untyped] + module JSONIngestion + module SchemaDefinition + module Indexing + module EventEnvelope + def self.json_schema: (::Array[::String], ::Integer) -> ::Hash[::String, untyped] + end end end end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rbs index b025eda15..98948fbca 100644 --- a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rbs +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rbs @@ -1,16 +1,18 @@ module ElasticGraph - module SchemaDefinition - module Indexing - class JSONSchemaFieldMetadata - attr_reader type: ::String - attr_reader name_in_index: ::String + module JSONIngestion + module SchemaDefinition + module Indexing + class JSONSchemaFieldMetadata + attr_reader type: ::String + attr_reader name_in_index: ::String - def initialize: ( - type: ::String, - name_in_index: ::String - ) -> void + def initialize: ( + type: ::String, + name_in_index: ::String + ) -> void - def to_dumpable_hash: () -> {"type" => ::String, "nameInIndex" => ::String} + def to_dumpable_hash: () -> {"type" => ::String, "nameInIndex" => ::String} + end end end end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rbs index dcc37b607..f78578800 100644 --- a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rbs +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rbs @@ -1,96 +1,101 @@ module ElasticGraph - module SchemaDefinition - module Indexing - class JSONSchemaWithMetadataSupertype - attr_reader json_schema: ::Hash[::String, untyped] - attr_reader missing_fields: ::Set[::String] - attr_reader missing_types: ::Set[::String] - attr_reader definition_conflicts: ::Set[SchemaElements::DeprecatedElement] - attr_reader missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField] - - def initialize: ( - json_schema: ::Hash[::String, untyped], - missing_fields: ::Set[::String], - missing_types: ::Set[::String], - definition_conflicts: ::Set[SchemaElements::DeprecatedElement], - missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField] - ) -> void - - def with: ( - ?json_schema: ::Hash[::String, untyped], - ?missing_fields: ::Set[::String], - ?missing_types: ::Set[::String], - ?definition_conflicts: ::Set[SchemaElements::DeprecatedElement], - ?missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField] - ) -> instance - end - - class JSONSchemaWithMetadata < JSONSchemaWithMetadataSupertype - def json_schema_version: () -> ::Integer - - class Merger - @field_metadata_by_type_and_field_name: ::Hash[::String, ::Hash[::String, JSONSchemaFieldMetadata]] - @renamed_types_by_old_name: ::Hash[::String, SchemaElements::DeprecatedElement] - @deleted_types_by_old_name: ::Hash[::String, SchemaElements::DeprecatedElement] - @renamed_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, SchemaElements::DeprecatedElement]] - @deleted_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, SchemaElements::DeprecatedElement]] - @state: State - @derived_indexing_type_names: ::Set[::String] - - attr_reader unused_deprecated_elements: ::Set[SchemaElements::DeprecatedElement] + module JSONIngestion + module SchemaDefinition + module Indexing + class JSONSchemaWithMetadataSupertype + attr_reader json_schema: ::Hash[::String, untyped] + attr_reader missing_fields: ::Set[::String] + attr_reader missing_types: ::Set[::String] + attr_reader definition_conflicts: ::Set[ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + attr_reader missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField] - def initialize: (Results) -> void - def merge_metadata_into: (::Hash[::String, untyped]) -> JSONSchemaWithMetadata - - private - - def determine_current_type_name: ( - ::String, + def initialize: ( + json_schema: ::Hash[::String, untyped], + missing_fields: ::Set[::String], missing_types: ::Set[::String], - definition_conflicts: ::Set[SchemaElements::DeprecatedElement] - ) -> ::String? + definition_conflicts: ::Set[ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement], + missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField] + ) -> void - def field_metadata_for: ( - ::String, - ::String, - missing_fields: ::Set[::String], - definition_conflicts: ::Set[SchemaElements::DeprecatedElement] - ) -> JSONSchemaFieldMetadata? + def with: ( + ?json_schema: ::Hash[::String, untyped], + ?missing_fields: ::Set[::String], + ?missing_types: ::Set[::String], + ?definition_conflicts: ::Set[ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement], + ?missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField] + ) -> instance + end - def identify_missing_necessary_fields: ( - ::Hash[::String, untyped], - ::Hash[::String, ::String] - ) -> ::Array[MissingNecessaryField] + class JSONSchemaWithMetadata < JSONSchemaWithMetadataSupertype + def json_schema_version: () -> ::Integer - def identify_missing_necessary_fields_for_index_def: ( - indexableType, - Index, - JSONSchemaResolver, - ::Integer - ) -> ::Array[MissingNecessaryField] + class Merger + @field_metadata_by_type_and_field_name: ::Hash[::String, ::Hash[::String, JSONSchemaFieldMetadata]] + @renamed_types_by_old_name: ::Hash[::String, ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + @deleted_types_by_old_name: ::Hash[::String, ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + @renamed_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]] + @deleted_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]] + @state: ElasticGraph::SchemaDefinition::State + @derived_indexing_type_names: ::Set[::String] - class JSONSchemaResolver - @state: State - @old_type_name_by_current_name: ::Hash[::String, ::String] - @meta_by_old_type_and_name_in_index: ::Hash[::String, ::Hash[::String, ::Hash[::String, untyped]]] + attr_reader unused_deprecated_elements: ::Set[ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] - def initialize: (State, ::Hash[::String, untyped], ::Hash[::String, ::String]) -> void - def necessary_path_missing?: (SchemaElements::FieldPath) -> bool + def initialize: (ElasticGraph::SchemaDefinition::Results) -> void + def merge_metadata_into: (::Hash[::String, untyped]) -> JSONSchemaWithMetadata private - def necessary_path_part_missing?: (::String, ::String) { (::Hash[::String, untyped]) -> void } -> bool + def determine_current_type_name: ( + ::String, + missing_types: ::Set[::String], + definition_conflicts: ::Set[ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + ) -> ::String? + + def field_metadata_for: ( + ::String, + ::String, + missing_fields: ::Set[::String], + definition_conflicts: ::Set[ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + ) -> JSONSchemaFieldMetadata? + + def identify_missing_necessary_fields: ( + ::Hash[::String, untyped], + ::Hash[::String, ::String] + ) -> ::Array[MissingNecessaryField] + + def identify_missing_necessary_fields_for_index_def: ( + ElasticGraph::SchemaDefinition::Indexing::Index, + JSONSchemaResolver + ) -> ::Array[MissingNecessaryField] + + class JSONSchemaResolver + @state: ElasticGraph::SchemaDefinition::State + @old_type_name_by_current_name: ::Hash[::String, ::String] + @meta_by_old_type_and_name_in_index: ::Hash[::String, ::Hash[::String, ::Hash[::String, untyped]]] + + def initialize: (ElasticGraph::SchemaDefinition::State, ::Hash[::String, untyped], ::Hash[::String, ::String]) -> void + def necessary_path_missing?: (ElasticGraph::SchemaDefinition::SchemaElements::FieldPath) -> bool + + private + + def necessary_path_part_missing?: (::String, ::String) { (::Hash[::String, untyped]) -> void } -> bool + end end - end - class MissingNecessaryField - attr_reader field_type: ::String - attr_reader fully_qualified_path: ::String + class MissingNecessaryField + attr_reader field_type: ::String + attr_reader fully_qualified_path: ::String - def initialize: ( - field_type: ::String, - fully_qualified_path: ::String - ) -> void + def initialize: ( + field_type: ::String, + fully_qualified_path: ::String + ) -> void + + def with: ( + ?field_type: ::String, + ?fully_qualified_path: ::String + ) -> MissingNecessaryField + end end end end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rbs index 8c5f323a2..898a7660d 100644 --- a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rbs +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rbs @@ -1,9 +1,14 @@ module ElasticGraph - module SchemaDefinition - class JSONSchemaPruner - def self.prune: (::Hash[::String, untyped]) -> ::Hash[::String, untyped] - def self.referenced_type_names: (::Array[::String], ::Hash[::String, untyped]) -> ::Set[::String] - def self.collect_ref_names: (::Hash[::String, untyped]) -> ::Array[::String] + module JSONIngestion + module SchemaDefinition + class JSONSchemaPruner + def self.prune: (::Hash[::String, untyped]) -> ::Hash[::String, untyped] + + private + + def self.referenced_type_names: (::Array[::String], ::Hash[::String, untyped]) -> ::Set[::String] + def self.collect_ref_names: (::Hash[::String, untyped]) -> ::Array[::String] + end end end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/api.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/api.rb index 80306e237..924f28cd6 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/api.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/api.rb @@ -458,7 +458,7 @@ def results # version number. The publisher will then include this version number in published events to identify the version of the schema it # was using. This avoids the need to deploy the publisher and ElasticGraph indexer at the same time to keep them in sync. # - # @note While this is an important part of how ElasticGraph is designed to support schema evolution, it can be annoying constantly + # @note While this is an important part of how ElasticGraph is designed to support schema evolution, it can be annoying to constantly # have to increment this while rapidly changing the schema during prototyping. You can disable the requirement to increment this # on every JSON schema change with {#enforce_json_schema_version}. # diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field.rb index fa215a5f1..1401aa949 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field.rb @@ -77,7 +77,7 @@ def json_schema # @return [JSONSchemaFieldMetadata] additional ElasticGraph metadata to be stored in the JSON schema for this field. def json_schema_metadata - JSONSchemaFieldMetadata.new(type: type.name, name_in_index: name_in_index) + JSONIngestion::SchemaDefinition::Indexing::JSONSchemaFieldMetadata.new(type: type.name, name_in_index: name_in_index) end # Builds a hash containing the mapping for the provided fields, normalizing it in the same way that the diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb index 160e3c854..da347058e 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb @@ -114,7 +114,7 @@ def after_initialize end def json_schema_with_metadata_merger - @json_schema_with_metadata_merger ||= Indexing::JSONSchemaWithMetadata::Merger.new(self) + @json_schema_with_metadata_merger ||= JSONIngestion::SchemaDefinition::Indexing::JSONSchemaWithMetadata::Merger.new(self) end def generate_datastore_config @@ -283,11 +283,13 @@ def build_public_json_schema .transform_values(&:to_json_schema) .compact + event_envelope = JSONIngestion::SchemaDefinition::Indexing::EventEnvelope + { "$schema" => JSON_META_SCHEMA, JSON_SCHEMA_VERSION_KEY => json_schema_version, "$defs" => { - "ElasticGraphEventEnvelope" => Indexing::EventEnvelope.json_schema(root_document_type_names, json_schema_version) + "ElasticGraphEventEnvelope" => event_envelope.json_schema(root_document_type_names, json_schema_version) }.merge(definitions_by_name) } end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb index b1c3c3f1a..e2dac36d0 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb @@ -39,7 +39,7 @@ def initialize(schema_definition_results:, schema_artifacts_directory:, output:, @json_schemas_artifact = new_yaml_artifact( JSON_SCHEMAS_FILE, - JSONSchemaPruner.prune(schema_definition_results.current_public_json_schema), + JSONIngestion::SchemaDefinition::JSONSchemaPruner.prune(schema_definition_results.current_public_json_schema), extra_comment_lines: [ "This is the \"public\" JSON schema file and is intended to be provided to publishers so that", "they can perform code generation and event validation." @@ -191,7 +191,7 @@ def build_desired_versioned_json_schemas(current_public_json_schema) def report_json_schema_merge_errors(merged_results) json_schema_versions_by_missing_field = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[::String, ::Array[::Integer]] json_schema_versions_by_missing_type = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[::String, ::Array[::Integer]] - json_schema_versions_by_missing_necessary_field = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[Indexing::JSONSchemaWithMetadata::MissingNecessaryField, ::Array[::Integer]] + json_schema_versions_by_missing_necessary_field = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[JSONIngestion::SchemaDefinition::Indexing::JSONSchemaWithMetadata::MissingNecessaryField, ::Array[::Integer]] merged_results.each do |result| result.missing_fields.each do |field| @@ -278,7 +278,7 @@ def missing_type_error_for(type, json_schema_versions) To continue, do one of the following: 1. If the `#{type}` type has been renamed, indicate this by calling `type.renamed_from "#{type}"` on the renamed type. - 2. If the `#{type}` field has been dropped, indicate this by calling `schema.deleted_type "#{type}"` on the schema. + 2. If the `#{type}` type has been dropped, indicate this by calling `schema.deleted_type "#{type}"` on the schema. 3. Alternately, if no publishers or in-flight events use #{describe_json_schema_versions(json_schema_versions, "or")}, delete #{files_noun_phrase(json_schema_versions)} from `#{JSON_SCHEMAS_BY_VERSION_DIRECTORY}`, and no further changes are required. EOS end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field.rbs index 432e62034..80f6eab04 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field.rbs @@ -50,7 +50,7 @@ module ElasticGraph @mapping: ::Hash[::String, untyped]? def mapping: () -> ::Hash[::String, untyped] def json_schema: () -> ::Hash[::String, untyped] - def json_schema_metadata: () -> JSONSchemaFieldMetadata + def json_schema_metadata: () -> JSONIngestion::SchemaDefinition::Indexing::JSONSchemaFieldMetadata def self.normalized_mapping_hash_for: (::Array[Field]) -> ::Hash[::String, untyped] diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type.rbs index 5808a0709..bc5e08829 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/field_type.rbs @@ -4,7 +4,7 @@ module ElasticGraph interface _FieldType def to_mapping: () -> ::Hash[::String, untyped] def to_json_schema: () -> ::Hash[::String, untyped] - def json_schema_field_metadata_by_field_name: () -> ::Hash[::String, JSONSchemaFieldMetadata] + def json_schema_field_metadata_by_field_name: () -> ::Hash[::String, JSONIngestion::SchemaDefinition::Indexing::JSONSchemaFieldMetadata] def format_field_json_schema_customizations: (::Hash[::String, untyped]) -> ::Hash[::String, untyped] end end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs index c35f971f2..ca21710f0 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs @@ -10,9 +10,9 @@ module ElasticGraph include Support::_MemoizableDataClass def json_schema_version_setter_location: () -> ::Thread::Backtrace::Location? - def json_schema_field_metadata_by_type_and_field_name: () -> ::Hash[::String, ::Hash[::String, Indexing::JSONSchemaFieldMetadata]] + def json_schema_field_metadata_by_type_and_field_name: () -> ::Hash[::String, ::Hash[::String, JSONIngestion::SchemaDefinition::Indexing::JSONSchemaFieldMetadata]] def current_public_json_schema: () -> ::Hash[::String, untyped] - def merge_field_metadata_into_json_schema: (::Hash[::String, untyped]) -> Indexing::JSONSchemaWithMetadata + def merge_field_metadata_into_json_schema: (::Hash[::String, untyped]) -> JSONIngestion::SchemaDefinition::Indexing::JSONSchemaWithMetadata def unused_deprecated_elements: () -> ::Set[SchemaElements::DeprecatedElement] def derived_indexing_type_names: () -> ::Set[::String] @@ -26,16 +26,16 @@ module ElasticGraph @field_path_resolver: SchemaElements::FieldPath::Resolver? @json_schema_indexing_field_types_by_name: ::Hash[::String, Indexing::_FieldType]? @derived_indexing_type_names: ::Set[::String]? - @json_schema_field_metadata_by_type_and_field_name: ::Hash[::String, ::Hash[::String, Indexing::JSONSchemaFieldMetadata]]? + @json_schema_field_metadata_by_type_and_field_name: ::Hash[::String, ::Hash[::String, JSONIngestion::SchemaDefinition::Indexing::JSONSchemaFieldMetadata]]? @current_public_json_schema: ::Hash[::String, untyped]? @latest_versioned_json_schema: ::Hash[::String, untyped]? - @json_schema_with_metadata_merger: Indexing::JSONSchemaWithMetadata::Merger? + @json_schema_with_metadata_merger: JSONIngestion::SchemaDefinition::Indexing::JSONSchemaWithMetadata::Merger? STATIC_SCRIPT_REPO: Scripting::FileSystemRepository private - def json_schema_with_metadata_merger: () -> Indexing::JSONSchemaWithMetadata::Merger + def json_schema_with_metadata_merger: () -> JSONIngestion::SchemaDefinition::Indexing::JSONSchemaWithMetadata::Merger def generate_datastore_config: () -> ::Hash[::String, untyped] def build_dynamic_scripts: () -> ::Array[Scripting::Script] def build_runtime_metadata: () -> SchemaArtifacts::RuntimeMetadata::Schema diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs index 7c34a8824..389dfbfdd 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs @@ -27,12 +27,12 @@ module ElasticGraph def notify_about_unused_type_name_overrides: () -> void def notify_about_unused_enum_value_overrides: () -> void def build_desired_versioned_json_schemas: (::Hash[::String, untyped]) -> ::Hash[::Integer, ::Hash[::String, untyped]] - def report_json_schema_merge_errors: (::Array[Indexing::JSONSchemaWithMetadata]) -> void + def report_json_schema_merge_errors: (::Array[JSONIngestion::SchemaDefinition::Indexing::JSONSchemaWithMetadata]) -> void def report_json_schema_merge_warnings: () -> void def format_deprecated_elements: (::Enumerable[SchemaElements::DeprecatedElement]) -> ::String def missing_field_error_for: (::String, ::Array[::Integer]) -> ::String def missing_type_error_for: (::String, ::Array[::Integer]) -> ::String - def missing_necessary_field_error_for: (Indexing::JSONSchemaWithMetadata::MissingNecessaryField, ::Array[::Integer]) -> ::String + def missing_necessary_field_error_for: (JSONIngestion::SchemaDefinition::Indexing::JSONSchemaWithMetadata::MissingNecessaryField, ::Array[::Integer]) -> ::String def describe_json_schema_versions: (::Array[::Integer], ::String) -> ::String def old_versions: (::Array[::Integer]) -> ::String def files_noun_phrase: (::Array[::Integer]) -> ::String diff --git a/elasticgraph-schema_definition/spec/integration/elastic_graph/schema_definition/rake_tasks_spec.rb b/elasticgraph-schema_definition/spec/integration/elastic_graph/schema_definition/rake_tasks_spec.rb index 54a235ec8..e26a47ca4 100644 --- a/elasticgraph-schema_definition/spec/integration/elastic_graph/schema_definition/rake_tasks_spec.rb +++ b/elasticgraph-schema_definition/spec/integration/elastic_graph/schema_definition/rake_tasks_spec.rb @@ -585,7 +585,7 @@ module SchemaDefinition To continue, do one of the following: 1. If the `Component` type has been renamed, indicate this by calling `type.renamed_from "Component"` on the renamed type. - 2. If the `Component` field has been dropped, indicate this by calling `schema.deleted_type "Component"` on the schema. + 2. If the `Component` type has been dropped, indicate this by calling `schema.deleted_type "Component"` on the schema. 3. Alternately, if no publishers or in-flight events use JSON schema version 1, delete its file from `json_schemas_by_version`, and no further changes are required. EOS @@ -600,7 +600,7 @@ module SchemaDefinition To continue, do one of the following: 1. If the `Component` type has been renamed, indicate this by calling `type.renamed_from "Component"` on the renamed type. - 2. If the `Component` field has been dropped, indicate this by calling `schema.deleted_type "Component"` on the schema. + 2. If the `Component` type has been dropped, indicate this by calling `schema.deleted_type "Component"` on the schema. 3. Alternately, if no publishers or in-flight events use JSON schema versions 1 or 2, delete their files from `json_schemas_by_version`, and no further changes are required. EOS @@ -615,7 +615,7 @@ module SchemaDefinition To continue, do one of the following: 1. If the `Component` type has been renamed, indicate this by calling `type.renamed_from "Component"` on the renamed type. - 2. If the `Component` field has been dropped, indicate this by calling `schema.deleted_type "Component"` on the schema. + 2. If the `Component` type has been dropped, indicate this by calling `schema.deleted_type "Component"` on the schema. 3. Alternately, if no publishers or in-flight events use JSON schema versions 1, 2, or 3, delete their files from `json_schemas_by_version`, and no further changes are required. EOS diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/indexing/json_schema_with_metadata_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/indexing/json_schema_with_metadata_spec.rb index 22fc317ca..f3cd503ac 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/indexing/json_schema_with_metadata_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/indexing/json_schema_with_metadata_spec.rb @@ -12,7 +12,7 @@ module ElasticGraph module SchemaDefinition module Indexing - ::RSpec.describe JSONSchemaWithMetadata do + ::RSpec.describe JSONIngestion::SchemaDefinition::Indexing::JSONSchemaWithMetadata do include_context "SchemaDefinitionHelpers" it "ignores derived indexed types that do not show up in the JSON schema" do @@ -1062,7 +1062,7 @@ def have_dumped_metadata(name_in_index, type) end def missing_necessary_field_of(field_type, fully_qualified_path) - JSONSchemaWithMetadata::MissingNecessaryField.new(field_type, fully_qualified_path) + described_class::MissingNecessaryField.new(field_type, fully_qualified_path) end end end diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_field_metadata_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_field_metadata_spec.rb index 6c0aa3799..4e101b378 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_field_metadata_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_field_metadata_spec.rb @@ -6,6 +6,7 @@ # # frozen_string_literal: true +require "elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata" require "elastic_graph/spec_support/schema_definition_helpers" module ElasticGraph @@ -144,7 +145,7 @@ def define_schema(&schema_definition) end def field_meta_of(type, name_in_index) - Indexing::JSONSchemaFieldMetadata.new(type: type, name_in_index: name_in_index) + JSONIngestion::SchemaDefinition::Indexing::JSONSchemaFieldMetadata.new(type: type, name_in_index: name_in_index) end end end diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_pruner_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_pruner_spec.rb index 66c140101..e9c01c9f2 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_pruner_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_pruner_spec.rb @@ -12,7 +12,7 @@ module ElasticGraph module SchemaDefinition - RSpec.describe JSONSchemaPruner do + RSpec.describe JSONIngestion::SchemaDefinition::JSONSchemaPruner do include_context "SchemaDefinitionHelpers" describe ".prune" do