Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/mindee/input.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# frozen_string_literal: true

require_relative 'input/data_schema'
require_relative 'input/inference_parameters'
require_relative 'input/polling_options'
require_relative 'input/sources'
126 changes: 126 additions & 0 deletions lib/mindee/input/data_schema.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# frozen_string_literal: true

module Mindee
module Input
# Data Schema Field.
#
# Describes one field of a data schema. Instances are built from a plain Hash whose
# keys may be strings or symbols; the Hash passed in is never modified.
class DataSchemaField
  # @return [String] Display name for the field, also impacts inference results.
  attr_reader :title
  # @return [String] Name of the field in the data schema.
  attr_reader :name
  # @return [Boolean] Whether this field can contain multiple values.
  attr_reader :is_array
  # @return [String] Data type of the field.
  attr_reader :type
  # @return [Array<String>, nil] Allowed values when type is `classification`. Leave empty for other types.
  attr_reader :classification_values
  # @return [Boolean, nil] Whether to remove duplicate values in the array.
  #   Only applicable if `is_array` is True.
  attr_reader :unique_values
  # @return [String, nil] Detailed description of what this field represents.
  attr_reader :description
  # @return [String, nil] Optional extraction guidelines.
  attr_reader :guidelines
  # @return [Array<Hash>, nil] Nested fields, stored exactly as provided.
  attr_reader :nested_fields

  # @param field [Hash] Field definition, keyed by strings or symbols.
  def initialize(field)
    # Non-destructive key conversion: `transform_keys!` would rewrite the caller's
    # Hash in place and raise FrozenError when that Hash is frozen.
    attributes = field.transform_keys(&:to_sym)
    @name = attributes[:name]
    @title = attributes[:title]
    @is_array = attributes[:is_array]
    @type = attributes[:type]
    @classification_values = attributes[:classification_values]
    @unique_values = attributes[:unique_values]
    @description = attributes[:description]
    @guidelines = attributes[:guidelines]
    @nested_fields = attributes[:nested_fields]
  end

  # Serializes the field. The four core attributes are always present; optional
  # attributes are included only when they are not nil (so `false` is kept).
  # @return [Hash]
  def to_hash
    out = {
      name: @name,
      title: @title,
      is_array: @is_array,
      type: @type,
    } # @type var out: Hash[Symbol, untyped]
    out[:classification_values] = @classification_values unless @classification_values.nil?
    out[:unique_values] = @unique_values unless @unique_values.nil?
    out[:description] = @description unless @description.nil?
    out[:guidelines] = @guidelines unless @guidelines.nil?
    out[:nested_fields] = @nested_fields unless @nested_fields.nil?
    out
  end

  # @return [String] JSON encoding of the Hash returned by `to_hash`.
  def to_s
    to_hash.to_json
  end
end

# The structure to completely replace the data schema of the model.
class DataSchemaReplace
  # @return [Array<DataSchemaField>] Fields making up the replacement data schema.
  attr_reader :fields

  # @param data_schema_replace [Hash] Replacement definition holding a non-empty `fields`
  #   list; keys may be strings or symbols. The Hash is not modified.
  # @raise [Mindee::Errors::MindeeError] If the definition is not a Hash or has no `fields` entry.
  # @raise [TypeError] If the `fields` list is empty.
  def initialize(data_schema_replace)
    # nil (or any non-Hash) used to fail with NoMethodError on `transform_keys!`;
    # report it with the same error as a missing `fields` entry instead.
    raise Mindee::Errors::MindeeError, 'Invalid Data Schema provided.' unless data_schema_replace.is_a?(Hash)

    # Symbol-keyed copy, so the caller's Hash is left untouched (and may be frozen).
    definition = data_schema_replace.transform_keys(&:to_sym)
    fields_list = definition[:fields]
    raise Mindee::Errors::MindeeError, 'Invalid Data Schema provided.' if fields_list.nil?
    raise TypeError, 'Data Schema replacement fields cannot be empty.' if fields_list.empty?

    @fields = fields_list.map { |field| DataSchemaField.new(field) }
  end

  # @return [Hash] `{ fields: [...] }` with every field serialized through its `to_hash`.
  def to_hash
    { fields: @fields.map(&:to_hash) }
  end

  # @return [String] JSON encoding of the Hash returned by `to_hash`.
  def to_s
    to_hash.to_json
  end
end

# Modify the Data Schema.
class DataSchema
  # @return [Mindee::Input::DataSchemaReplace]
  attr_reader :replace

  # Builds a data schema from an existing instance, a Hash, or a JSON string.
  #
  # @param data_schema [Hash, String, DataSchema] A Hash with a `replace` entry (string or
  #   symbol key, holding either a Hash or a DataSchemaReplace), the JSON encoding of such
  #   a Hash, or another DataSchema whose `replace` is reused. The argument is not modified.
  # @raise [TypeError] If the argument, or the parsed JSON document, is not a supported type.
  # @raise [JSON::ParserError] If a String argument is not valid JSON.
  def initialize(data_schema)
    if data_schema.is_a?(DataSchema)
      @replace = data_schema.replace
      return
    end

    schema = data_schema.is_a?(String) ? JSON.parse(data_schema) : data_schema
    # Also rejects JSON documents that decode to an Array, a scalar or null.
    raise TypeError, 'Invalid Data Schema provided.' unless schema.is_a?(Hash)

    # Symbol-keyed copy: `transform_keys!` would rewrite the caller's Hash in place
    # and raise FrozenError when it is frozen.
    replacement = schema.transform_keys(&:to_sym)[:replace]
    @replace = replacement.is_a?(DataSchemaReplace) ? replacement : DataSchemaReplace.new(replacement)
  end

  # @return [Hash] `{ replace: ... }` with the replacement serialized through its `to_hash`.
  def to_hash
    { replace: @replace.to_hash }
  end

  # @return [String] JSON encoding of the Hash returned by `to_hash`.
  def to_s
    to_hash.to_json
  end
end
end
end
9 changes: 8 additions & 1 deletion lib/mindee/input/inference_parameters.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# frozen_string_literal: true

require_relative 'data_schema'

module Mindee
module Input
# Parameters to set when sending a file for inference.
Expand Down Expand Up @@ -35,6 +37,9 @@ class InferenceParameters
# @return [PollingOptions] Options for polling. Set only if having timeout issues.
attr_reader :polling_options

# @return [DataSchema, nil] Data schema override, or nil when none was provided.
attr_reader :data_schema

# @return [Boolean, nil] Whether to close the file after parsing.
attr_reader :close_file

Expand All @@ -58,7 +63,8 @@ def initialize(
webhook_ids: nil,
text_context: nil,
polling_options: nil,
close_file: true
close_file: true,
data_schema: nil
)
raise Errors::MindeeInputError, 'Model ID is required.' if model_id.empty? || model_id.nil?

Expand All @@ -72,6 +78,7 @@ def initialize(
@text_context = text_context
@polling_options = get_clean_polling_options(polling_options)
@close_file = close_file.nil? || close_file
@data_schema = DataSchema.new(data_schema) unless data_schema.nil?
# rubocop:enable Metrics/ParameterLists
end

Expand Down
22 changes: 22 additions & 0 deletions lib/mindee/parsing/v2/inference_active_options.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,23 @@
module Mindee
module Parsing
module V2
# Data schema options activated during the inference.
class DataSchemaActiveOption
  # @return [Boolean] Value of the `replace` flag; false when the server did not send one.
  attr_reader :replace

  # @param server_response [Hash, nil] The `data_schema` section of the active options,
  #   keyed by strings or symbols. nil is treated as an empty section.
  def initialize(server_response)
    # Responses without a `data_schema` section arrive here as nil.
    options = server_response || {}
    # The trailing `|| false` keeps the attribute a real Boolean: without it an
    # explicit `false` under the symbol key (or a missing key) would yield nil.
    @replace = options[:replace] || options['replace'] || false
  end

  # String representation.
  # @return [String]
  def to_s
    "Data Schema\n-----------\n:Replace: #{@replace ? 'True' : 'False'}"
  end
end

# Options which were activated during the inference.
class InferenceActiveOptions
# @return [Boolean] Whether the Raw Text feature was activated.
Expand All @@ -15,6 +32,8 @@ class InferenceActiveOptions
attr_reader :rag
# @return [Boolean] Whether the text context feature was activated.
attr_reader :text_context
# @return [DataSchemaActiveOption]
attr_reader :data_schema

# @param server_response [Hash] Raw JSON parsed into a Hash.
def initialize(server_response)
Expand All @@ -23,6 +42,7 @@ def initialize(server_response)
@confidence = server_response['confidence']
@rag = server_response['rag']
@text_context = server_response['text_context']
@data_schema = DataSchemaActiveOption.new(server_response['data_schema'])
end

# String representation.
Expand All @@ -35,6 +55,8 @@ def to_s
":Polygon: #{@polygon ? 'True' : 'False'}",
":Confidence: #{@confidence ? 'True' : 'False'}",
":RAG: #{@rag ? 'True' : 'False'}",
":Text Context: #{@text_context ? 'True' : 'False'}\n",
@data_schema.to_s,
'',
]
parts.join("\n")
Expand Down
34 changes: 34 additions & 0 deletions sig/mindee/input/data_schema.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
module Mindee
module Input
# Signature for a single field of a data schema.
class DataSchemaField
  attr_reader title: String
  attr_reader name: String
  attr_reader is_array: bool
  attr_reader type: String
  # A list of allowed values (the Ruby class documents it as Array<String>, nil).
  attr_reader classification_values: Array[String]?
  attr_reader unique_values: bool?
  attr_reader description: String?
  attr_reader guidelines: String?
  attr_reader nested_fields: Array[Hash[String | Symbol, untyped]]?

  def initialize: (Hash[Symbol, untyped]) -> void
  def to_hash: () -> Hash[Symbol, untyped]
  # The Ruby class defines `to_s`, not `to_string`.
  def to_s: () -> String
end

# Signature for the structure that completely replaces a model's data schema.
class DataSchemaReplace
  attr_reader fields: Array[DataSchemaField]

  def initialize: (Hash[Symbol, untyped]) -> void
  def to_hash: () -> Hash[Symbol, untyped]
  # The Ruby class defines `to_s`, not `to_string`.
  def to_s: () -> String
end

# Signature for the data schema modification wrapper.
class DataSchema
  attr_reader replace: DataSchemaReplace

  # Accepts a Hash, its JSON encoding, or another DataSchema.
  def initialize: (Hash[String | Symbol, untyped] | String | DataSchema) -> void
  def to_hash: () -> Hash[Symbol, untyped]
  def to_s: () -> String
end
end
end
4 changes: 3 additions & 1 deletion sig/mindee/input/inference_parameters.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ module Mindee
attr_reader raw_text: bool?
attr_reader text_context: String?
attr_reader webhook_ids: Array[String]?
attr_reader data_schema: DataSchema?

def initialize: (
String,
Expand All @@ -23,7 +24,8 @@ module Mindee
?text_context: String?,
?webhook_ids: Array[String]?,
?polling_options: Hash[Symbol | String, untyped] | PollingOptions?,
?close_file: bool?
?close_file: bool?,
?data_schema: DataSchema|String|Hash[Symbol | String, untyped]?
) -> void

def self.from_hash: (params: Hash[String | Symbol, untyped]) -> InferenceParameters
Expand Down
8 changes: 8 additions & 0 deletions sig/mindee/parsing/v2/inference_active_options.rbs
Original file line number Diff line number Diff line change
@@ -1,14 +1,22 @@
module Mindee
module Parsing
module V2
# Signature for the data schema options activated during the inference.
class DataSchemaActiveOption
  attr_reader replace: bool

  # Keys may be strings or symbols: the Ruby class reads both `:replace` and 'replace'.
  def initialize: (Hash[String | Symbol, untyped]) -> void
  def to_s: () -> String
end
# Signature for the options which were activated during the inference.
class InferenceActiveOptions
  # Per-feature activation flags.
  attr_reader raw_text: bool
  attr_reader polygon: bool
  attr_reader confidence: bool
  attr_reader rag: bool
  attr_reader text_context: bool

  # Data schema options, wrapped in their own object.
  attr_reader data_schema: DataSchemaActiveOption

  def initialize: (Hash[String | Symbol, untyped]) -> void
  def to_s: () -> String
end
end
end
Expand Down
52 changes: 49 additions & 3 deletions spec/v2/client_v2_integration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
raw_text: true,
polygon: false,
confidence: false,
file_alias: 'ruby-integration-test',
file_alias: 'rb_integration_test',
polling_options: polling,
text_context: 'this is a test'
)
Expand Down Expand Up @@ -72,7 +72,7 @@
polygon: false,
confidence: false,
rag: false,
file_alias: 'ruby-integration-test'
file_alias: 'rb_integration_test'
)

response = client.enqueue_and_get_inference(input, inference_params)
Expand Down Expand Up @@ -191,7 +191,7 @@
polygon: false,
confidence: false,
rag: false,
file_alias: 'ruby-integration-test'
file_alias: 'rb_integration_test'
)
client.enqueue_and_get_inference(input, inference_params)
end.to raise_error(Mindee::Errors::MindeeHTTPErrorV2) { |e|
Expand All @@ -216,4 +216,50 @@
expect(response.inference).not_to be_nil
end
end

context 'A Data Schema Override' do
  it 'Overrides successfully' do
    # Replacement schema is sent as raw JSON text, exercising the String input path.
    schema_path = File.join(V2_DATA_DIR, 'inference', 'data_schema_replace_param.json')
    schema_json = File.read(schema_path)
    pdf_path = File.join(FILE_TYPES_DIR, 'pdf', 'blank_1.pdf')
    source = Mindee::Input::Source::PathInputSource.new(pdf_path)

    params = Mindee::Input::InferenceParameters.new(
      model_id,
      raw_text: false,
      polygon: false,
      confidence: false,
      rag: false,
      file_alias: 'rb_integration_data_schema_replace',
      data_schema: schema_json
    )

    response = client.enqueue_and_get_inference(source, params)
    expect(response).not_to be_nil

    # The inference must have run against the requested model.
    inference_model = response.inference.model
    expect(inference_model).not_to be_nil
    expect(inference_model).to be_a(Mindee::Parsing::V2::InferenceModel)
    expect(inference_model.id).to eq(model_id)

    # Every optional feature is off; only the data schema replacement is active.
    options = response.inference.active_options
    expect(options).not_to be_nil
    expect(options).to be_a(Mindee::Parsing::V2::InferenceActiveOptions)
    expect(options.raw_text).to eq(false)
    expect(options.polygon).to eq(false)
    expect(options.confidence).to eq(false)
    expect(options.rag).to eq(false)
    expect(options.text_context).to eq(false)
    expect(options.data_schema).not_to be_nil
    expect(options.data_schema.replace).to eq(true)

    inference_result = response.inference.result
    expect(inference_result).not_to be_nil

    expect(inference_result.raw_text).to be_nil

    # The field asserted here is expected to come from the replacement schema.
    extracted = inference_result.fields
    expect(extracted).not_to be_nil
    expect(extracted['test_replace']).not_to be_nil
    expect(extracted['test_replace'].value).to eq('a test value')
  end
end
end
Loading
Loading