From 91911038c313621c337e089949413fbd13f48baa Mon Sep 17 00:00:00 2001 From: Patrick Olsen Date: Wed, 18 Feb 2026 22:03:02 +0700 Subject: [PATCH 1/9] Add 4 new operations: detect_language, generate_tags, anonymize, compare Co-Authored-By: Claude Sonnet 4 --- lib/ruby_llm/text/anonymize.rb | 186 +++++++++++++++++++++++++++ lib/ruby_llm/text/compare.rb | 126 ++++++++++++++++++ lib/ruby_llm/text/detect_language.rb | 79 ++++++++++++ lib/ruby_llm/text/generate_tags.rb | 70 ++++++++++ 4 files changed, 461 insertions(+) create mode 100644 lib/ruby_llm/text/anonymize.rb create mode 100644 lib/ruby_llm/text/compare.rb create mode 100644 lib/ruby_llm/text/detect_language.rb create mode 100644 lib/ruby_llm/text/generate_tags.rb diff --git a/lib/ruby_llm/text/anonymize.rb b/lib/ruby_llm/text/anonymize.rb new file mode 100644 index 0000000..13f4c4a --- /dev/null +++ b/lib/ruby_llm/text/anonymize.rb @@ -0,0 +1,186 @@ +module RubyLLM + module Text + module Anonymize + # Default PII types to detect and anonymize + DEFAULT_PII_TYPES = [ :names, :emails, :phones, :addresses ].freeze + + def self.call(text, pii_types: DEFAULT_PII_TYPES, replacement_style: :generic, include_mapping: false, model: nil, **options) + model ||= RubyLLM::Text.config.model_for(:anonymize) + + # Handle :all shortcut for all PII types + pii_types = DEFAULT_PII_TYPES if pii_types == [ :all ] + + prompt = build_prompt(text, pii_types: pii_types, replacement_style: replacement_style, include_mapping: include_mapping) + + if include_mapping + # For structured output with anonymized text and replacement mapping + schema = build_mapping_schema() + response = Base.call_llm(prompt, model: model, schema: schema, **options) + + begin + result = JSON.parse(Base.clean_json_response(response)) + rescue JSON::ParserError + # Fallback: if JSON parsing fails, return best-effort structured response + cleaned_response = Base.clean_json_response(response) + result = { + "text" => cleaned_response, + "mapping" => {} + } + end + 
+ result + else + Base.call_llm(prompt, model: model, **options) + end + end + + private + + def self.build_prompt(text, pii_types:, replacement_style:, include_mapping:) + # Build PII type instructions + pii_instructions = build_pii_instructions(pii_types) + replacement_instructions = build_replacement_instructions(replacement_style, pii_types) + + if include_mapping + output_instruction = <<~OUTPUT + Return a JSON object with: + - "text": the anonymized text with PII replaced + - "mapping": an object mapping each replacement token to its original value + + Example: + { + "text": "Contact [PERSON_1] at [EMAIL_1]", + "mapping": { + "[PERSON_1]": "John Doe", + "[EMAIL_1]": "john.doe@example.com" + } + } + OUTPUT + else + output_instruction = <<~OUTPUT + Return only the anonymized text with PII replaced by appropriate tokens. + Do not include any explanation or notes. + OUTPUT + end + + <<~PROMPT + Anonymize the following text by replacing personally identifiable information (PII) with replacement tokens. 
+ + #{pii_instructions} + #{replacement_instructions} + + #{output_instruction} + + Text: + #{text} + PROMPT + end + + def self.build_pii_instructions(pii_types) + # Handle :all shortcut + pii_types = DEFAULT_PII_TYPES if pii_types == [ :all ] + + instructions = [ "Identify and replace the following types of PII:" ] + + if pii_types.include?(:names) + instructions << "- Names (personal names, full names, first names, last names)" + end + + if pii_types.include?(:emails) + instructions << "- Email addresses" + end + + if pii_types.include?(:phones) + instructions << "- Phone numbers (including various formats)" + end + + if pii_types.include?(:addresses) + instructions << "- Physical addresses (street addresses, cities, postal codes)" + end + + if pii_types.include?(:ssn) + instructions << "- Social Security Numbers" + end + + if pii_types.include?(:credit_cards) + instructions << "- Credit card numbers" + end + + instructions.join("\n") + end + + def self.build_replacement_instructions(replacement_style, pii_types) + # Handle :all shortcut + pii_types = DEFAULT_PII_TYPES if pii_types == [ :all ] + + instructions = [ "Use #{replacement_style} replacement tokens:" ] + + case replacement_style + when :generic + if pii_types.include?(:names) + instructions << "- Names: [PERSON], [PERSON_1], [PERSON_2], etc. for multiple people" + end + if pii_types.include?(:emails) + instructions << "- Emails: [EMAIL], [EMAIL_1], [EMAIL_2], etc. for multiple emails" + end + if pii_types.include?(:phones) + instructions << "- Phones: [PHONE], [PHONE_1], [PHONE_2], etc." + end + if pii_types.include?(:addresses) + instructions << "- Addresses: [ADDRESS], [ADDRESS_1], [ADDRESS_2], etc." + end + if pii_types.include?(:ssn) + instructions << "- SSN: [SSN], [SSN_1], [SSN_2], etc." + end + if pii_types.include?(:credit_cards) + instructions << "- Credit Cards: [CREDIT_CARD], [CREDIT_CARD_1], etc." 
+ end + when :numbered + if pii_types.include?(:names) + instructions << "- Names: [PERSON_1], [PERSON_2], etc." + end + if pii_types.include?(:emails) + instructions << "- Emails: [EMAIL_1], [EMAIL_2], etc." + end + if pii_types.include?(:phones) + instructions << "- Phones: [PHONE_1], [PHONE_2], etc." + end + if pii_types.include?(:addresses) + instructions << "- Addresses: [ADDRESS_1], [ADDRESS_2], etc." + end + when :descriptive + if pii_types.include?(:names) + instructions << "- Names: [FIRST_NAME], [LAST_NAME], [FULL_NAME]" + end + if pii_types.include?(:emails) + instructions << "- Emails: [EMAIL_ADDRESS]" + end + if pii_types.include?(:phones) + instructions << "- Phones: [PHONE_NUMBER]" + end + if pii_types.include?(:addresses) + instructions << "- Addresses: [STREET_ADDRESS], [CITY], [POSTAL_CODE]" + end + else + return build_replacement_instructions(:generic, pii_types) + end + + instructions.join("\n") + end + + def self.build_mapping_schema + { + type: "object", + properties: { + text: { type: "string" }, + mapping: { + type: "object", + additionalProperties: { type: "string" } + } + }, + required: [ "text", "mapping" ] + } + end + end + end +end diff --git a/lib/ruby_llm/text/compare.rb b/lib/ruby_llm/text/compare.rb new file mode 100644 index 0000000..54dc6f3 --- /dev/null +++ b/lib/ruby_llm/text/compare.rb @@ -0,0 +1,126 @@ +module RubyLLM + module Text + module Compare + def self.call(text1, text2, comparison_type: :similarity, model: nil, **options) + model ||= RubyLLM::Text.config.model_for(:compare) + + prompt = build_prompt(text1, text2, comparison_type: comparison_type) + schema = build_comparison_schema(comparison_type) + response = Base.call_llm(prompt, model: model, schema: schema, **options) + + begin + result = JSON.parse(Base.clean_json_response(response)) + rescue JSON::ParserError + # Fallback: if JSON parsing fails, return basic structured response + result = { + "similarity" => nil, + "comparison_type" => comparison_type.to_s, + 
"error" => "Failed to parse comparison result" + } + end + + # Convert similarity to float when present + if result.key?("similarity") && !result["similarity"].nil? + result["similarity"] = result["similarity"].to_f + end + + result + end + + private + + def self.build_prompt(text1, text2, comparison_type:) + case comparison_type + when :similarity + comparison_instruction = <<~INSTRUCTION + Compare the two texts and provide: + - A similarity score from 0 to 1 (where 1 is identical and 0 is completely different) + - The type of similarity detected (semantic, structural, topical, etc.) + - A brief summary of what makes them similar or different + + Focus on semantic similarity - texts with the same meaning should score high even if worded differently. + INSTRUCTION + when :detailed + comparison_instruction = <<~INSTRUCTION + Provide a detailed comparison including: + - Overall similarity score from 0 to 1 + - Specific differences between the texts (tone, style, content, structure, etc.) + - Common elements or themes found in both texts + - A summary of the key similarities and differences + + Analyze style, tone, content, structure, and intent. + INSTRUCTION + when :changes + comparison_instruction = <<~INSTRUCTION + Analyze the texts as if the second text is a revision of the first and provide: + - Overall similarity score from 0 to 1 + - Types of changes made (additions, deletions, modifications, restructuring) + - Specific examples of what was changed + - Assessment of whether the changes improve or alter the content significantly + + Focus on tracking edits and revisions between the versions. 
+ INSTRUCTION + else + comparison_instruction = build_prompt(text1, text2, comparison_type: :similarity) + end + + <<~PROMPT + Compare the following two texts: + + #{comparison_instruction} + + Text 1: + #{text1} + + Text 2: + #{text2} + PROMPT + end + + def self.build_comparison_schema(comparison_type) + base_properties = { + similarity: { type: "number", minimum: 0, maximum: 1 }, + comparison_type: { type: "string" } + } + + case comparison_type + when :similarity + base_properties.merge!({ + similarity_type: { type: "string" }, + summary: { type: "string" } + }) + when :detailed + base_properties.merge!({ + differences: { + type: "array", + items: { type: "string" } + }, + commonalities: { + type: "array", + items: { type: "string" } + }, + summary: { type: "string" } + }) + when :changes + base_properties.merge!({ + change_types: { + type: "array", + items: { type: "string" } + }, + examples: { + type: "array", + items: { type: "string" } + }, + assessment: { type: "string" } + }) + end + + { + type: "object", + properties: base_properties, + required: [ "similarity", "comparison_type" ] + } + end + end + end +end diff --git a/lib/ruby_llm/text/detect_language.rb b/lib/ruby_llm/text/detect_language.rb new file mode 100644 index 0000000..72ff48e --- /dev/null +++ b/lib/ruby_llm/text/detect_language.rb @@ -0,0 +1,79 @@ +module RubyLLM + module Text + module DetectLanguage + def self.call(text, include_confidence: false, model: nil, **options) + model ||= RubyLLM::Text.config.model_for(:detect_language) + + prompt = build_prompt(text, include_confidence: include_confidence) + + if include_confidence + # For structured output with confidence score and language code + schema = build_confidence_schema() + response = Base.call_llm(prompt, model: model, schema: schema, **options) + + begin + result = JSON.parse(Base.clean_json_response(response)) + rescue JSON::ParserError + # Fallback: if JSON parsing fails, return best-effort structured response + cleaned_response = 
Base.clean_json_response(response) + result = { + "language" => cleaned_response, + "confidence" => nil, + "code" => nil + } + end + + # Convert confidence to float when present (preserve nil as "unknown") + if result.key?("confidence") && !result["confidence"].nil? + result["confidence"] = result["confidence"].to_f + end + + result + else + Base.call_llm(prompt, model: model, **options) + end + end + + private + + def self.build_prompt(text, include_confidence:) + if include_confidence + output_instruction = <<~OUTPUT + Return a JSON object with: + - "language": the full language name (e.g., "English", "French", "Spanish") + - "confidence": a confidence score between 0 and 1 + - "code": the ISO 639-1 language code (e.g., "en", "fr", "es") + + If the language cannot be reliably detected, return "unknown" as the language with low confidence. + OUTPUT + else + output_instruction = <<~OUTPUT + Return only the full language name (e.g., "English", "French", "Spanish"). + If the language cannot be reliably detected, return "unknown". + OUTPUT + end + + <<~PROMPT + Detect the language of the following text. 
+ + #{output_instruction} + + Text: + #{text} + PROMPT + end + + def self.build_confidence_schema + { + type: "object", + properties: { + language: { type: "string" }, + confidence: { type: "number", minimum: 0, maximum: 1 }, + code: { type: "string" } + }, + required: [ "language", "confidence", "code" ] + } + end + end + end +end diff --git a/lib/ruby_llm/text/generate_tags.rb b/lib/ruby_llm/text/generate_tags.rb new file mode 100644 index 0000000..f12666b --- /dev/null +++ b/lib/ruby_llm/text/generate_tags.rb @@ -0,0 +1,70 @@ +module RubyLLM + module Text + module GenerateTags + def self.call(text, max_tags: nil, style: :keywords, model: nil, **options) + model ||= RubyLLM::Text.config.model_for(:generate_tags) + + prompt = build_prompt(text, max_tags: max_tags, style: style) + response = Base.call_llm(prompt, model: model, **options) + + # Parse response into array of strings + parse_response(response, style) + end + + private + + def self.build_prompt(text, max_tags:, style:) + count_instruction = max_tags ? " (maximum #{max_tags} tags)" : "" + + style_instruction = case style + when :keywords + "Generate relevant keywords and key phrases that capture the main topics and concepts." + when :topics + "Generate broader topic categories and subject areas covered in the content." + when :hashtags + "Generate hashtag-style tags suitable for social media (include the # symbol)." + else + "Generate relevant keywords and key phrases that capture the main topics and concepts." + end + + format_instruction = case style + when :hashtags + "Format each tag as a hashtag (e.g., #ruby, #programming)." + else + "Return simple words or short phrases without special formatting." + end + + <<~PROMPT + Analyze the following text and generate relevant tags#{count_instruction}. + #{style_instruction} + #{format_instruction} + + Return only the tags, one per line, no preamble or explanation. + Each tag should be on a separate line. 
+ + Text: + #{text} + PROMPT + end + + def self.parse_response(response, style) + lines = response.strip.split("\n").map(&:strip).reject(&:empty?) + + # Clean up formatting markers and normalize tags + lines.map do |line| + # Remove common formatting markers + cleaned = line.gsub(/^[•\*\-]\s*/, "") # Remove bullets + cleaned = cleaned.gsub(/^\d+\.\s*/, "") # Remove numbers + cleaned = cleaned.gsub(/^["']/, "").gsub(/["']$/, "") # Remove quotes more explicitly + + # Handle comma-separated tags on single line (some LLMs do this) + if cleaned.include?(",") && !cleaned.start_with?("#") + cleaned.split(",").map(&:strip).map { |tag| tag.gsub(/^["']/, "").gsub(/["']$/, "") } + else + cleaned + end + end.flatten.reject(&:empty?).uniq + end + end + end +end From c64f12e98f098b6948e8a26aa1a9d6921d8005ab Mon Sep 17 00:00:00 2001 From: Patrick Olsen Date: Wed, 18 Feb 2026 22:03:51 +0700 Subject: [PATCH 2/9] Integrate new operations into existing infrastructure - Add require statements and module methods to main Text module - Add model configuration attributes for all new operations - Add String extension methods with special handling for compare() Co-Authored-By: Claude Sonnet 4 --- lib/ruby_llm/text.rb | 20 ++++++++++++++++++++ lib/ruby_llm/text/configuration.rb | 7 ++++++- lib/ruby_llm/text/string_ext.rb | 16 ++++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/lib/ruby_llm/text.rb b/lib/ruby_llm/text.rb index f874427..d4b8e51 100644 --- a/lib/ruby_llm/text.rb +++ b/lib/ruby_llm/text.rb @@ -13,6 +13,10 @@ require_relative "text/key_points" require_relative "text/rewrite" require_relative "text/answer" +require_relative "text/detect_language" +require_relative "text/generate_tags" +require_relative "text/anonymize" +require_relative "text/compare" module RubyLLM module Text @@ -62,6 +66,22 @@ def rewrite(text, **options) def answer(text, question, **options) Answer.call(text, question, **options) end + + def detect_language(text, **options) + 
DetectLanguage.call(text, **options) + end + + def generate_tags(text, **options) + GenerateTags.call(text, **options) + end + + def anonymize(text, **options) + Anonymize.call(text, **options) + end + + def compare(text1, text2, **options) + Compare.call(text1, text2, **options) + end end end end diff --git a/lib/ruby_llm/text/configuration.rb b/lib/ruby_llm/text/configuration.rb index f8181db..5a3bb52 100644 --- a/lib/ruby_llm/text/configuration.rb +++ b/lib/ruby_llm/text/configuration.rb @@ -4,7 +4,8 @@ class Configuration # Method-specific model overrides (optional) # If not set, falls back to RubyLLM.config.default_model attr_accessor :summarize_model, :translate_model, - :extract_model, :classify_model, :grammar_model, :sentiment_model, :key_points_model, :rewrite_model, :answer_model + :extract_model, :classify_model, :grammar_model, :sentiment_model, :key_points_model, :rewrite_model, :answer_model, + :detect_language_model, :generate_tags_model, :anonymize_model, :compare_model # Default temperature for text operations attr_accessor :temperature @@ -20,6 +21,10 @@ def initialize @key_points_model = nil @rewrite_model = nil @answer_model = nil + @detect_language_model = nil + @generate_tags_model = nil + @anonymize_model = nil + @compare_model = nil end def model_for(method_name) diff --git a/lib/ruby_llm/text/string_ext.rb b/lib/ruby_llm/text/string_ext.rb index 831dbaa..6dcb9fb 100644 --- a/lib/ruby_llm/text/string_ext.rb +++ b/lib/ruby_llm/text/string_ext.rb @@ -35,4 +35,20 @@ def rewrite(**options) def answer(question, **options) RubyLLM::Text.answer(self, question, **options) end + + def detect_language(**options) + RubyLLM::Text.detect_language(self, **options) + end + + def generate_tags(**options) + RubyLLM::Text.generate_tags(self, **options) + end + + def anonymize(**options) + RubyLLM::Text.anonymize(self, **options) + end + + def compare(other_text, **options) + RubyLLM::Text.compare(self, other_text, **options) + end end From 
0c86ba69da54b490807a2612ded4532231da5063 Mon Sep 17 00:00:00 2001 From: Patrick Olsen Date: Wed, 18 Feb 2026 22:04:00 +0700 Subject: [PATCH 3/9] Add comprehensive test coverage for new operations - Complete test suites for all 4 operations with 151 total tests - Tests cover simple/structured responses, error handling, configuration - Updated String extension tests for new methods - All tests passing with established mocking patterns Co-Authored-By: Claude Sonnet 4 --- test/ruby_llm/text/anonymize_test.rb | 174 +++++++++++++++++++ test/ruby_llm/text/compare_test.rb | 191 +++++++++++++++++++++ test/ruby_llm/text/detect_language_test.rb | 118 +++++++++++++ test/ruby_llm/text/generate_tags_test.rb | 131 ++++++++++++++ test/ruby_llm/text/string_ext_test.rb | 29 ++++ 5 files changed, 643 insertions(+) create mode 100644 test/ruby_llm/text/anonymize_test.rb create mode 100644 test/ruby_llm/text/compare_test.rb create mode 100644 test/ruby_llm/text/detect_language_test.rb create mode 100644 test/ruby_llm/text/generate_tags_test.rb diff --git a/test/ruby_llm/text/anonymize_test.rb b/test/ruby_llm/text/anonymize_test.rb new file mode 100644 index 0000000..72330d3 --- /dev/null +++ b/test/ruby_llm/text/anonymize_test.rb @@ -0,0 +1,174 @@ +require "test_helper" + +class RubyLLM::Text::AnonymizeTest < Minitest::Test + def setup + @pii_text = "Contact John Doe at john.doe@example.com or call him at (555) 123-4567. His address is 123 Main St, Anytown, CA 90210." + @simple_text = "Hello world, this is a test message." + @email_only = "Please email support at help@company.com for assistance." + end + + def test_anonymizes_text_with_simple_response + anonymized = "Contact [PERSON] at [EMAIL] or call him at [PHONE]. His address is [ADDRESS]." 
+ RubyLLM::Text::Base.stubs(:call_llm).returns(anonymized) + + result = RubyLLM::Text::Anonymize.call(@pii_text) + + assert_kind_of String, result + assert_equal anonymized, result + end + + def test_returns_structured_response_with_mapping + mapping_response = { + "text" => "Contact [PERSON_1] at [EMAIL_1] or call him at [PHONE_1]. His address is [ADDRESS_1].", + "mapping" => { + "[PERSON_1]" => "John Doe", + "[EMAIL_1]" => "john.doe@example.com", + "[PHONE_1]" => "(555) 123-4567", + "[ADDRESS_1]" => "123 Main St, Anytown, CA 90210" + } + }.to_json + + RubyLLM::Text::Base.stubs(:call_llm).returns(mapping_response) + + result = RubyLLM::Text::Anonymize.call(@pii_text, include_mapping: true) + + assert_kind_of Hash, result + assert result.key?("text") + assert result.key?("mapping") + assert_includes result["text"], "[PERSON_1]" + assert_equal "John Doe", result["mapping"]["[PERSON_1]"] + end + + def test_handles_json_parsing_failure_gracefully + RubyLLM::Text::Base.stubs(:call_llm).returns("Invalid JSON response") + + result = RubyLLM::Text::Anonymize.call(@pii_text, include_mapping: true) + + assert_kind_of Hash, result + assert_equal "Invalid JSON response", result["text"] + assert_equal({}, result["mapping"]) + end + + def test_handles_all_pii_types_shortcut + prompt = RubyLLM::Text::Anonymize.send(:build_prompt, @pii_text, + pii_types: [ :all ], + replacement_style: :generic, + include_mapping: false) + + assert_includes prompt, "Names" + assert_includes prompt, "Email addresses" + assert_includes prompt, "Phone numbers" + assert_includes prompt, "Physical addresses" + end + + def test_handles_selective_pii_types + prompt = RubyLLM::Text::Anonymize.send(:build_prompt, @email_only, + pii_types: [ :emails ], + replacement_style: :generic, + include_mapping: false) + + assert_includes prompt, "Email addresses" + refute_includes prompt, "Names" + refute_includes prompt, "Phone numbers" + end + + def test_builds_generic_replacement_instructions + prompt = 
RubyLLM::Text::Anonymize.send(:build_prompt, @pii_text, + pii_types: [ :names, :emails ], + replacement_style: :generic, + include_mapping: false) + + assert_includes prompt, "[PERSON]" + assert_includes prompt, "[EMAIL]" + assert_includes prompt, "[PERSON_1], [PERSON_2]" + end + + def test_builds_numbered_replacement_instructions + prompt = RubyLLM::Text::Anonymize.send(:build_prompt, @pii_text, + pii_types: [ :names ], + replacement_style: :numbered, + include_mapping: false) + + assert_includes prompt, "[PERSON_1], [PERSON_2]" + refute_includes prompt, "[PERSON], [PERSON_1]" + end + + def test_builds_descriptive_replacement_instructions + prompt = RubyLLM::Text::Anonymize.send(:build_prompt, @pii_text, + pii_types: [ :names, :emails ], + replacement_style: :descriptive, + include_mapping: false) + + assert_includes prompt, "[FIRST_NAME]" + assert_includes prompt, "[EMAIL_ADDRESS]" + end + + def test_builds_simple_output_instruction + prompt = RubyLLM::Text::Anonymize.send(:build_prompt, @pii_text, + pii_types: [ :names ], + replacement_style: :generic, + include_mapping: false) + + assert_includes prompt, "Return only the anonymized text" + refute_includes prompt, "JSON object" + end + + def test_builds_mapping_output_instruction + prompt = RubyLLM::Text::Anonymize.send(:build_prompt, @pii_text, + pii_types: [ :names ], + replacement_style: :generic, + include_mapping: true) + + assert_includes prompt, "Return a JSON object" + assert_includes prompt, "mapping" + assert_includes prompt, "original value" + end + + def test_passes_schema_when_include_mapping_true + RubyLLM::Text::Base.expects(:call_llm).with { |prompt, options| + options.key?(:schema) && + options[:schema][:type] == "object" && + options[:schema][:properties].key?(:text) && + options[:schema][:properties].key?(:mapping) + }.returns('{"text": "anonymized", "mapping": {}}') + + RubyLLM::Text::Anonymize.call(@pii_text, include_mapping: true) + end + + def test_supports_additional_pii_types + prompt = 
RubyLLM::Text::Anonymize.send(:build_prompt, "SSN: 123-45-6789, Card: 4111111111111111", + pii_types: [ :ssn, :credit_cards ], + replacement_style: :generic, + include_mapping: false) + + assert_includes prompt, "Social Security Numbers" + assert_includes prompt, "Credit card numbers" + end + + def test_fallback_to_generic_for_unknown_replacement_style + prompt = RubyLLM::Text::Anonymize.send(:build_prompt, @pii_text, + pii_types: [ :names ], + replacement_style: :unknown, + include_mapping: false) + + assert_includes prompt, "[PERSON]" + assert_includes prompt, "[PERSON_1], [PERSON_2]" + end + + def test_uses_configured_model_when_specified + RubyLLM::Text.configure do |config| + config.anonymize_model = "claude-3-5-sonnet" + end + + RubyLLM::Text::Base.expects(:call_llm).with(anything, model: "claude-3-5-sonnet").returns("[PERSON]") + RubyLLM::Text::Anonymize.call(@pii_text) + end + + def test_module_level_api_delegates_correctly + RubyLLM::Text::Base.stubs(:call_llm).returns("Anonymized text") + + result = RubyLLM::Text.anonymize(@pii_text, pii_types: [ :emails ]) + + assert_equal "Anonymized text", result + end +end diff --git a/test/ruby_llm/text/compare_test.rb b/test/ruby_llm/text/compare_test.rb new file mode 100644 index 0000000..93750f2 --- /dev/null +++ b/test/ruby_llm/text/compare_test.rb @@ -0,0 +1,191 @@ +require "test_helper" + +class RubyLLM::Text::CompareTest < Minitest::Test + def setup + @text1 = "Ruby is a dynamic programming language with a focus on simplicity and productivity." + @text2 = "Ruby is an elegant programming language that emphasizes developer happiness and ease of use." + @different_text1 = "The weather today is sunny and warm." + @different_text2 = "Machine learning algorithms are transforming artificial intelligence." 
+ end + + def test_compares_texts_with_similarity_response + similarity_response = { + "similarity" => 0.85, + "comparison_type" => "similarity", + "similarity_type" => "semantic", + "summary" => "Both texts describe Ruby programming language positively." + }.to_json + + RubyLLM::Text::Base.stubs(:call_llm).returns(similarity_response) + + result = RubyLLM::Text::Compare.call(@text1, @text2) + + assert_kind_of Hash, result + assert_equal 0.85, result["similarity"] + assert_equal "similarity", result["comparison_type"] + assert result["similarity"].is_a?(Float) + end + + def test_compares_texts_with_detailed_response + detailed_response = { + "similarity" => 0.75, + "comparison_type" => "detailed", + "differences" => [ "tone", "word choice" ], + "commonalities" => [ "topic", "positive sentiment" ], + "summary" => "Similar content with different presentation styles." + }.to_json + + RubyLLM::Text::Base.stubs(:call_llm).returns(detailed_response) + + result = RubyLLM::Text::Compare.call(@text1, @text2, comparison_type: :detailed) + + assert_kind_of Hash, result + assert_equal 0.75, result["similarity"] + assert_equal "detailed", result["comparison_type"] + assert_kind_of Array, result["differences"] + assert_kind_of Array, result["commonalities"] + assert_includes result["differences"], "tone" + assert_includes result["commonalities"], "topic" + end + + def test_compares_texts_with_changes_response + changes_response = { + "similarity" => 0.60, + "comparison_type" => "changes", + "change_types" => [ "word substitution", "tone modification" ], + "examples" => [ "'focus on simplicity' → 'emphasizes developer happiness'" ], + "assessment" => "Minor improvements in clarity and expressiveness." 
+ }.to_json + + RubyLLM::Text::Base.stubs(:call_llm).returns(changes_response) + + result = RubyLLM::Text::Compare.call(@text1, @text2, comparison_type: :changes) + + assert_kind_of Hash, result + assert_equal 0.60, result["similarity"] + assert_equal "changes", result["comparison_type"] + assert_kind_of Array, result["change_types"] + assert_kind_of Array, result["examples"] + assert_includes result["change_types"], "word substitution" + end + + def test_handles_json_parsing_failure_gracefully + RubyLLM::Text::Base.stubs(:call_llm).returns("Invalid JSON response") + + result = RubyLLM::Text::Compare.call(@text1, @text2) + + assert_kind_of Hash, result + assert_nil result["similarity"] + assert_equal "similarity", result["comparison_type"] + assert result.key?("error") + end + + def test_converts_similarity_to_float + similarity_response = { + "similarity" => 1, + "comparison_type" => "similarity" + }.to_json + + RubyLLM::Text::Base.stubs(:call_llm).returns(similarity_response) + + result = RubyLLM::Text::Compare.call(@text1, @text2) + + assert_equal 1.0, result["similarity"] + assert result["similarity"].is_a?(Float) + end + + def test_builds_prompt_for_similarity_comparison + prompt = RubyLLM::Text::Compare.send(:build_prompt, @text1, @text2, comparison_type: :similarity) + + assert_includes prompt, "Compare the two texts" + assert_includes prompt, "similarity score from 0 to 1" + assert_includes prompt, "semantic similarity" + assert_includes prompt, @text1 + assert_includes prompt, @text2 + assert_includes prompt, "Text 1:" + assert_includes prompt, "Text 2:" + end + + def test_builds_prompt_for_detailed_comparison + prompt = RubyLLM::Text::Compare.send(:build_prompt, @text1, @text2, comparison_type: :detailed) + + assert_includes prompt, "detailed comparison" + assert_includes prompt, "differences between the texts" + assert_includes prompt, "Common elements" + assert_includes prompt, "style, tone, content, structure" + end + + def 
test_builds_prompt_for_changes_comparison + prompt = RubyLLM::Text::Compare.send(:build_prompt, @text1, @text2, comparison_type: :changes) + + assert_includes prompt, "revision of the first" + assert_includes prompt, "changes made" + assert_includes prompt, "additions, deletions, modifications" + assert_includes prompt, "tracking edits" + end + + def test_builds_correct_schema_for_similarity + schema = RubyLLM::Text::Compare.send(:build_comparison_schema, :similarity) + + assert_equal "object", schema[:type] + assert schema[:properties].key?(:similarity) + assert schema[:properties].key?(:similarity_type) + assert schema[:properties].key?(:summary) + assert_includes schema[:required], "similarity" + end + + def test_builds_correct_schema_for_detailed + schema = RubyLLM::Text::Compare.send(:build_comparison_schema, :detailed) + + assert schema[:properties].key?(:differences) + assert schema[:properties].key?(:commonalities) + assert_equal "array", schema[:properties][:differences][:type] + assert_equal "array", schema[:properties][:commonalities][:type] + end + + def test_builds_correct_schema_for_changes + schema = RubyLLM::Text::Compare.send(:build_comparison_schema, :changes) + + assert schema[:properties].key?(:change_types) + assert schema[:properties].key?(:examples) + assert schema[:properties].key?(:assessment) + assert_equal "array", schema[:properties][:change_types][:type] + end + + def test_always_passes_schema_for_structured_output + RubyLLM::Text::Base.expects(:call_llm).with { |prompt, options| + options.key?(:schema) && + options[:schema][:type] == "object" && + options[:schema][:properties].key?(:similarity) + }.returns('{"similarity": 0.8, "comparison_type": "similarity"}') + + RubyLLM::Text::Compare.call(@text1, @text2) + end + + def test_uses_configured_model_when_specified + RubyLLM::Text.configure do |config| + config.compare_model = "claude-3-5-sonnet" + end + + RubyLLM::Text::Base.expects(:call_llm).with { |prompt, options| + options[:model] 
== "claude-3-5-sonnet" + }.returns('{"similarity": 0.9, "comparison_type": "similarity"}') + RubyLLM::Text::Compare.call(@text1, @text2) + end + + def test_module_level_api_delegates_correctly + response = '{"similarity": 0.7, "comparison_type": "similarity"}' + RubyLLM::Text::Base.stubs(:call_llm).returns(response) + + result = RubyLLM::Text.compare(@text1, @text2, comparison_type: :detailed) + + assert_equal 0.7, result["similarity"] + end + + def test_defaults_to_similarity_for_unknown_comparison_type + prompt = RubyLLM::Text::Compare.send(:build_prompt, @text1, @text2, comparison_type: :unknown) + + assert_includes prompt, "similarity score from 0 to 1" + assert_includes prompt, "semantic similarity" + end +end diff --git a/test/ruby_llm/text/detect_language_test.rb b/test/ruby_llm/text/detect_language_test.rb new file mode 100644 index 0000000..7e2698c --- /dev/null +++ b/test/ruby_llm/text/detect_language_test.rb @@ -0,0 +1,118 @@ +require "test_helper" + +class RubyLLM::Text::DetectLanguageTest < Minitest::Test + def setup + @english_text = "Hello, how are you doing today?" + @french_text = "Bonjour, comment allez-vous aujourd'hui?" + @multilingual_text = "Hello world. Bonjour le monde." 
+ end + + def test_detects_language_with_simple_response + RubyLLM::Text::Base.stubs(:call_llm).returns("English") + + result = RubyLLM::Text::DetectLanguage.call(@english_text) + + assert_kind_of String, result + assert_equal "English", result + end + + def test_detects_language_with_confidence_scoring + confidence_response = { + "language" => "French", + "confidence" => 0.95, + "code" => "fr" + }.to_json + + RubyLLM::Text::Base.stubs(:call_llm).returns(confidence_response) + + result = RubyLLM::Text::DetectLanguage.call(@french_text, include_confidence: true) + + assert_kind_of Hash, result + assert_equal "French", result["language"] + assert_equal 0.95, result["confidence"] + assert_equal "fr", result["code"] + assert result["confidence"].is_a?(Float) + end + + def test_handles_unknown_language + RubyLLM::Text::Base.stubs(:call_llm).returns("unknown") + + result = RubyLLM::Text::DetectLanguage.call("xyz123!@#") + + assert_equal "unknown", result + end + + def test_handles_json_parsing_failure_gracefully + RubyLLM::Text::Base.stubs(:call_llm).returns("Invalid JSON response") + + result = RubyLLM::Text::DetectLanguage.call(@english_text, include_confidence: true) + + assert_kind_of Hash, result + assert_equal "Invalid JSON response", result["language"] + assert_nil result["confidence"] + assert_nil result["code"] + end + + def test_converts_confidence_to_float + confidence_response = { + "language" => "Spanish", + "confidence" => 1, + "code" => "es" + }.to_json + + RubyLLM::Text::Base.stubs(:call_llm).returns(confidence_response) + + result = RubyLLM::Text::DetectLanguage.call(@english_text, include_confidence: true) + + assert_equal 1.0, result["confidence"] + assert result["confidence"].is_a?(Float) + end + + def test_builds_correct_prompt_for_simple_detection + prompt = RubyLLM::Text::DetectLanguage.send(:build_prompt, @english_text, include_confidence: false) + + assert_includes prompt, "Detect the language" + assert_includes prompt, @english_text + 
assert_includes prompt, "Return only the full language name" + refute_includes prompt, "JSON object" + end + + def test_builds_correct_prompt_for_confidence_detection + prompt = RubyLLM::Text::DetectLanguage.send(:build_prompt, @french_text, include_confidence: true) + + assert_includes prompt, "Detect the language" + assert_includes prompt, @french_text + assert_includes prompt, "Return a JSON object" + assert_includes prompt, "confidence" + assert_includes prompt, "code" + end + + def test_passes_schema_when_include_confidence_true + RubyLLM::Text::Base.expects(:call_llm).with { |prompt, options| + options.key?(:schema) && + options[:schema][:type] == "object" && + options[:schema][:properties].key?(:language) && + options[:schema][:properties].key?(:confidence) && + options[:schema][:properties].key?(:code) + }.returns('{"language": "English", "confidence": 0.9, "code": "en"}') + + RubyLLM::Text::DetectLanguage.call(@english_text, include_confidence: true) + end + + def test_uses_configured_model_when_specified + RubyLLM::Text.configure do |config| + config.detect_language_model = "gpt-4o-mini" + end + + RubyLLM::Text::Base.expects(:call_llm).with(anything, model: "gpt-4o-mini").returns("English") + RubyLLM::Text::DetectLanguage.call(@english_text) + end + + def test_module_level_api_delegates_correctly + RubyLLM::Text::Base.stubs(:call_llm).returns("German") + + result = RubyLLM::Text.detect_language(@english_text) + + assert_equal "German", result + end +end diff --git a/test/ruby_llm/text/generate_tags_test.rb b/test/ruby_llm/text/generate_tags_test.rb new file mode 100644 index 0000000..2d1feaf --- /dev/null +++ b/test/ruby_llm/text/generate_tags_test.rb @@ -0,0 +1,131 @@ +require "test_helper" + +class RubyLLM::Text::GenerateTagsTest < Minitest::Test + def setup + @blog_post = "Ruby on Rails is a web application framework written in Ruby. It follows the Model-View-Controller (MVC) pattern and emphasizes convention over configuration." 
+ @tech_article = "Machine learning algorithms are transforming artificial intelligence and data science applications across industries." + end + + def test_generates_tags_as_array + response = "ruby\nrails\nweb development\nMVC\nframework" + RubyLLM::Text::Base.stubs(:call_llm).returns(response) + + result = RubyLLM::Text::GenerateTags.call(@blog_post) + + assert_kind_of Array, result + assert_equal 5, result.length + assert_includes result, "ruby" + assert_includes result, "web development" + end + + def test_handles_comma_separated_response + response = "machine learning, artificial intelligence, data science, algorithms" + RubyLLM::Text::Base.stubs(:call_llm).returns(response) + + result = RubyLLM::Text::GenerateTags.call(@tech_article) + + assert_kind_of Array, result + assert_equal 4, result.length + assert_includes result, "machine learning" + assert_includes result, "data science" + end + + def test_cleans_formatting_markers + response = "β€’ programming\n- web development\n1. ruby\n2. 
rails" + RubyLLM::Text::Base.stubs(:call_llm).returns(response) + + result = RubyLLM::Text::GenerateTags.call(@blog_post) + + assert_equal [ "programming", "web development", "ruby", "rails" ], result + end + + def test_removes_duplicates + response = "ruby\nrails\nruby\nweb development\nrails" + RubyLLM::Text::Base.stubs(:call_llm).returns(response) + + result = RubyLLM::Text::GenerateTags.call(@blog_post) + + assert_equal [ "ruby", "rails", "web development" ], result + end + + def test_handles_empty_lines_in_response + response = "programming\n\n\nruby\n\nrails\n" + RubyLLM::Text::Base.stubs(:call_llm).returns(response) + + result = RubyLLM::Text::GenerateTags.call(@blog_post) + + assert_equal [ "programming", "ruby", "rails" ], result + end + + def test_respects_max_tags_parameter + prompt = RubyLLM::Text::GenerateTags.send(:build_prompt, @blog_post, max_tags: 3, style: :keywords) + + assert_includes prompt, "maximum 3 tags" + end + + def test_builds_prompt_for_keywords_style + prompt = RubyLLM::Text::GenerateTags.send(:build_prompt, @blog_post, max_tags: nil, style: :keywords) + + assert_includes prompt, "Generate relevant keywords" + assert_includes prompt, "simple words or short phrases" + assert_includes prompt, @blog_post + end + + def test_builds_prompt_for_topics_style + prompt = RubyLLM::Text::GenerateTags.send(:build_prompt, @tech_article, max_tags: nil, style: :topics) + + assert_includes prompt, "broader topic categories" + assert_includes prompt, "subject areas" + assert_includes prompt, @tech_article + end + + def test_builds_prompt_for_hashtags_style + prompt = RubyLLM::Text::GenerateTags.send(:build_prompt, @blog_post, max_tags: 5, style: :hashtags) + + assert_includes prompt, "hashtag-style tags" + assert_includes prompt, "include the # symbol" + assert_includes prompt, "maximum 5 tags" + end + + def test_handles_hashtag_formatted_response + response = "#ruby\n#rails\n#webdevelopment\n#programming" + 
RubyLLM::Text::Base.stubs(:call_llm).returns(response) + + result = RubyLLM::Text::GenerateTags.call(@blog_post, style: :hashtags) + + assert_includes result, "#ruby" + assert_includes result, "#webdevelopment" + end + + def test_removes_quotes_from_tags + response = "\"programming\"\n\"web development\"\n\"ruby\"" + RubyLLM::Text::Base.stubs(:call_llm).returns(response) + + result = RubyLLM::Text::GenerateTags.call(@blog_post) + + assert_equal [ "programming", "web development", "ruby" ], result + end + + def test_uses_configured_model_when_specified + RubyLLM::Text.configure do |config| + config.generate_tags_model = "gpt-4o-mini" + end + + RubyLLM::Text::Base.expects(:call_llm).with(anything, model: "gpt-4o-mini").returns("ruby\nrails") + RubyLLM::Text::GenerateTags.call(@blog_post) + end + + def test_module_level_api_delegates_correctly + RubyLLM::Text::Base.stubs(:call_llm).returns("tech\nAI\ndata") + + result = RubyLLM::Text.generate_tags(@tech_article, max_tags: 3) + + assert_equal [ "tech", "AI", "data" ], result + end + + def test_defaults_to_keywords_style_for_unknown_style + prompt = RubyLLM::Text::GenerateTags.send(:build_prompt, @blog_post, max_tags: nil, style: :unknown) + + assert_includes prompt, "Generate relevant keywords" + end +end diff --git a/test/ruby_llm/text/string_ext_test.rb b/test/ruby_llm/text/string_ext_test.rb index 30981a1..5339fcb 100644 --- a/test/ruby_llm/text/string_ext_test.rb +++ b/test/ruby_llm/text/string_ext_test.rb @@ -69,4 +69,33 @@ def test_answer_delegates_to_text_answer result = @text.answer(question, include_confidence: true) assert_equal({ "answer" => "testing", "confidence" => 0.9 }, result) end + + def test_detect_language_delegates_to_text_detect_language + RubyLLM::Text.expects(:detect_language).with(@text, include_confidence: false).returns("English") + + result = @text.detect_language(include_confidence: false) + assert_equal "English", result + end + + def test_generate_tags_delegates_to_text_generate_tags + 
RubyLLM::Text.expects(:generate_tags).with(@text, max_tags: 5, style: :keywords).returns([ "tag1", "tag2" ]) + + result = @text.generate_tags(max_tags: 5, style: :keywords) + assert_equal [ "tag1", "tag2" ], result + end + + def test_anonymize_delegates_to_text_anonymize + RubyLLM::Text.expects(:anonymize).with(@text, pii_types: [ :emails ], include_mapping: false).returns("anonymized text") + + result = @text.anonymize(pii_types: [ :emails ], include_mapping: false) + assert_equal "anonymized text", result + end + + def test_compare_delegates_to_text_compare_with_other_text + other_text = "Another text to compare" + RubyLLM::Text.expects(:compare).with(@text, other_text, comparison_type: :detailed).returns({ "similarity" => 0.8 }) + + result = @text.compare(other_text, comparison_type: :detailed) + assert_equal({ "similarity" => 0.8 }, result) + end end From 6adade412082c03244981fdc5927ed56e27d1a3a Mon Sep 17 00:00:00 2001 From: Patrick Olsen Date: Thu, 19 Feb 2026 13:26:11 +0700 Subject: [PATCH 4/9] Add input validation with helpful error messages - Create Validation module with ValidationError class - Add validate_text!, validate_required!, validate_array!, validate_one_of! helpers - All 13 operations now validate inputs before LLM calls - Consistent error messages: "text cannot be nil", "schema is required", etc. 
- Update manual-test binstub with Phase 3 operations - Full test coverage for validation (186 tests, 516 assertions) Co-Authored-By: Claude Opus 4.5 --- bin/manual-test | 72 ++++++++- lib/ruby_llm/text.rb | 1 + lib/ruby_llm/text/anonymize.rb | 1 + lib/ruby_llm/text/answer.rb | 4 +- lib/ruby_llm/text/classify.rb | 3 +- lib/ruby_llm/text/compare.rb | 2 + lib/ruby_llm/text/detect_language.rb | 1 + lib/ruby_llm/text/extract.rb | 3 +- lib/ruby_llm/text/generate_tags.rb | 1 + lib/ruby_llm/text/grammar.rb | 1 + lib/ruby_llm/text/key_points.rb | 1 + lib/ruby_llm/text/rewrite.rb | 7 +- lib/ruby_llm/text/sentiment.rb | 1 + lib/ruby_llm/text/summarize.rb | 1 + lib/ruby_llm/text/translate.rb | 2 + lib/ruby_llm/text/validation.rb | 41 +++++ test/ruby_llm/text/classify_test.rb | 4 +- test/ruby_llm/text/extract_test.rb | 4 +- test/ruby_llm/text/rewrite_test.rb | 2 +- test/ruby_llm/text/validation_test.rb | 222 ++++++++++++++++++++++++++ 20 files changed, 356 insertions(+), 18 deletions(-) create mode 100644 lib/ruby_llm/text/validation.rb create mode 100644 test/ruby_llm/text/validation_test.rb diff --git a/bin/manual-test b/bin/manual-test index ea7d710..cd4e8ce 100755 --- a/bin/manual-test +++ b/bin/manual-test @@ -155,8 +155,64 @@ end puts "\n" + "="*50 + "\n" -# Test 10: String extensions (if available) -puts "πŸ”Ÿ Testing string extensions..." +# Test 10: Detect Language (Phase 3) +puts "πŸ”Ÿ Testing detect_language..." +begin + language = RubyLLM::Text.detect_language("Bonjour, comment allez-vous?") + language_with_confidence = RubyLLM::Text.detect_language("Hola, ΒΏcΓ³mo estΓ‘s?", include_confidence: true) + puts " βœ… French text detected: #{language}" + puts " βœ… Spanish with confidence: #{language_with_confidence}" +rescue => e + puts " ❌ Error: #{e.message}" +end + +puts "\n" + "="*50 + "\n" + +# Test 11: Generate Tags (Phase 3) +puts "1️⃣1️⃣ Testing generate_tags..." 
+begin + tags = RubyLLM::Text.generate_tags(article, max_tags: 5) + hashtags = RubyLLM::Text.generate_tags(article, style: :hashtags, max_tags: 3) + puts " βœ… Article tags: #{tags.join(', ')}" + puts " βœ… Hashtags: #{hashtags.join(' ')}" +rescue => e + puts " ❌ Error: #{e.message}" +end + +puts "\n" + "="*50 + "\n" + +# Test 12: Anonymize (Phase 3) +pii_text = "Contact John Smith at john.smith@email.com or call 555-123-4567. He lives at 123 Main St, Boston." +puts "1️⃣2️⃣ Testing anonymize..." +begin + anonymized = RubyLLM::Text.anonymize(pii_text) + anonymized_with_mapping = RubyLLM::Text.anonymize(pii_text, include_mapping: true) + puts " βœ… Original: #{pii_text}" + puts " πŸ”’ Anonymized: #{anonymized}" + puts " πŸ”’ With mapping: #{anonymized_with_mapping}" +rescue => e + puts " ❌ Error: #{e.message}" +end + +puts "\n" + "="*50 + "\n" + +# Test 13: Compare (Phase 3) +text_a = "Ruby is a dynamic programming language focused on simplicity." +text_b = "Ruby is an interpreted language that emphasizes developer productivity." +puts "1️⃣3️⃣ Testing compare..." +begin + comparison = RubyLLM::Text.compare(text_a, text_b) + puts " βœ… Text A: #{text_a}" + puts " βœ… Text B: #{text_b}" + puts " πŸ“Š Comparison: #{comparison}" +rescue => e + puts " ❌ Error: #{e.message}" +end + +puts "\n" + "="*50 + "\n" + +# Test 14: String extensions (if available) +puts "1️⃣4️⃣ Testing string extensions..." begin require 'ruby_llm/text/string_ext' @@ -170,13 +226,20 @@ begin puts " βœ… Phase 2 extensions work!" puts " πŸ”§ Grammar: #{corrected_ext}" puts " 😊 Sentiment: #{sentiment_ext}" + + # Test Phase 3 extensions + lang = "Guten Tag!".detect_language + tags = article.generate_tags(max_tags: 3) + puts " βœ… Phase 3 extensions work!" + puts " 🌍 Language: #{lang}" + puts " 🏷️ Tags: #{tags.join(', ')}" rescue LoadError puts " ⚠️ String extensions not loaded (optional)" rescue => e puts " ❌ Error: #{e.message}" end -puts "\nπŸŽ‰ Manual testing complete - Phase 1 & 2!" 
+puts "\nπŸŽ‰ Manual testing complete - Phase 1, 2 & 3!" puts "\nπŸ’‘ Tips:" puts " β€’ Try different models: RubyLLM::Text.summarize(text, model: 'claude-sonnet-4-5')" puts " β€’ Configure method-specific models:" @@ -184,4 +247,5 @@ puts " RubyLLM::Text.configure do |config|" puts " config.sentiment_model = 'claude-haiku-4-5'" puts " config.rewrite_model = 'gpt-4.1'" puts " end" -puts " β€’ All Phase 2 methods: fix_grammar, sentiment, key_points, rewrite, answer" \ No newline at end of file +puts " β€’ Phase 2 methods: fix_grammar, sentiment, key_points, rewrite, answer" +puts " β€’ Phase 3 methods: detect_language, generate_tags, anonymize, compare" \ No newline at end of file diff --git a/lib/ruby_llm/text.rb b/lib/ruby_llm/text.rb index d4b8e51..8803016 100644 --- a/lib/ruby_llm/text.rb +++ b/lib/ruby_llm/text.rb @@ -4,6 +4,7 @@ require_relative "text/version" require_relative "text/configuration" require_relative "text/base" +require_relative "text/validation" require_relative "text/summarize" require_relative "text/translate" require_relative "text/extract" diff --git a/lib/ruby_llm/text/anonymize.rb b/lib/ruby_llm/text/anonymize.rb index 13f4c4a..d393410 100644 --- a/lib/ruby_llm/text/anonymize.rb +++ b/lib/ruby_llm/text/anonymize.rb @@ -5,6 +5,7 @@ module Anonymize DEFAULT_PII_TYPES = [ :names, :emails, :phones, :addresses ].freeze def self.call(text, pii_types: DEFAULT_PII_TYPES, replacement_style: :generic, include_mapping: false, model: nil, **options) + Validation.validate_text!(text) model ||= RubyLLM::Text.config.model_for(:anonymize) # Handle :all shortcut for all PII types diff --git a/lib/ruby_llm/text/answer.rb b/lib/ruby_llm/text/answer.rb index 5d70ae1..b8fbc09 100644 --- a/lib/ruby_llm/text/answer.rb +++ b/lib/ruby_llm/text/answer.rb @@ -2,8 +2,8 @@ module RubyLLM module Text module Answer def self.call(text, question, include_confidence: false, model: nil, **options) - raise ArgumentError, "question is required" if question.nil? 
|| question.strip.empty? - + Validation.validate_text!(text) + Validation.validate_text!(question, param_name: "question") model ||= RubyLLM::Text.config.model_for(:answer) prompt = build_prompt(text, question, include_confidence: include_confidence) diff --git a/lib/ruby_llm/text/classify.rb b/lib/ruby_llm/text/classify.rb index 4ffdf17..16c1f72 100644 --- a/lib/ruby_llm/text/classify.rb +++ b/lib/ruby_llm/text/classify.rb @@ -2,8 +2,9 @@ module RubyLLM module Text module Classify def self.call(text, categories:, model: nil, **options) + Validation.validate_text!(text) + Validation.validate_array!(categories, "categories") model ||= RubyLLM::Text.config.model_for(:classify) - raise ArgumentError, "categories are required" if categories.empty? prompt = build_prompt(text, categories) Base.call_llm(prompt, model: model, **options) diff --git a/lib/ruby_llm/text/compare.rb b/lib/ruby_llm/text/compare.rb index 54dc6f3..f44108f 100644 --- a/lib/ruby_llm/text/compare.rb +++ b/lib/ruby_llm/text/compare.rb @@ -2,6 +2,8 @@ module RubyLLM module Text module Compare def self.call(text1, text2, comparison_type: :similarity, model: nil, **options) + Validation.validate_text!(text1, param_name: "text1") + Validation.validate_text!(text2, param_name: "text2") model ||= RubyLLM::Text.config.model_for(:compare) prompt = build_prompt(text1, text2, comparison_type: comparison_type) diff --git a/lib/ruby_llm/text/detect_language.rb b/lib/ruby_llm/text/detect_language.rb index 72ff48e..1dfdcd4 100644 --- a/lib/ruby_llm/text/detect_language.rb +++ b/lib/ruby_llm/text/detect_language.rb @@ -2,6 +2,7 @@ module RubyLLM module Text module DetectLanguage def self.call(text, include_confidence: false, model: nil, **options) + Validation.validate_text!(text) model ||= RubyLLM::Text.config.model_for(:detect_language) prompt = build_prompt(text, include_confidence: include_confidence) diff --git a/lib/ruby_llm/text/extract.rb b/lib/ruby_llm/text/extract.rb index 50373ab..0369f5f 100644 --- 
a/lib/ruby_llm/text/extract.rb +++ b/lib/ruby_llm/text/extract.rb @@ -2,8 +2,9 @@ module RubyLLM module Text module Extract def self.call(text, schema: nil, model: nil, **options) + Validation.validate_text!(text) + Validation.validate_required!(schema, "schema") model ||= RubyLLM::Text.config.model_for(:extract) - raise ArgumentError, "schema is required for extraction" unless schema prompt = build_prompt(text, schema) diff --git a/lib/ruby_llm/text/generate_tags.rb b/lib/ruby_llm/text/generate_tags.rb index f12666b..7bbdc8a 100644 --- a/lib/ruby_llm/text/generate_tags.rb +++ b/lib/ruby_llm/text/generate_tags.rb @@ -2,6 +2,7 @@ module RubyLLM module Text module GenerateTags def self.call(text, max_tags: nil, style: :keywords, model: nil, **options) + Validation.validate_text!(text) model ||= RubyLLM::Text.config.model_for(:generate_tags) prompt = build_prompt(text, max_tags: max_tags, style: style) diff --git a/lib/ruby_llm/text/grammar.rb b/lib/ruby_llm/text/grammar.rb index 9226765..c34ead1 100644 --- a/lib/ruby_llm/text/grammar.rb +++ b/lib/ruby_llm/text/grammar.rb @@ -2,6 +2,7 @@ module RubyLLM module Text module Grammar def self.call(text, explain: false, preserve_style: false, model: nil, **options) + Validation.validate_text!(text) model ||= RubyLLM::Text.config.model_for(:grammar) prompt = build_prompt(text, explain: explain, preserve_style: preserve_style) diff --git a/lib/ruby_llm/text/key_points.rb b/lib/ruby_llm/text/key_points.rb index d4d1516..866db1d 100644 --- a/lib/ruby_llm/text/key_points.rb +++ b/lib/ruby_llm/text/key_points.rb @@ -2,6 +2,7 @@ module RubyLLM module Text module KeyPoints def self.call(text, max_points: nil, format: :sentences, model: nil, **options) + Validation.validate_text!(text) model ||= RubyLLM::Text.config.model_for(:key_points) prompt = build_prompt(text, max_points: max_points, format: format) diff --git a/lib/ruby_llm/text/rewrite.rb b/lib/ruby_llm/text/rewrite.rb index 5be7a1b..a23b9a8 100644 --- 
a/lib/ruby_llm/text/rewrite.rb +++ b/lib/ruby_llm/text/rewrite.rb @@ -17,13 +17,10 @@ module Rewrite }.freeze def self.call(text, tone: nil, style: nil, instruction: nil, model: nil, **options) + Validation.validate_text!(text) + Validation.validate_one_of!({ tone: tone, style: style, instruction: instruction }, %w[tone style instruction]) model ||= RubyLLM::Text.config.model_for(:rewrite) - # Validate that at least one transformation is specified - if tone.nil? && style.nil? && instruction.nil? - raise ArgumentError, "Must specify at least one of: tone, style, or instruction" - end - prompt = build_prompt(text, tone: tone, style: style, instruction: instruction) Base.call_llm(prompt, model: model, **options) end diff --git a/lib/ruby_llm/text/sentiment.rb b/lib/ruby_llm/text/sentiment.rb index dd113f0..7e8e743 100644 --- a/lib/ruby_llm/text/sentiment.rb +++ b/lib/ruby_llm/text/sentiment.rb @@ -4,6 +4,7 @@ module Sentiment DEFAULT_CATEGORIES = [ "positive", "negative", "neutral" ].freeze def self.call(text, categories: DEFAULT_CATEGORIES, simple: false, model: nil, **options) + Validation.validate_text!(text) model ||= RubyLLM::Text.config.model_for(:sentiment) prompt = build_prompt(text, categories: categories, simple: simple) diff --git a/lib/ruby_llm/text/summarize.rb b/lib/ruby_llm/text/summarize.rb index 5627163..b8453a2 100644 --- a/lib/ruby_llm/text/summarize.rb +++ b/lib/ruby_llm/text/summarize.rb @@ -8,6 +8,7 @@ module Summarize }.freeze def self.call(text, length: :medium, max_words: nil, model: nil, **options) + Validation.validate_text!(text) model ||= RubyLLM::Text.config.model_for(:summarize) prompt = build_prompt(text, length: length, max_words: max_words) diff --git a/lib/ruby_llm/text/translate.rb b/lib/ruby_llm/text/translate.rb index f70204e..46714e6 100644 --- a/lib/ruby_llm/text/translate.rb +++ b/lib/ruby_llm/text/translate.rb @@ -2,6 +2,8 @@ module RubyLLM module Text module Translate def self.call(text, to:, from: nil, model: nil, **options) 
+ Validation.validate_text!(text) + Validation.validate_required!(to, "to") model ||= RubyLLM::Text.config.model_for(:translate) prompt = build_prompt(text, to: to, from: from) diff --git a/lib/ruby_llm/text/validation.rb b/lib/ruby_llm/text/validation.rb new file mode 100644 index 0000000..b2eba4b --- /dev/null +++ b/lib/ruby_llm/text/validation.rb @@ -0,0 +1,41 @@ +module RubyLLM + module Text + module Validation + class ValidationError < Error; end + + def self.validate_text!(text, param_name: "text") + if text.nil? + raise ValidationError, "#{param_name} cannot be nil" + end + unless text.is_a?(String) + raise ValidationError, "#{param_name} must be a String, got #{text.class}" + end + if text.strip.empty? + raise ValidationError, "#{param_name} cannot be empty" + end + end + + def self.validate_required!(value, param_name) + if value.nil? + raise ValidationError, "#{param_name} is required" + end + end + + def self.validate_array!(value, param_name, min_size: 1) + validate_required!(value, param_name) + unless value.is_a?(Array) + raise ValidationError, "#{param_name} must be an Array, got #{value.class}" + end + if value.size < min_size + raise ValidationError, "#{param_name} must have at least #{min_size} element(s)" + end + end + + def self.validate_one_of!(options, names) + if options.values.all?(&:nil?) 
+ raise ValidationError, "must specify at least one of: #{names.join(', ')}" + end + end + end + end +end diff --git a/test/ruby_llm/text/classify_test.rb b/test/ruby_llm/text/classify_test.rb index 328a472..3f1470c 100644 --- a/test/ruby_llm/text/classify_test.rb +++ b/test/ruby_llm/text/classify_test.rb @@ -15,10 +15,10 @@ def test_classifies_text_into_provided_categories end def test_raises_error_when_categories_are_empty - error = assert_raises(ArgumentError) do + error = assert_raises(RubyLLM::Text::Validation::ValidationError) do RubyLLM::Text::Classify.call(@text, categories: []) end - assert_equal "categories are required", error.message + assert_equal "categories must have at least 1 element(s)", error.message end def test_builds_correct_prompt_with_category_list diff --git a/test/ruby_llm/text/extract_test.rb b/test/ruby_llm/text/extract_test.rb index 5d104d2..62b73f5 100644 --- a/test/ruby_llm/text/extract_test.rb +++ b/test/ruby_llm/text/extract_test.rb @@ -15,10 +15,10 @@ def test_extracts_structured_data_from_text end def test_raises_error_when_schema_is_missing - error = assert_raises(ArgumentError) do + error = assert_raises(RubyLLM::Text::Validation::ValidationError) do RubyLLM::Text::Extract.call(@text) end - assert_equal "schema is required for extraction", error.message + assert_equal "schema is required", error.message end def test_builds_correct_prompt_with_schema_fields diff --git a/test/ruby_llm/text/rewrite_test.rb b/test/ruby_llm/text/rewrite_test.rb index 6930e9d..db7212b 100644 --- a/test/ruby_llm/text/rewrite_test.rb +++ b/test/ruby_llm/text/rewrite_test.rb @@ -29,7 +29,7 @@ def test_rewrites_text_with_custom_instruction end def test_raises_error_when_no_transformation_specified - assert_raises(ArgumentError) do + assert_raises(RubyLLM::Text::Validation::ValidationError) do RubyLLM::Text::Rewrite.call(@casual_text) end end diff --git a/test/ruby_llm/text/validation_test.rb b/test/ruby_llm/text/validation_test.rb new file mode 100644 
index 0000000..c50dcbc --- /dev/null +++ b/test/ruby_llm/text/validation_test.rb @@ -0,0 +1,222 @@ +require "test_helper" + +class RubyLLM::Text::ValidationTest < Minitest::Test + # Test validate_text! + def test_validate_text_raises_on_nil + error = assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text::Validation.validate_text!(nil) + end + assert_equal "text cannot be nil", error.message + end + + def test_validate_text_raises_on_empty_string + error = assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text::Validation.validate_text!("") + end + assert_equal "text cannot be empty", error.message + end + + def test_validate_text_raises_on_whitespace_only + error = assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text::Validation.validate_text!(" \n\t ") + end + assert_equal "text cannot be empty", error.message + end + + def test_validate_text_raises_on_non_string + error = assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text::Validation.validate_text!(123) + end + assert_equal "text must be a String, got Integer", error.message + end + + def test_validate_text_uses_custom_param_name + error = assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text::Validation.validate_text!(nil, param_name: "question") + end + assert_equal "question cannot be nil", error.message + end + + def test_validate_text_passes_for_valid_string + assert_nil RubyLLM::Text::Validation.validate_text!("Hello world") + end + + # Test validate_required! 
+ def test_validate_required_raises_on_nil + error = assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text::Validation.validate_required!(nil, "schema") + end + assert_equal "schema is required", error.message + end + + def test_validate_required_passes_for_any_value + assert_nil RubyLLM::Text::Validation.validate_required!("value", "param") + assert_nil RubyLLM::Text::Validation.validate_required!([], "param") + assert_nil RubyLLM::Text::Validation.validate_required!(false, "param") + end + + # Test validate_array! + def test_validate_array_raises_on_nil + error = assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text::Validation.validate_array!(nil, "categories") + end + assert_equal "categories is required", error.message + end + + def test_validate_array_raises_on_non_array + error = assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text::Validation.validate_array!("not an array", "categories") + end + assert_equal "categories must be an Array, got String", error.message + end + + def test_validate_array_raises_on_empty_array + error = assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text::Validation.validate_array!([], "categories") + end + assert_equal "categories must have at least 1 element(s)", error.message + end + + def test_validate_array_respects_min_size + error = assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text::Validation.validate_array!([ "one" ], "categories", min_size: 2) + end + assert_equal "categories must have at least 2 element(s)", error.message + end + + def test_validate_array_passes_for_valid_array + assert_nil RubyLLM::Text::Validation.validate_array!([ "a", "b" ], "categories") + end + + # Test validate_one_of! 
+ def test_validate_one_of_raises_when_all_nil + error = assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text::Validation.validate_one_of!({ tone: nil, style: nil }, %w[tone style]) + end + assert_equal "must specify at least one of: tone, style", error.message + end + + def test_validate_one_of_passes_when_one_present + assert_nil RubyLLM::Text::Validation.validate_one_of!({ tone: :casual, style: nil }, %w[tone style]) + end + + # Test ValidationError is a subclass of Error + def test_validation_error_is_subclass_of_error + assert RubyLLM::Text::Validation::ValidationError < RubyLLM::Text::Error + end + + # Integration tests - operations validate their inputs + def test_summarize_validates_text + assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text.summarize(nil) + end + end + + def test_translate_validates_text + assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text.translate(nil, to: "French") + end + end + + def test_translate_validates_to_param + assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text.translate("Hello", to: nil) + end + end + + def test_extract_validates_text + assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text.extract(nil, schema: { name: :string }) + end + end + + def test_extract_validates_schema + assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text.extract("Hello", schema: nil) + end + end + + def test_classify_validates_text + assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text.classify(nil, categories: [ "a", "b" ]) + end + end + + def test_classify_validates_categories + assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text.classify("Hello", categories: []) + end + end + + def test_answer_validates_text + assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text.answer(nil, "What?") + end + end + + def 
test_answer_validates_question + assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text.answer("Hello", nil) + end + end + + def test_rewrite_validates_text + assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text.rewrite(nil, tone: :casual) + end + end + + def test_rewrite_validates_options + assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text.rewrite("Hello") + end + end + + def test_compare_validates_text1 + assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text.compare(nil, "text2") + end + end + + def test_compare_validates_text2 + assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text.compare("text1", nil) + end + end + + def test_detect_language_validates_text + assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text.detect_language(nil) + end + end + + def test_generate_tags_validates_text + assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text.generate_tags(nil) + end + end + + def test_anonymize_validates_text + assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text.anonymize(nil) + end + end + + def test_fix_grammar_validates_text + assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text.fix_grammar(nil) + end + end + + def test_sentiment_validates_text + assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text.sentiment(nil) + end + end + + def test_key_points_validates_text + assert_raises(RubyLLM::Text::Validation::ValidationError) do + RubyLLM::Text.key_points(nil) + end + end +end From 1929c47a36a43c3727209fb48b2a5e68003d360f Mon Sep 17 00:00:00 2001 From: Patrick Olsen Date: Thu, 19 Feb 2026 13:47:22 +0700 Subject: [PATCH 5/9] Add AI.md --- AI.md | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 AI.md diff --git a/AI.md b/AI.md new file mode 100644 index 
0000000..8d0c17a --- /dev/null +++ b/AI.md @@ -0,0 +1,124 @@ +# AI.md - Context for AI Assistants + +## Project Overview + +`ruby_llm-text` is a Ruby gem that provides ActiveSupport-style LLM utilities. It offers intuitive one-liner methods for common text operations powered by LLMs, making AI operations feel like native Ruby. + +**Repository:** https://github.com/patrols/ruby_llm-text +**License:** MIT +**Ruby:** >= 3.2.0 +**Core Dependency:** ruby_llm (~> 1.0) + +## Architecture + +### Directory Structure + +``` +lib/ruby_llm/ +├── text.rb # Main module, public API entry point +└── text/ + ├── base.rb # Shared LLM calling logic & schema building + ├── configuration.rb # Text-specific configuration + ├── validation.rb # Input validation helpers + ├── string_ext.rb # Optional String monkey-patching + └── [operation].rb # Individual operation modules +``` + +### Operation Pattern + +Each operation follows a consistent pattern: + +1. Module under `RubyLLM::Text::[OperationName]` +2. Class method `self.call(text, **options)` as entry point +3. Uses `Validation.validate_text!` for input validation +4. Calls `Base.call_llm(prompt, model:, **options)` to execute +5. 
Returns processed result (string, hash, or array depending on operation) + +Example operation file structure: +```ruby +module RubyLLM::Text::[OperationName] + def self.call(text, model: nil, **options) + Validation.validate_text!(text) + model ||= RubyLLM::Text.config.model_for(:operation_name) + prompt = build_prompt(text, **options) + Base.call_llm(prompt, model: model, **options) + end + + def self.build_prompt(text, **options) + # Build LLM prompt + end +end +``` + +### Available Operations + +| Method | Description | +|--------|-------------| +| `summarize` | Condense text to shorter summary | +| `translate` | Translate between languages | +| `extract` | Extract structured data from text | +| `classify` | Classify into predefined categories | +| `fix_grammar` | Correct grammar/spelling errors | +| `sentiment` | Analyze sentiment with confidence | +| `key_points` | Extract main points | +| `rewrite` | Transform tone and style | +| `answer` | Answer questions about text | +| `detect_language` | Identify text language | +| `generate_tags` | Generate relevant tags | +| `anonymize` | Remove/mask PII | +| `compare` | Compare two texts | + +## Development + +### Running Tests + +```bash +bundle exec rake test # Run all tests +bundle exec rake rubocop # Run linter +bundle exec rake # Run both +``` + +### Test Pattern + +Tests use Minitest with Mocha for mocking. Each operation has a corresponding `test/ruby_llm/text/[operation]_test.rb` file. Tests mock the LLM responses using: + +```ruby +mock_chat = mock("chat") +mock_response = mock("response") +RubyLLM.expects(:chat).returns(mock_chat) +mock_chat.stubs(:with_temperature).returns(mock_chat) +mock_chat.expects(:ask).returns(mock_response) +mock_response.expects(:content).returns("mocked response") +``` + +### Adding a New Operation + +1. Create `lib/ruby_llm/text/[operation].rb` following the operation pattern +2. Add `require_relative "text/[operation]"` to `lib/ruby_llm/text.rb` +3. 
Add module method in `RubyLLM::Text` class methods section +4. Add tests in `test/ruby_llm/text/[operation]_test.rb` +5. Update `lib/ruby_llm/text/string_ext.rb` if String extension desired +6. Document in README.md + +### Configuration + +Operations can be configured globally or per-call: + +```ruby +# Global configuration +RubyLLM::Text.configure do |config| + config.temperature = 0.3 + config.summarize_model = "gpt-4.1-mini" +end + +# Per-call override +RubyLLM::Text.summarize(text, model: "claude-sonnet-4-5") +``` + +## Code Style + +- Follow rubocop-rails-omakase conventions +- Keep operations focused and single-purpose +- Use keyword arguments for options +- Validate inputs early with helpful error messages +- Return clean data (strings, hashes, arrays) - not raw LLM response objects From b661b0da7f2aa564db9608c90ef1c62b162de666 Mon Sep 17 00:00:00 2001 From: Patrick Olsen Date: Thu, 19 Feb 2026 14:32:46 +0700 Subject: [PATCH 6/9] Add documentation for Phase 3 operations Co-Authored-By: Claude Opus 4.5 --- README.md | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 172 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 48d4100..3f3b674 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ ActiveSupport-style LLM utilities for Ruby that make AI operations feel like nat ## Overview -`ruby_llm-text` provides intuitive one-liner utility methods for common LLM tasks including text summarization, translation, data extraction, classification, grammar correction, sentiment analysis, key point extraction, text rewriting, and question answering. It integrates seamlessly with the [ruby_llm](https://github.com/crmne/ruby_llm) ecosystem, providing a simple interface without requiring chat objects, message arrays, or configuration boilerplate. 
+`ruby_llm-text` provides intuitive one-liner utility methods for common LLM tasks including text summarization, translation, data extraction, classification, grammar correction, sentiment analysis, key point extraction, text rewriting, question answering, language detection, tag generation, PII anonymization, and text comparison. It integrates seamlessly with the [ruby_llm](https://github.com/crmne/ruby_llm) ecosystem, providing a simple interface without requiring chat objects, message arrays, or configuration boilerplate. ## Installation @@ -374,6 +374,168 @@ RubyLLM::Text.answer(article, "What is Python?") # => "information not available" ``` +### Detect Language + +Identify the language of text. + +```ruby +RubyLLM::Text.detect_language(text, include_confidence: false, model: nil) +``` + +**Parameters:** + +- `text` (String): The text to analyze +- `include_confidence` (Boolean, optional): Return language code and confidence score (default: false) +- `model` (String, optional): Specific model to use + +**Examples:** + +```ruby +# Basic language detection +RubyLLM::Text.detect_language("Bonjour le monde") +# => "French" + +# With confidence and language code +RubyLLM::Text.detect_language("Hello world", include_confidence: true) +# => {"language" => "English", "confidence" => 0.98, "code" => "en"} + +# Mixed or ambiguous text +RubyLLM::Text.detect_language("Hola hello bonjour") +# => "Spanish" (or may return "unknown" for highly mixed text) +``` + +### Generate Tags + +Extract relevant tags and keywords from text. 
+ +```ruby +RubyLLM::Text.generate_tags(text, max_tags: nil, style: :keywords, model: nil) +``` + +**Parameters:** + +- `text` (String): The text to extract tags from +- `max_tags` (Integer, optional): Maximum number of tags to generate +- `style` (Symbol, optional): Tag style (`:keywords`, `:topics`, `:hashtags`) +- `model` (String, optional): Specific model to use + +**Examples:** + +```ruby +# Basic tag generation +article = "Ruby is a dynamic programming language focused on simplicity..." +RubyLLM::Text.generate_tags(article) +# => ["ruby", "programming", "dynamic language", "simplicity"] + +# Limit number of tags +RubyLLM::Text.generate_tags(article, max_tags: 3) +# => ["ruby", "programming", "dynamic language"] + +# Topic-style tags (broader categories) +RubyLLM::Text.generate_tags(article, style: :topics) +# => ["Programming Languages", "Software Development", "Technology"] + +# Hashtag-style for social media +RubyLLM::Text.generate_tags(article, style: :hashtags) +# => ["#ruby", "#programming", "#coding", "#developer"] +``` + +### Anonymize + +Remove or replace personally identifiable information (PII) from text. 
+ +```ruby +RubyLLM::Text.anonymize(text, pii_types: [:names, :emails, :phones, :addresses], replacement_style: :generic, include_mapping: false, model: nil) +``` + +**Parameters:** + +- `text` (String): The text to anonymize +- `pii_types` (Array, optional): Types of PII to detect (`:names`, `:emails`, `:phones`, `:addresses`, `:ssn`, `:credit_cards`) +- `replacement_style` (Symbol, optional): How to format replacements (`:generic`, `:numbered`, `:descriptive`) +- `include_mapping` (Boolean, optional): Return mapping of replacements to original values (default: false) +- `model` (String, optional): Specific model to use + +**Examples:** + +```ruby +# Basic anonymization +text = "Contact John Smith at john@example.com or 555-123-4567" +RubyLLM::Text.anonymize(text) +# => "Contact [PERSON] at [EMAIL] or [PHONE]" + +# With numbered replacements for multiple entities +coworkers = "John and Jane work at 123 Main St" +RubyLLM::Text.anonymize(coworkers, replacement_style: :numbered) +# => "[PERSON_1] and [PERSON_2] work at [ADDRESS_1]" + +# Get mapping of replacements +result = RubyLLM::Text.anonymize(text, include_mapping: true) +# => { +# "text" => "Contact [PERSON_1] at [EMAIL_1]", +# "mapping" => { +# "[PERSON_1]" => "John Smith", +# "[EMAIL_1]" => "john@example.com" +# } +# } + +# Selective PII types +RubyLLM::Text.anonymize(text, pii_types: [:emails, :phones]) +# => "Contact John Smith at [EMAIL] or [PHONE]" +``` + +### Compare + +Compare two texts for similarity and differences. 
+ +```ruby +RubyLLM::Text.compare(text1, text2, comparison_type: :similarity, model: nil) +``` + +**Parameters:** + +- `text1` (String): The first text to compare +- `text2` (String): The second text to compare +- `comparison_type` (Symbol, optional): Type of comparison (`:similarity`, `:detailed`, `:changes`) +- `model` (String, optional): Specific model to use + +**Examples:** + +```ruby +# Basic similarity comparison +text1 = "The quick brown fox jumps over the lazy dog" +text2 = "A fast brown fox leaps over a sleepy dog" +RubyLLM::Text.compare(text1, text2) +# => { +# "similarity" => 0.85, +# "comparison_type" => "similarity", +# "similarity_type" => "semantic", +# "summary" => "Both texts describe a fox jumping over a dog..." +# } + +# Detailed comparison +RubyLLM::Text.compare(text1, text2, comparison_type: :detailed) +# => { +# "similarity" => 0.85, +# "comparison_type" => "detailed", +# "differences" => ["Word choice varies", "Adjectives differ"], +# "commonalities" => ["Same basic action", "Same subjects"], +# "summary" => "The texts convey the same meaning with different wording" +# } + +# Track changes between versions +original = "Our product costs $99 and ships in 3 days" +revised = "Our product costs $79 and ships in 2 days" +RubyLLM::Text.compare(original, revised, comparison_type: :changes) +# => { +# "similarity" => 0.90, +# "comparison_type" => "changes", +# "change_types" => ["modification"], +# "examples" => ["Price changed from $99 to $79", "Shipping time reduced"], +# "assessment" => "Minor updates to pricing and shipping information" +# } +``` + ## Configuration This gem uses `ruby_llm`'s configuration for API keys and default models: @@ -404,6 +566,10 @@ RubyLLM::Text.configure do |config| config.key_points_model = "gpt-4.1-mini" # Good for summarization tasks config.rewrite_model = "gpt-4.1" # Creative rewriting tasks config.answer_model = "claude-sonnet-4-5" # Strong reasoning for Q&A + config.detect_language_model = "gpt-4.1-mini" # Fast 
language detection + config.generate_tags_model = "gpt-4.1-mini" # Good for keyword extraction + config.anonymize_model = "gpt-4.1" # Accurate PII detection + config.compare_model = "claude-sonnet-4-5" # Strong for nuanced comparison end ``` @@ -434,6 +600,10 @@ require 'ruby_llm/text/string_ext' "Long meeting notes...".key_points(max_points: 3) "hey whats up".rewrite(tone: :professional) "Ruby was created in 1995".answer("When was Ruby created?") +"Bonjour le monde".detect_language +"Long article about Ruby...".generate_tags(max_tags: 5) +"Contact John at john@example.com".anonymize +"Text A".compare("Text B") ``` ## Integration with ruby_llm @@ -502,7 +672,7 @@ export ANTHROPIC_API_KEY="your-key" bin/manual-test ``` -This script tests all nine methods with real LLM APIs and provides helpful output for verification. +This script tests all methods with real LLM APIs and provides helpful output for verification. ## Contributing From 1a916f09655c8e1bd0e0d6d65809daa94f722620 Mon Sep 17 00:00:00 2001 From: Patrick Olsen Date: Thu, 19 Feb 2026 14:33:40 +0700 Subject: [PATCH 7/9] Bump version to 0.3.0 Co-Authored-By: Claude Opus 4.5 --- CHANGELOG.md | 19 ++++++++++++++++++- lib/ruby_llm/text/version.rb | 2 +- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea9a088..d3fd2da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.3.0] - 2026-02-19 + +### Added +- **Phase 3 Text Operations** - Four new methods for expanded text processing: + - `detect_language` - Identify the language of text with optional confidence scores and ISO codes + - `generate_tags` - Extract relevant tags/keywords with style options (keywords, topics, hashtags) + - `anonymize` - Remove or replace PII (names, emails, phones, addresses, SSN, credit cards) with configurable replacement styles + - `compare` - Compare two texts for similarity with 
detailed analysis options (similarity, detailed, changes) +- **Input Validation** - New `Validation` module providing consistent, helpful error messages across all operations +- **Extended String Extensions** - All Phase 3 methods available as String monkey-patches +- **Comprehensive Test Coverage** - Full test suites for all new operations and validation + +### Improved +- **Error Messages** - Clear, actionable error messages for invalid inputs +- **Test Coverage** - Expanded test suite covering all 13 methods + ## [0.2.0] - 2025-02-17 ### Added @@ -51,6 +67,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - ruby_llm ~> 1.0 (core dependency) - Ruby >= 3.2.0 -[Unreleased]: https://github.com/patrols/ruby_llm-text/compare/v0.2.0...HEAD +[Unreleased]: https://github.com/patrols/ruby_llm-text/compare/v0.3.0...HEAD +[0.3.0]: https://github.com/patrols/ruby_llm-text/compare/v0.2.0...v0.3.0 [0.2.0]: https://github.com/patrols/ruby_llm-text/compare/v0.1.0...v0.2.0 [0.1.0]: https://github.com/patrols/ruby_llm-text/releases/tag/v0.1.0 \ No newline at end of file diff --git a/lib/ruby_llm/text/version.rb b/lib/ruby_llm/text/version.rb index c36953d..c4034e4 100644 --- a/lib/ruby_llm/text/version.rb +++ b/lib/ruby_llm/text/version.rb @@ -1,5 +1,5 @@ module RubyLLM module Text - VERSION = "0.2.0" + VERSION = "0.3.0" end end From f53aa72bf9edfbc4790089d94ccf6fa31a4b1d04 Mon Sep 17 00:00:00 2001 From: Patrick Olsen Date: Thu, 19 Feb 2026 14:53:18 +0700 Subject: [PATCH 8/9] Fix prompt nesting bug in compare.rb else branch The else branch was recursively calling build_prompt, which returns a complete prompt string. This caused the instruction to contain a nested prompt instead of just the instruction text. Now uses the instruction string directly, matching the pattern in generate_tags.rb. 
Co-Authored-By: Claude Opus 4.5 --- lib/ruby_llm/text/compare.rb | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/ruby_llm/text/compare.rb b/lib/ruby_llm/text/compare.rb index f44108f..8ba7d25 100644 --- a/lib/ruby_llm/text/compare.rb +++ b/lib/ruby_llm/text/compare.rb @@ -63,7 +63,14 @@ def self.build_prompt(text1, text2, comparison_type:) Focus on tracking edits and revisions between the versions. INSTRUCTION else - comparison_instruction = build_prompt(text1, text2, comparison_type: :similarity) + comparison_instruction = <<~INSTRUCTION + Compare the two texts and provide: + - A similarity score from 0 to 1 (where 1 is identical and 0 is completely different) + - The type of similarity detected (semantic, structural, topical, etc.) + - A brief summary of what makes them similar or different + + Focus on semantic similarity - texts with the same meaning should score high even if worded differently. + INSTRUCTION end <<~PROMPT From 234a7cb2203d7f4623934f88cbce5adb4f55f693 Mon Sep 17 00:00:00 2001 From: Patrick Olsen Date: Thu, 19 Feb 2026 15:16:57 +0700 Subject: [PATCH 9/9] Fix anonymize PII handling for all styles and :all shortcut - Add ALL_PII_TYPES constant with all 6 PII types - Make :all shortcut expand to ALL_PII_TYPES (not just defaults) - Add SSN and credit_cards handling to :numbered style - Add SSN and credit_cards handling to :descriptive style Co-Authored-By: Claude Opus 4.5 --- lib/ruby_llm/text/anonymize.rb | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/lib/ruby_llm/text/anonymize.rb b/lib/ruby_llm/text/anonymize.rb index d393410..d932328 100644 --- a/lib/ruby_llm/text/anonymize.rb +++ b/lib/ruby_llm/text/anonymize.rb @@ -1,7 +1,9 @@ module RubyLLM module Text module Anonymize - # Default PII types to detect and anonymize + # All available PII types + ALL_PII_TYPES = [ :names, :emails, :phones, :addresses, :ssn, :credit_cards ].freeze + # Default PII types to detect and 
anonymize (conservative set) DEFAULT_PII_TYPES = [ :names, :emails, :phones, :addresses ].freeze def self.call(text, pii_types: DEFAULT_PII_TYPES, replacement_style: :generic, include_mapping: false, model: nil, **options) @@ -9,7 +11,7 @@ def self.call(text, pii_types: DEFAULT_PII_TYPES, replacement_style: :generic, i model ||= RubyLLM::Text.config.model_for(:anonymize) # Handle :all shortcut for all PII types - pii_types = DEFAULT_PII_TYPES if pii_types == [ :all ] + pii_types = ALL_PII_TYPES if pii_types == [ :all ] prompt = build_prompt(text, pii_types: pii_types, replacement_style: replacement_style, include_mapping: include_mapping) @@ -79,7 +81,7 @@ def self.build_prompt(text, pii_types:, replacement_style:, include_mapping:) def self.build_pii_instructions(pii_types) # Handle :all shortcut - pii_types = DEFAULT_PII_TYPES if pii_types == [ :all ] + pii_types = ALL_PII_TYPES if pii_types == [ :all ] instructions = [ "Identify and replace the following types of PII:" ] @@ -112,7 +114,7 @@ def self.build_pii_instructions(pii_types) def self.build_replacement_instructions(replacement_style, pii_types) # Handle :all shortcut - pii_types = DEFAULT_PII_TYPES if pii_types == [ :all ] + pii_types = ALL_PII_TYPES if pii_types == [ :all ] instructions = [ "Use #{replacement_style} replacement tokens:" ] @@ -149,6 +151,12 @@ def self.build_replacement_instructions(replacement_style, pii_types) if pii_types.include?(:addresses) instructions << "- Addresses: [ADDRESS_1], [ADDRESS_2], etc." end + if pii_types.include?(:ssn) + instructions << "- SSN: [SSN_1], [SSN_2], etc." + end + if pii_types.include?(:credit_cards) + instructions << "- Credit Cards: [CREDIT_CARD_1], [CREDIT_CARD_2], etc." 
+ end when :descriptive if pii_types.include?(:names) instructions << "- Names: [FIRST_NAME], [LAST_NAME], [FULL_NAME]" @@ -162,6 +170,12 @@ def self.build_replacement_instructions(replacement_style, pii_types) if pii_types.include?(:addresses) instructions << "- Addresses: [STREET_ADDRESS], [CITY], [POSTAL_CODE]" end + if pii_types.include?(:ssn) + instructions << "- SSN: [SOCIAL_SECURITY_NUMBER]" + end + if pii_types.include?(:credit_cards) + instructions << "- Credit Cards: [CREDIT_CARD_NUMBER]" + end else return build_replacement_instructions(:generic, pii_types) end