From 31be379f98ef8c0c7aace36d65af7efe16062b08 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Mon, 26 Jan 2026 17:28:31 +0100 Subject: [PATCH] Remove `Prism.lex_ripper` Since `on_sp` is emitted, it doesn't do a whole lot anymore. This leaves one incompatibility for code like `"x#$%"` Ripper confuses this for bare interpolation with a global, but `$%` is not a valid global name. Still, it emits two string tokens in such a case. It doesn't make sense for prism to work around this bug, so the affected files are added as excludes. Since the only usage of this method makes sense for testing in prism itself, the method is removed instead of deprecated. --- Steepfile | 1 - bin/prism | 12 ++------ lib/prism.rb | 11 -------- lib/prism/lex_ripper.rb | 55 ------------------------------------- prism.gemspec | 1 - rakelib/lex.rake | 27 +++++++++++++++--- rakelib/typecheck.rake | 1 - rbi/prism.rbi | 3 -- templates/sig/prism.rbs.erb | 4 --- test/prism/bom_test.rb | 3 +- test/prism/lex_test.rb | 3 +- 11 files changed, 29 insertions(+), 92 deletions(-) delete mode 100644 lib/prism/lex_ripper.rb diff --git a/Steepfile b/Steepfile index 1aafafc523..433e53cd29 100644 --- a/Steepfile +++ b/Steepfile @@ -10,7 +10,6 @@ target :lib do # TODO: Type-checking these files is still WIP ignore "lib/prism/desugar_compiler.rb" ignore "lib/prism/lex_compat.rb" - ignore "lib/prism/lex_ripper.rb" ignore "lib/prism/serialize.rb" ignore "lib/prism/ffi.rb" ignore "lib/prism/translation" diff --git a/bin/prism b/bin/prism index d91d9990bf..392fb1cb7f 100755 --- a/bin/prism +++ b/bin/prism @@ -210,18 +210,10 @@ module Prism # bin/prism lex_compat [source] def lex_compat(argv) + require "ripper" source, filepath = read_source(argv) - ripper_value = - begin - Prism.lex_ripper(source) - rescue ArgumentError, SyntaxError - # If Ripper raises a syntax error, we want to continue as if it didn't - # return any tokens at all. prism won't raise a syntax error, so it's - # nicer to still be able to see the tokens that prism generated. - [] - end - + ripper_value = Ripper.lex(source) prism_compat = Prism.lex_compat(source, filepath: filepath) prism = Prism.lex(source, filepath: filepath) diff --git a/lib/prism.rb b/lib/prism.rb index dab3420377..781bd4bb01 100644 --- a/lib/prism.rb +++ b/lib/prism.rb @@ -20,7 +20,6 @@ module Prism autoload :DSL, "prism/dsl" autoload :InspectVisitor, "prism/inspect_visitor" autoload :LexCompat, "prism/lex_compat" - autoload :LexRipper, "prism/lex_ripper" autoload :MutationCompiler, "prism/mutation_compiler" autoload :Pack, "prism/pack" autoload :Pattern, "prism/pattern" @@ -35,7 +34,6 @@ module Prism # private here. private_constant :LexCompat - private_constant :LexRipper # Raised when requested to parse as the currently running Ruby version but Prism has no support for it. class CurrentVersionError < ArgumentError @@ -68,15 +66,6 @@ def self.lex_compat(source, **options) LexCompat.new(source, **options).result # steep:ignore end - # :call-seq: - # Prism::lex_ripper(source) -> Array - # - # This wraps the result of Ripper.lex. It produces almost exactly the - # same tokens. Raises SyntaxError if the syntax in source is invalid. - def self.lex_ripper(source) - LexRipper.new(source).result # steep:ignore - end - # :call-seq: # Prism::load(source, serialized, freeze) -> ParseResult # diff --git a/lib/prism/lex_ripper.rb b/lib/prism/lex_ripper.rb deleted file mode 100644 index f069e50ba9..0000000000 --- a/lib/prism/lex_ripper.rb +++ /dev/null @@ -1,55 +0,0 @@ -# frozen_string_literal: true -# :markup: markdown - -require "ripper" - -module Prism - # This is a class that wraps the Ripper lexer to produce almost exactly the - # same tokens. - class LexRipper # :nodoc: - attr_reader :source - - def initialize(source) - @source = source - end - - def result - previous = [] #: [[Integer, Integer], Symbol, String, untyped] | [] - results = [] #: Array[[[Integer, Integer], Symbol, String, untyped]] - - lex(source).each do |token| - case token[1] - when :on_tstring_content - if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@")) - previous[2] << token[2] - else - results << token - previous = token - end - else - results << token - previous = token - end - end - - results - end - - private - - if Ripper.method(:lex).parameters.assoc(:keyrest) - def lex(source) - Ripper.lex(source, raise_errors: true) - end - else - def lex(source) - ripper = Ripper::Lexer.new(source) - ripper.lex.tap do |result| - raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any? - end - end - end - end - - private_constant :LexRipper -end diff --git a/prism.gemspec b/prism.gemspec index 283c7b04aa..8c9b140f0e 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -77,7 +77,6 @@ Gem::Specification.new do |spec| "lib/prism/ffi.rb", "lib/prism/inspect_visitor.rb", "lib/prism/lex_compat.rb", - "lib/prism/lex_ripper.rb", "lib/prism/mutation_compiler.rb", "lib/prism/node_ext.rb", "lib/prism/node.rb", diff --git a/rakelib/lex.rake b/rakelib/lex.rake index 59f7a52dd4..63fc5e4e25 100644 --- a/rakelib/lex.rake +++ b/rakelib/lex.rake @@ -1,9 +1,11 @@ # frozen_string_literal: true # typed: ignore +require "ripper" + module Prism - # This class is responsible for lexing files with both lex_compat and - # lex_ripper and ensuring they match up. It keeps track of the files which + # This class is responsible for lexing files with both prism and + # ripper and ensuring they match up. It keeps track of the files which # failed to match up, and the files which passed. class LexTask attr_reader :failing_files, :passing_file_count @@ -28,7 +30,7 @@ module Prism end result = Prism.lex_compat(source) - if result.errors.empty? && Prism.lex_ripper(source) == result.value + if result.errors.empty? && Ripper.lex(source) == result.value @passing_file_count += 1 true else @@ -98,6 +100,10 @@ TARGETS = { # Requires an implicit -x, which ripper does not respect "tool/merger.rb", + + # Contains `"x#$%"` which looks like bare interpolation but $% is not a valid global. + # This confuses ripper, it emits two string tokens. https://bugs.ruby-lang.org/issues/21849 + "ext/psych/lib/psych/scalar_scanner.rb", ] }, discourse: { @@ -265,6 +271,16 @@ TOP_100_GEMS_INVALID_SYNTAX_PREFIXES = %w[ top-100-gems/devise-4.9.2/lib/generators/templates/controllers/ top-100-gems/fastlane-2.212.1/fastlane/lib/assets/custom_action_template.rb ] +TOP_100_GEMS_LEX_RIPPER_BUG = [ + # Contains code like `"x#$%"` which looks like bare interpolation but $% is not a valid global. + # This confuses ripper, it emits two string tokens. https://bugs.ruby-lang.org/issues/21849 + "faker-3.1.1/lib/faker/default/internet.rb", + "ruby_parser-3.20.0/test/test_ruby_parser.rb", + "rouge-4.1.0/lib/rouge/lexers/cisco_ios.rb", + "rouge-4.1.0/lib/rouge/lexers/ghc_cmm.rb", + "rouge-4.1.0/lib/rouge/lexers/nasm.rb", + "rouge-4.1.0/lib/rouge/lexers/velocity.rb", +] namespace :download do directory TOP_100_GEMS_DIR @@ -346,7 +362,10 @@ task "lex:topgems": ["download:topgems", :compile] do lex_task.compare(filepath) end - gem_failing_files = lex_task.failing_files.map { |todo| todo.delete_prefix("#{directory}/") } + gem_failing_files = lex_task.failing_files.filter_map do |todo| + next if TOP_100_GEMS_LEX_RIPPER_BUG.any? { |path| todo.end_with?(path) } + todo.delete_prefix("#{directory}/") + end failing_files[gem_name] = gem_failing_files if gem_failing_files.any? end diff --git a/rakelib/typecheck.rake b/rakelib/typecheck.rake index b11df3a6be..5da23ac34f 100644 --- a/rakelib/typecheck.rake +++ b/rakelib/typecheck.rake @@ -20,7 +20,6 @@ namespace :typecheck do File.write("sorbet/typed_overrides.yml", ERB.new(<<~YAML, trim_mode: "-").result_with_hash(locals)) false: - ./lib/prism/lex_compat.rb - - ./lib/prism/lex_ripper.rb - ./lib/prism/node_ext.rb - ./lib/prism/parse_result.rb - ./lib/prism/visitor.rb diff --git a/rbi/prism.rbi b/rbi/prism.rbi index 8866e7b3f2..b31adc64a1 100644 --- a/rbi/prism.rbi +++ b/rbi/prism.rbi @@ -16,9 +16,6 @@ module Prism sig { params(source: String, options: T::Hash[Symbol, T.untyped]).returns(Prism::LexCompat::Result) } def self.lex_compat(source, **options); end - sig { params(source: String).returns(T::Array[T.untyped]) } - def self.lex_ripper(source); end - sig { params(source: String, serialized: String, freeze: T.nilable(T::Boolean)).returns(Prism::ParseResult) } def self.load(source, serialized, freeze = false); end diff --git a/templates/sig/prism.rbs.erb b/templates/sig/prism.rbs.erb index 5c74cee8f8..e7b317f271 100644 --- a/templates/sig/prism.rbs.erb +++ b/templates/sig/prism.rbs.erb @@ -41,10 +41,6 @@ module Prism ?bool freeze ) -> ParseResult - def self.lex_ripper: ( - String source - ) -> Array[[[Integer, Integer], Symbol, String, untyped]] - # Methods taking a path to a Ruby file: <%- { diff --git a/test/prism/bom_test.rb b/test/prism/bom_test.rb index 890bc4b36c..0fa00ae4e8 100644 --- a/test/prism/bom_test.rb +++ b/test/prism/bom_test.rb @@ -5,6 +5,7 @@ return if RUBY_ENGINE != "ruby" require_relative "test_helper" +require "ripper" module Prism class BOMTest < TestCase @@ -53,7 +54,7 @@ def test_string def assert_bom(source) bommed = "\xEF\xBB\xBF#{source}" - assert_equal Prism.lex_ripper(bommed), Prism.lex_compat(bommed).value + assert_equal Ripper.lex(bommed), Prism.lex_compat(bommed).value end end end diff --git a/test/prism/lex_test.rb b/test/prism/lex_test.rb index ea4606d2fb..9a9f203c28 100644 --- a/test/prism/lex_test.rb +++ b/test/prism/lex_test.rb @@ -3,6 +3,7 @@ return if !(RUBY_ENGINE == "ruby" && RUBY_VERSION >= "3.2.0") require_relative "test_helper" +require "ripper" module Prism class LexTest < TestCase @@ -49,7 +50,7 @@ def test_parse_lex_file if RUBY_VERSION >= "3.3" def test_lex_compare prism = Prism.lex_compat(File.read(__FILE__), version: "current").value - ripper = Prism.lex_ripper(File.read(__FILE__)) + ripper = Ripper.lex(File.read(__FILE__)) assert_equal(ripper, prism) end end