From 6f947a4b5d7c52aa4e56e130662a5cbfa4622f4f Mon Sep 17 00:00:00 2001 From: Takumi Shotoku Date: Sun, 1 Feb 2026 16:34:05 +0900 Subject: [PATCH 1/4] Prevent `Hash#[]=` on local variables from polluting parameter types When `Hash#[]=` was called on a local variable inside a method, the assigned key/value types flowed back into the method's parameter type. This caused type explosion in real-world code like Action Pack's `url_for`, where the parameter type grew into a deeply nested recursive Hash type. Introduce HashAsetBox to handle `Hash#[]=` on local variables with flow-sensitive tracking. Instead of modifying the original hash type via backflow, it creates a new variable version with the updated type, so the parameter type only reflects types from call sites. --- lib/typeprof/core/ast/call.rb | 13 +++++ lib/typeprof/core/builtin.rb | 9 +++- lib/typeprof/core/graph/box.rb | 70 +++++++++++++++++++++++++++ lib/typeprof/core/graph/change_set.rb | 5 ++ scenario/hash/hash_aset.rb | 22 +++++++++ 5 files changed, 118 insertions(+), 1 deletion(-) create mode 100644 scenario/hash/hash_aset.rb diff --git a/lib/typeprof/core/ast/call.rb b/lib/typeprof/core/ast/call.rb index a2e97728c..191532895 100644 --- a/lib/typeprof/core/ast/call.rb +++ b/lib/typeprof/core/ast/call.rb @@ -164,6 +164,15 @@ def install0(genv) @changes.add_edge(genv, allow_nil, ret) end + if @mid == :[]= && @recv.is_a?(LocalVariableReadNode) + key_node = @positional_args[0] + if key_node.is_a?(SymbolNode) + recv_vtx = @lenv.get_var(@recv.var) + nvtx = @lenv.new_var(@recv.var, self) + @changes.add_hash_aset_box(genv, recv_vtx, key_node.lit, ret, nvtx) + end + end + ret end @@ -188,6 +197,10 @@ def retrieve_at(pos, &blk) end def modified_vars(tbl, vars) + if @mid == :[]= && @recv.is_a?(LocalVariableReadNode) && tbl.include?(@recv.var) + key_node = @positional_args[0] + vars << @recv.var if key_node.is_a?(SymbolNode) + end subnodes.each do |key, subnode| next unless subnode if subnode.is_a?(AST::Node) diff 
--git a/lib/typeprof/core/builtin.rb b/lib/typeprof/core/builtin.rb index cf6e270e4..8a3f45f62 100644 --- a/lib/typeprof/core/builtin.rb +++ b/lib/typeprof/core/builtin.rb @@ -111,9 +111,16 @@ def hash_aref(changes, node, ty, a_args, ret) def hash_aset(changes, node, ty, a_args, ret) if a_args.positionals.size == 2 + val = a_args.positionals[1] + + # Skip backflow for local variable receivers (handled by HashAsetBox) + if node.recv.is_a?(AST::LocalVariableReadNode) + changes.add_edge(@genv, val, ret) + return true + end + case ty when Type::Hash - val = a_args.positionals[1] idx = node.positional_args[0] if idx.is_a?(AST::SymbolNode) && ty.get_value(idx.lit) # TODO: how to handle new key? diff --git a/lib/typeprof/core/graph/box.rb b/lib/typeprof/core/graph/box.rb index 9c62aeb6a..891508906 100644 --- a/lib/typeprof/core/graph/box.rb +++ b/lib/typeprof/core/graph/box.rb @@ -1105,4 +1105,74 @@ def run0(genv, changes) changes.add_edge(genv, source_vtx, @ret) end end + + class HashAsetBox < Box + def initialize(node, genv, recv, key_sym, val_vtx, out_vtx) + super(node) + @recv = recv + @key_sym = key_sym + @val_vtx = val_vtx + @out_vtx = out_vtx + @recv.add_edge(genv, self) + @val_vtx.add_edge(genv, self) + end + + attr_reader :recv, :key_sym, :val_vtx, :out_vtx + + def ret = @out_vtx + + def destroy(genv) + @recv.remove_edge(genv, self) + @val_vtx.remove_edge(genv, self) + super(genv) + end + + def run0(genv, changes) + @recv.each_type do |ty| + case ty + when Type::Record + new_fields = {} + ty.fields.each do |key, field_vtx| + new_vtx = Vertex.new(@node) + changes.add_edge(genv, field_vtx, new_vtx) + new_fields[key] = new_vtx + end + new_fields[@key_sym] ||= Vertex.new(@node) + changes.add_edge(genv, @val_vtx, new_fields[@key_sym]) + unified_key = Vertex.new(@node) + unified_val = Vertex.new(@node) + new_fields.each do |key, vtx| + changes.add_edge(genv, Source.new(Type::Symbol.new(genv, key)), unified_key) + changes.add_edge(genv, vtx, unified_val) + end + 
base_type = genv.gen_hash_type(unified_key, unified_val) + new_record = Type::Record.new(genv, new_fields, base_type) + changes.add_edge(genv, Source.new(new_record), @out_vtx) + when Type::Hash + build_merged_hash_type(genv, changes, ty.get_key, ty.get_value) + when Type::Instance + if ty.mod == genv.mod_hash + build_merged_hash_type(genv, changes, ty.args[0], ty.args[1]) + else + changes.add_edge(genv, Source.new(ty), @out_vtx) + end + else + changes.add_edge(genv, Source.new(ty), @out_vtx) + end + end + end + + private + + def build_merged_hash_type(genv, changes, old_key_vtx, old_val_vtx) + new_key = Vertex.new(@node) + new_val = Vertex.new(@node) + changes.add_edge(genv, old_key_vtx, new_key) + changes.add_edge(genv, Source.new(Type::Symbol.new(genv, @key_sym)), new_key) + changes.add_edge(genv, old_val_vtx, new_val) + changes.add_edge(genv, @val_vtx, new_val) + new_hash_type = genv.gen_hash_type(new_key, new_val) + changes.add_edge(genv, Source.new(new_hash_type), @out_vtx) + end + end end diff --git a/lib/typeprof/core/graph/change_set.rb b/lib/typeprof/core/graph/change_set.rb index 8e7d60089..08a2415e6 100644 --- a/lib/typeprof/core/graph/change_set.rb +++ b/lib/typeprof/core/graph/change_set.rb @@ -137,6 +137,11 @@ def add_instance_type_box(genv, singleton_ty_vtx) @new_boxes[key] ||= InstanceTypeBox.new(@node, genv, singleton_ty_vtx) end + def add_hash_aset_box(genv, recv, key_sym, val_vtx, out_vtx) + key = [:hash_aset, recv, key_sym, val_vtx, out_vtx] + @new_boxes[key] ||= HashAsetBox.new(@node, genv, recv, key_sym, val_vtx, out_vtx) + end + def add_diagnostic(meth, msg, node = @node) @new_diagnostics << TypeProf::Diagnostic.new(node, meth, msg) end diff --git a/scenario/hash/hash_aset.rb b/scenario/hash/hash_aset.rb new file mode 100644 index 000000000..cf3974137 --- /dev/null +++ b/scenario/hash/hash_aset.rb @@ -0,0 +1,22 @@ +## update +def foo(options) + return if options[:skip] + + options[:name] = "str" + bar(options) + nil +end + +def bar(options) 
+ options[:age] = 10 + nil +end + +args = Hash.new +foo(args) + +## assert +class Object + def foo: (Hash[:skip, untyped]) -> nil + def bar: (Hash[:name | :skip, String]) -> nil +end From e508a532eb4551ef57aab67caa218a15a02a84bb Mon Sep 17 00:00:00 2001 From: Takumi Shotoku Date: Sun, 8 Feb 2026 15:32:03 +0900 Subject: [PATCH 2/4] Fix infinite loop in HashAsetBox when Hash#[]= is used inside loops HashAsetBox.run0 created new Vertex objects on every invocation. Since Type memoization relies on object identity, each run produced "new" types that fed back through loop back-edges, preventing the graph from reaching a fixed point. Cache Vertex objects as instance variables so they are reused across runs, allowing Type memoization to recognize identical structures and the analysis to converge. --- lib/typeprof/core/graph/box.rb | 39 +++++++++++++++++++-------------- scenario/hash/hash_aset_loop.rb | 14 ++++++++++++ 2 files changed, 36 insertions(+), 17 deletions(-) create mode 100644 scenario/hash/hash_aset_loop.rb diff --git a/lib/typeprof/core/graph/box.rb b/lib/typeprof/core/graph/box.rb index 891508906..03c9bf6e5 100644 --- a/lib/typeprof/core/graph/box.rb +++ b/lib/typeprof/core/graph/box.rb @@ -1115,6 +1115,14 @@ def initialize(node, genv, recv, key_sym, val_vtx, out_vtx) @out_vtx = out_vtx @recv.add_edge(genv, self) @val_vtx.add_edge(genv, self) + # Cache vertices to ensure convergence in loops. + # Without caching, run0 creates new Vertex objects each time, + # producing new Type objects that prevent the fixed-point from being reached. 
+ @field_cache = {} + @unified_key = Vertex.new(node) + @unified_val = Vertex.new(node) + @merged_key = Vertex.new(node) + @merged_val = Vertex.new(node) end attr_reader :recv, :key_sym, :val_vtx, :out_vtx @@ -1133,19 +1141,18 @@ def run0(genv, changes) when Type::Record new_fields = {} ty.fields.each do |key, field_vtx| - new_vtx = Vertex.new(@node) - changes.add_edge(genv, field_vtx, new_vtx) - new_fields[key] = new_vtx + @field_cache[key] ||= Vertex.new(@node) + changes.add_edge(genv, field_vtx, @field_cache[key]) unless field_vtx.equal?(@field_cache[key]) + new_fields[key] = @field_cache[key] end - new_fields[@key_sym] ||= Vertex.new(@node) - changes.add_edge(genv, @val_vtx, new_fields[@key_sym]) - unified_key = Vertex.new(@node) - unified_val = Vertex.new(@node) + @field_cache[@key_sym] ||= Vertex.new(@node) + new_fields[@key_sym] = @field_cache[@key_sym] + changes.add_edge(genv, @val_vtx, @field_cache[@key_sym]) new_fields.each do |key, vtx| - changes.add_edge(genv, Source.new(Type::Symbol.new(genv, key)), unified_key) - changes.add_edge(genv, vtx, unified_val) + changes.add_edge(genv, Source.new(Type::Symbol.new(genv, key)), @unified_key) + changes.add_edge(genv, vtx, @unified_val) end - base_type = genv.gen_hash_type(unified_key, unified_val) + base_type = genv.gen_hash_type(@unified_key, @unified_val) new_record = Type::Record.new(genv, new_fields, base_type) changes.add_edge(genv, Source.new(new_record), @out_vtx) when Type::Hash @@ -1165,13 +1172,11 @@ def run0(genv, changes) private def build_merged_hash_type(genv, changes, old_key_vtx, old_val_vtx) - new_key = Vertex.new(@node) - new_val = Vertex.new(@node) - changes.add_edge(genv, old_key_vtx, new_key) - changes.add_edge(genv, Source.new(Type::Symbol.new(genv, @key_sym)), new_key) - changes.add_edge(genv, old_val_vtx, new_val) - changes.add_edge(genv, @val_vtx, new_val) - new_hash_type = genv.gen_hash_type(new_key, new_val) + changes.add_edge(genv, old_key_vtx, @merged_key) unless 
old_key_vtx.equal?(@merged_key) + changes.add_edge(genv, Source.new(Type::Symbol.new(genv, @key_sym)), @merged_key) + changes.add_edge(genv, old_val_vtx, @merged_val) unless old_val_vtx.equal?(@merged_val) + changes.add_edge(genv, @val_vtx, @merged_val) + new_hash_type = genv.gen_hash_type(@merged_key, @merged_val) changes.add_edge(genv, Source.new(new_hash_type), @out_vtx) end end diff --git a/scenario/hash/hash_aset_loop.rb b/scenario/hash/hash_aset_loop.rb new file mode 100644 index 000000000..6c3f28af2 --- /dev/null +++ b/scenario/hash/hash_aset_loop.rb @@ -0,0 +1,14 @@ +## update +def foo(options) + while options[:flag] + options[:name] = "str" + end + nil +end + +foo(Hash.new) + +## assert +class Object + def foo: (Hash[:flag, untyped]) -> nil +end From 7520122e681569ce3e6794718af07d5e6f53d05d Mon Sep 17 00:00:00 2001 From: Takumi Shotoku Date: Sun, 15 Feb 2026 17:41:25 +0900 Subject: [PATCH 3/4] Fix a failing test: expect `callback: Proc` in block_to_hash_with_kwargs --- scenario/block/block_to_hash_with_kwargs.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scenario/block/block_to_hash_with_kwargs.rb b/scenario/block/block_to_hash_with_kwargs.rb index d16a985d9..f4b3346c8 100644 --- a/scenario/block/block_to_hash_with_kwargs.rb +++ b/scenario/block/block_to_hash_with_kwargs.rb @@ -8,5 +8,5 @@ def foo(**opts, &block) ## assert class Object - def foo: (**Integer) -> { key: Integer } + def foo: (**Integer) -> { key: Integer, callback: Proc } end From 3c52a9b65cd1ccc4e56fa99ce37724ead5a1f31a Mon Sep 17 00:00:00 2001 From: Takumi Shotoku Date: Sun, 15 Feb 2026 21:47:48 +0900 Subject: [PATCH 4/4] Fix exponential blowup in `HashAsetBox` for conditional hash assignments Merge all Record type variants into a single output in `HashAsetBox#run0` instead of creating one output per input Record. This prevents 2^N type explosion when a hash has many conditional assignments (e.g. `h[:k] = v if cond`). 
--- lib/typeprof/core/graph/box.rb | 31 ++++++++++++++++---------- scenario/hash/hash_aset_conditional.rb | 25 +++++++++++++++++++++ 2 files changed, 44 insertions(+), 12 deletions(-) create mode 100644 scenario/hash/hash_aset_conditional.rb diff --git a/lib/typeprof/core/graph/box.rb b/lib/typeprof/core/graph/box.rb index 05e1112be..8a3d9aed7 100644 --- a/lib/typeprof/core/graph/box.rb +++ b/lib/typeprof/core/graph/box.rb @@ -1136,25 +1136,16 @@ def destroy(genv) end def run0(genv, changes) + has_record = false + @recv.each_type do |ty| case ty when Type::Record - new_fields = {} + has_record = true ty.fields.each do |key, field_vtx| @field_cache[key] ||= Vertex.new(@node) changes.add_edge(genv, field_vtx, @field_cache[key]) unless field_vtx.equal?(@field_cache[key]) - new_fields[key] = @field_cache[key] - end - @field_cache[@key_sym] ||= Vertex.new(@node) - new_fields[@key_sym] = @field_cache[@key_sym] - changes.add_edge(genv, @val_vtx, @field_cache[@key_sym]) - new_fields.each do |key, vtx| - changes.add_edge(genv, Source.new(Type::Symbol.new(genv, key)), @unified_key) - changes.add_edge(genv, vtx, @unified_val) end - base_type = genv.gen_hash_type(@unified_key, @unified_val) - new_record = Type::Record.new(genv, new_fields, base_type) - changes.add_edge(genv, Source.new(new_record), @out_vtx) when Type::Hash build_merged_hash_type(genv, changes, ty.get_key, ty.get_value) when Type::Instance @@ -1167,6 +1158,22 @@ def run0(genv, changes) changes.add_edge(genv, Source.new(ty), @out_vtx) end end + + if has_record + @field_cache[@key_sym] ||= Vertex.new(@node) + changes.add_edge(genv, @val_vtx, @field_cache[@key_sym]) + + new_fields = {} + @field_cache.each do |key, vtx| + changes.add_edge(genv, Source.new(Type::Symbol.new(genv, key)), @unified_key) + changes.add_edge(genv, vtx, @unified_val) + new_fields[key] = vtx + end + + base_type = genv.gen_hash_type(@unified_key, @unified_val) + new_record = Type::Record.new(genv, new_fields, base_type) + 
changes.add_edge(genv, Source.new(new_record), @out_vtx) + end end private diff --git a/scenario/hash/hash_aset_conditional.rb b/scenario/hash/hash_aset_conditional.rb new file mode 100644 index 000000000..15d56b5a6 --- /dev/null +++ b/scenario/hash/hash_aset_conditional.rb @@ -0,0 +1,25 @@ +## update +def foo(flag) + h = {} + h[:a] = 1 if flag + h[:b] = 2 if flag + h[:c] = 3 if flag + h[:d] = 4 if flag + h[:e] = 5 if flag + h[:f] = 6 if flag + h[:g] = 7 if flag + h[:h] = 8 if flag + h[:i] = 9 if flag + h[:j] = 10 if flag + h[:k] = 11 if flag + h[:l] = 12 if flag + h[:m] = 13 if flag + h[:n] = 14 if flag + h[:o] = 15 if flag + h +end + +## assert +class Object + def foo: (untyped) -> ({ } | { a: Integer } | { a: Integer, b: Integer } | { a: Integer, b: Integer, c: Integer } | { a: Integer, b: Integer, c: Integer, d: Integer } | { a: Integer, b: Integer, c: Integer, d: Integer, e: Integer } | { a: Integer, b: Integer, c: Integer, d: Integer, e: Integer, f: Integer } | { a: Integer, b: Integer, c: Integer, d: Integer, e: Integer, f: Integer, g: Integer } | { a: Integer, b: Integer, c: Integer, d: Integer, e: Integer, f: Integer, g: Integer, h: Integer } | { a: Integer, b: Integer, c: Integer, d: Integer, e: Integer, f: Integer, g: Integer, h: Integer, i: Integer } | { a: Integer, b: Integer, c: Integer, d: Integer, e: Integer, f: Integer, g: Integer, h: Integer, i: Integer, j: Integer } | { a: Integer, b: Integer, c: Integer, d: Integer, e: Integer, f: Integer, g: Integer, h: Integer, i: Integer, j: Integer, k: Integer } | { a: Integer, b: Integer, c: Integer, d: Integer, e: Integer, f: Integer, g: Integer, h: Integer, i: Integer, j: Integer, k: Integer, l: Integer } | { a: Integer, b: Integer, c: Integer, d: Integer, e: Integer, f: Integer, g: Integer, h: Integer, i: Integer, j: Integer, k: Integer, l: Integer, m: Integer } | { a: Integer, b: Integer, c: Integer, d: Integer, e: Integer, f: Integer, g: Integer, h: Integer, i: Integer, j: Integer, k: Integer, l: 
Integer, m: Integer, n: Integer } | { a: Integer, b: Integer, c: Integer, d: Integer, e: Integer, f: Integer, g: Integer, h: Integer, i: Integer, j: Integer, k: Integer, l: Integer, m: Integer, n: Integer, o: Integer }) +end