# Builds the result of `string.rep`: `n` copies of `str` joined by `sep`.
#
# Allocation stays proportional to the size of the *result*. The obvious
# `Enum.map_join(1..n, sep, ...)` materializes an n-element list (plus its
# iolist) as transient garbage; for a large `n` that is many times the size
# of the result itself, which spikes the process heap and can trip
# `max_heap_size` on sandboxed processes. `:binary.copy/2` produces the
# repeated bytes directly, without creating one term per repetition.
#
# Clauses, in match order:
#
#   * `n <= 0`    - nothing to repeat; returns "" instead of handing a
#                   negative count to `:binary.copy/2` (which raises).
#   * `n == 1`    - returns `str` as-is; the separator never appears, so
#                   there is no point building `str <> sep`.
#   * `sep == ""` - a single `:binary.copy/2` call.
#   * otherwise   - `n - 1` copies of `str <> sep` followed by the final
#                   `str`. Note this is one copy plus one append, not a
#                   single allocation.
defp build_rep(_str, n, _sep) when n <= 0, do: ""
defp build_rep(str, 1, _sep), do: str
defp build_rep(str, n, ""), do: :binary.copy(str, n)
defp build_rep(str, n, sep), do: :binary.copy(str <> sep, n - 1) <> str
# Runs `fun` in a freshly spawned, monitored process whose heap is capped at
# `byte_limit` bytes, so a test can assert that work stays within a heap
# budget rather than sampling noisy global memory.
#
# Returns:
#
#   * `{:ok, result}`   - `fun` returned `result` within the budget
#   * `:killed`         - the process was killed (the heap cap is configured
#                         with `kill: true`)
#   * `{:error, reason}` - the process exited for any other reason, e.g.
#                         `fun` raised
#   * `:timeout`        - no answer within `timeout` milliseconds; the
#                         worker is killed before returning
#
# The caller's mailbox is left clean: the monitor is removed (and any
# already-delivered `:DOWN` flushed) on every path that does not consume the
# `:DOWN` message itself.
defp heap_capped(byte_limit, fun, timeout \\ 30_000) do
  parent = self()

  # Unique tag so an unrelated `{:result, _}`-style message already sitting
  # in the caller's mailbox can never be mistaken for this run's answer.
  tag = make_ref()

  # The cap is given to the VM in machine words, hence the conversion.
  word_limit = div(byte_limit, :erlang.system_info(:wordsize))
  cap = %{size: word_limit, kill: true, error_logger: false}

  {pid, ref} =
    :erlang.spawn_opt(
      fn -> send(parent, {tag, fun.()}) end,
      [:monitor, max_heap_size: cap]
    )

  receive do
    {^tag, result} ->
      # The worker exits right after sending; drop the monitor and flush the
      # `:DOWN` it would otherwise leave behind for later `receive`s.
      Process.demonitor(ref, [:flush])
      {:ok, result}

    {:DOWN, ^ref, :process, ^pid, :killed} ->
      :killed

    {:DOWN, ^ref, :process, ^pid, reason} ->
      {:error, reason}
  after
    timeout ->
      # Do not leave a runaway worker allocating in the background, and do
      # not let its eventual `:DOWN` land in the caller's mailbox.
      Process.demonitor(ref, [:flush])
      Process.exit(pid, :kill)
      :timeout
  end
end