Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion lib/lua/vm/stdlib/string.ex
Original file line number Diff line number Diff line change
Expand Up @@ -217,12 +217,23 @@ defmodule Lua.VM.Stdlib.String do
# oversized request fails with a catchable error instead of trying to
# allocate (and OOM the host on) a multi-petabyte binary.
Limits.check_string_size!(n * byte_size(str) + (n - 1) * byte_size(sep), state.max_string_bytes)
Enum.map_join(1..n, sep, fn _ -> str end)
build_rep(str, n, sep)
end

{[result], state}
end

# Produces `str` repeated `n` times with `sep` between consecutive copies,
# delegating the bulk of the work to `:binary.copy/2` instead of joining an
# n-element enumerable. The earlier `Enum.map_join(1..n, sep, ...)` approach
# built a list (and iolist) with one entry per repetition as throwaway
# garbage; for large `n` that was reported to dwarf the result itself,
# spiking the process heap and tripping `max_heap_size` on sandboxed
# processes.
#
#   * empty separator -> `str` copied `n` times;
#   * any other separator -> `str <> sep` copied `n - 1` times, followed by
#     one final `str` (so no trailing separator is emitted).
defp build_rep(str, n, sep) do
  case sep do
    "" ->
      :binary.copy(str, n)

    _ ->
      unit = str <> sep
      :binary.copy(unit, n - 1) <> str
  end
end

# string.reverse(s) - reverses a string byte-by-byte (Lua strings are
# byte arrays, not codepoint sequences — so a NUL or non-UTF-8 byte
# must come back at the mirror position).
Expand Down
41 changes: 41 additions & 0 deletions test/lua/vm/string_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,47 @@ defmodule Lua.VM.StringTest do
state = Stdlib.install(State.new())
assert {:ok, [""], _state} = VM.execute(proto, state)
end

# Regression guard for the memory behaviour of `string.rep`: the work done
# to build the output should scale with the size of the output, not with the
# repeat count. The former `Enum.map_join(1..n, ...)` implementation created
# an n-element list as transient garbage (reported as tens of times the
# result size, exceeding 1 GB for this input), which tripped `max_heap_size`
# on sandboxed processes. Here a 16 MB result is produced inside a process
# whose heap is capped at 128 MB; the process must survive and report the
# full length.
test "string.rep allocates proportionally to the result, not the count" do
  expected_bytes = 16 * 1024 * 1024
  heap_budget = 128 * 1024 * 1024

  outcome =
    heap_capped(heap_budget, fn ->
      {[repeated], _lua} = Lua.eval!(Lua.new(), ~s[return string.rep("x", #{expected_bytes})])
      byte_size(repeated)
    end)

  # `:killed` (or any other non-ok outcome) fails this match.
  assert {:ok, measured_bytes} = outcome
  assert measured_bytes == expected_bytes
end
end

# Runs `fun` in a freshly spawned, monitored process whose heap is capped at
# `byte_limit` bytes (converted to machine words for `max_heap_size`; the
# process is killed when it exceeds the cap). Lets a test assert that work
# stays within a heap budget rather than sampling noisy global memory.
#
# Returns:
#   * `{:ok, result}`    - `fun` returned `result`
#   * `:killed`          - the process went down with reason `:killed`
#   * `{:error, reason}` - the process went down for any other reason
#   * `:timeout`         - nothing was heard back within 30 seconds
#
# NOTE(review): whether large reference-counted binaries count toward the
# cap depends on the runtime's `max_heap_size` settings - confirm before
# relying on this helper to bound binary-heavy work.
defp heap_capped(byte_limit, fun) do
  parent = self()
  # Unique per call, so a late reply from an earlier (timed-out) invocation
  # can never be mistaken for this invocation's result.
  tag = make_ref()
  word_limit = div(byte_limit, :erlang.system_info(:wordsize))
  cap = %{size: word_limit, kill: true, error_logger: false}

  {pid, ref} =
    :erlang.spawn_opt(
      fn -> send(parent, {tag, fun.()}) end,
      [:monitor, max_heap_size: cap]
    )

  receive do
    {^tag, result} ->
      # Release the monitor and discard its :DOWN message so it does not
      # linger in the caller's mailbox after a successful run.
      Process.demonitor(ref, [:flush])
      {:ok, result}

    {:DOWN, ^ref, :process, ^pid, :killed} ->
      :killed

    {:DOWN, ^ref, :process, ^pid, reason} ->
      {:error, reason}
  after
    30_000 ->
      # Do not leave a runaway worker (or its monitor) behind.
      Process.demonitor(ref, [:flush])
      Process.exit(pid, :kill)
      :timeout
  end
end

describe "string.reverse" do
Expand Down