From 55ff93589f7737ec97017f69d5fe0c2bc74d6603 Mon Sep 17 00:00:00 2001 From: He-Pin Date: Tue, 12 May 2026 15:29:00 +0800 Subject: [PATCH] perf: render escaped byte strings in one pass Motivation: Long JSON strings that contain escape characters used to scan the UTF-8 byte array twice in ByteRenderer: once to find escapes and once to pre-compute the exact escaped length. The large_string_template benchmark spends visible time in this path. Modification: Render escaped long strings with one copy/escape pass, growing ByteBuilder incrementally and refreshing its backing array after capacity checks. Add a regression test that compares ByteRenderer output with the char Renderer for long escaped strings including two-byte escapes, six-byte control escapes, and a trailing plain tail. Result: The large_string_template JMH target improves from roughly 0.70-0.72 ms/op to roughly 0.58-0.65 ms/op in local runs, while full tests and formatting checks remain green. References: bench/resources/cpp_suite/large_string_template.jsonnet --- sjsonnet/src/sjsonnet/BaseByteRenderer.scala | 39 +++++++++---------- .../test/src/sjsonnet/RendererTests.scala | 8 ++++ 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/sjsonnet/src/sjsonnet/BaseByteRenderer.scala b/sjsonnet/src/sjsonnet/BaseByteRenderer.scala index ce5f4907..2dbc6428 100644 --- a/sjsonnet/src/sjsonnet/BaseByteRenderer.scala +++ b/sjsonnet/src/sjsonnet/BaseByteRenderer.scala @@ -319,9 +319,8 @@ class BaseByteRenderer[T <: java.io.OutputStream]( arr(pos + 1 + bLen) = '"'.toByte elemBuilder.length = pos + bLen + 2 } else { - val escapedLen = escapedStringLength(bytes, bLen, firstEscape) - elemBuilder.ensureLength(escapedLen) - val arr = elemBuilder.arr + elemBuilder.ensureLength(bLen + 2 + (bLen >>> 5)) + var arr = elemBuilder.arr var outPos = elemBuilder.length arr(outPos) = '"'.toByte outPos += 1 @@ -330,41 +329,39 @@ class BaseByteRenderer[T <: java.io.OutputStream]( while (escPos >= 0) { if (escPos > from) { val chunkLen = escPos - from + elemBuilder.length = outPos + elemBuilder.ensureLength(chunkLen + 6) + arr = elemBuilder.arr + outPos = elemBuilder.length System.arraycopy(bytes, from, arr, outPos, chunkLen) outPos += chunkLen } + elemBuilder.length = outPos + elemBuilder.ensureLength(6) + arr = elemBuilder.arr + outPos = elemBuilder.length outPos = escapeByteInline(bytes(escPos) & 0xff, arr, outPos) from = escPos + 1 escPos = if (from < bLen) CharSWAR.findFirstEscapeChar(bytes, from, bLen) else -1 } if (from < bLen) { val tailLen = bLen - from + elemBuilder.length = outPos + elemBuilder.ensureLength(tailLen + 1) + arr = elemBuilder.arr + outPos = elemBuilder.length System.arraycopy(bytes, from, arr, outPos, tailLen) outPos += tailLen } + elemBuilder.length = outPos + elemBuilder.ensureLength(1) + arr = elemBuilder.arr + outPos = elemBuilder.length arr(outPos) = '"'.toByte elemBuilder.length = outPos + 1 } } - private def escapedStringLength(bytes: Array[Byte], bLen: Int, firstEscape: Int): Int = { - var len = bLen + 2 - var from = firstEscape - var escPos = firstEscape - while (escPos >= 0) { - len += escapeExtraLength(bytes(escPos) & 0xff) - from = escPos + 1 - escPos = if (from < bLen) CharSWAR.findFirstEscapeChar(bytes, from, bLen) else -1 - } - len - } - - @inline private def escapeExtraLength(b: Int): Int = - (b: @scala.annotation.switch) match { - case '"' | '\\' | '\b' | '\f' | '\n' | '\r' | '\t' => 1 - case _ => 5 - } - /** Inline JSON escape for one byte that is known to require escaping. */ @inline private def escapeByteInline(b: Int, arr: Array[Byte], outPos0: Int): Int = { val outPos = outPos0 diff --git a/sjsonnet/test/src/sjsonnet/RendererTests.scala b/sjsonnet/test/src/sjsonnet/RendererTests.scala index 65577f25..f21372b7 100644 --- a/sjsonnet/test/src/sjsonnet/RendererTests.scala +++ b/sjsonnet/test/src/sjsonnet/RendererTests.scala @@ -1,5 +1,6 @@ package sjsonnet +import java.io.ByteArrayOutputStream import utest._ object RendererTests extends TestSuite { @@ -65,6 +66,13 @@ object RendererTests extends TestSuite { ujson.transform(ujson.Num(1e15), new Renderer()).toString ==> "1000000000000000" } + test("byteRendererLongEscapedString") { + val s = ("abc\n\"\\\t\u0001" * 40) + "tail" + val out = new ByteArrayOutputStream + ujson.transform(ujson.Str(s), new ByteRenderer(out)) + out.toString("UTF-8") ==> ujson.transform(ujson.Str(s), new Renderer()).toString + } + test("indentZero") { // indent=0 should produce newlines but no spaces ujson.transform(ujson.Arr(1, 2), new Renderer(indent = 0)).toString ==>