Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions sjsonnet/src-js/sjsonnet/CharSWAR.scala
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,14 @@ object CharSWAR {
}
false
}

/**
 * Scalar scan for the first byte in `arr[from, to)` that needs JSON string escaping: a control
 * byte below 0x20, a double quote, or a backslash.
 *
 * @param arr  bytes to scan
 * @param from first index to inspect (inclusive)
 * @param to   end of the range (exclusive)
 * @return index of the first byte that must be escaped, or -1 when the range is clean
 */
def findFirstEscapeChar(arr: Array[Byte], from: Int, to: Int): Int = {
  var idx = from
  var found = -1
  while (found < 0 && idx < to) {
    // Mask to an unsigned value so bytes >= 0x80 are not mistaken for control characters.
    val unsigned = arr(idx) & 0xff
    val needsEscape = unsigned < 32 || unsigned == '"' || unsigned == '\\'
    if (needsEscape) found = idx
    else idx += 1
  }
  found
}
}
36 changes: 33 additions & 3 deletions sjsonnet/src-jvm/sjsonnet/CharSWAR.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ private CharSWAR() {}
// MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder)
private static final VarHandle LONG_VIEW =
MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.nativeOrder());
// True when the platform's native byte order is little-endian. firstMatchedByte uses this to
// decide whether the first byte of a loaded word sits in the low or the high bits of the mask.
private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;

// --- 8-bit SWAR constants (Netty/Pekko pattern) ---
//
Expand Down Expand Up @@ -90,12 +91,35 @@ static boolean hasEscapeChar(char[] arr, int from, int to) {
return false;
}

/**
 * Locates the first byte in {@code arr[from..to)} that must be escaped inside a JSON string
 * (a control byte below 0x20, {@code '"'} or {@code '\\'}).
 *
 * <p>The range is scanned eight bytes at a time through {@code LONG_VIEW}; whatever is left
 * over (at most seven bytes) is checked one byte at a time.
 *
 * @param arr bytes to scan
 * @param from first index to inspect (inclusive)
 * @param to end of the range (exclusive)
 * @return index of the first byte needing an escape, or {@code -1} when the range is clean
 */
static int findFirstEscapeChar(byte[] arr, int from, int to) {
  int pos = from;
  // Word loop: only entered while a full 8-byte read fits before `to`.
  for (final int wordEnd = to - 7; pos < wordEnd; pos += 8) {
    final long hits = swarMatchMask((long) LONG_VIEW.get(arr, pos));
    if (hits != 0L) {
      return pos + firstMatchedByte(hits);
    }
  }
  // Tail: remaining bytes, compared as unsigned values.
  for (; pos < to; pos++) {
    final int unsigned = arr[pos] & 0xFF;
    if (unsigned < 32 || unsigned == '"' || unsigned == '\\') {
      return pos;
    }
  }
  return -1;
}

private static boolean hasEscapeCharSWAR(byte[] arr, int from, int to) {
int i = from;
int limit = to - 7; // 8 bytes per VarHandle.get
while (i < limit) {
long word = (long) LONG_VIEW.get(arr, i);
if (swarHasMatch(word)) return true;
if (swarMatchMask(word) != 0L) return true;
i += 8;
}
// Tail: remaining 0-7 bytes
Expand All @@ -114,7 +138,7 @@ private static boolean hasEscapeCharSWAR(byte[] arr, int from, int to) {
* <p>Uses Netty/Pekko pattern: XOR to produce zero lanes, then
* Hacker's Delight formula to detect zero bytes.
*/
private static boolean swarHasMatch(long word) {
private static long swarMatchMask(long word) {
// 1. Detect '"' via XOR + zero-detection (Netty SWARUtil.applyPattern)
long q = word ^ QUOTE;
long qz = ~((q & HOLE) + HOLE | q | HOLE);
Expand All @@ -127,7 +151,13 @@ private static boolean swarHasMatch(long word) {
long c = word & CTRL;
long cz = ~((c & HOLE) + HOLE | c | HOLE);

return (qz | bz | cz) != 0L;
return qz | bz | cz;
}

/**
 * Converts a non-zero SWAR match mask into the offset (0-7) of the first matching byte in
 * memory order. Callers are expected to pass a non-zero mask; a zero mask yields 8.
 */
private static int firstMatchedByte(long mask) {
  final int bitIndex;
  if (LITTLE_ENDIAN) {
    // Lowest-addressed byte occupies the least significant lane.
    bitIndex = Long.numberOfTrailingZeros(mask);
  } else {
    // Lowest-addressed byte occupies the most significant lane.
    bitIndex = Long.numberOfLeadingZeros(mask);
  }
  // bitIndex is never negative, so dividing by the lane width equals the unsigned shift by 3.
  return bitIndex / Byte.SIZE;
}

/** Scalar scan for String (used for short strings). */
Expand Down
46 changes: 42 additions & 4 deletions sjsonnet/src-native/sjsonnet/CharSWAR.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@ object CharSWAR {
private final val QUOTE = 0x2222222222222222L
private final val BSLAS = 0x5c5c5c5c5c5c5c5cL
private final val CTRL = 0xe0e0e0e0e0e0e0e0L
// True when the platform's native byte order is little-endian; firstMatchedByte uses it to pick
// which end of a SWAR mask corresponds to the first byte in memory.
private final val LITTLE_ENDIAN =
java.nio.ByteOrder.nativeOrder() == java.nio.ByteOrder.LITTLE_ENDIAN

/**
* SWAR: returns true if any byte lane in `word` contains '"' (0x22), '\\' (0x5C), or a control
* SWAR: returns a mask for byte lanes in `word` containing '"' (0x22), '\\' (0x5C), or a control
* char (< 0x20).
*/
@inline private def swarHasMatch(word: Long): Boolean = {
@inline private def swarMatchMask(word: Long): Long = {
// 1. Detect '"' via XOR + zero-detection
val q = word ^ QUOTE
val qz = ~((q & HOLE) + HOLE | q | HOLE)
Expand All @@ -39,9 +41,13 @@ object CharSWAR {
val c = word & CTRL
val cz = ~((c & HOLE) + HOLE | c | HOLE)

(qz | bz | cz) != 0L
qz | bz | cz
}

/**
 * Translates a non-zero SWAR match mask into the offset (0-7) of the first matching byte in
 * memory order. Callers are expected to pass a non-zero mask; a zero mask yields 8.
 */
@inline private def firstMatchedByte(mask: Long): Int = {
  // The first byte in memory is the low lane on little-endian and the high lane otherwise.
  val bitOffset =
    if (LITTLE_ENDIAN) java.lang.Long.numberOfTrailingZeros(mask)
    else java.lang.Long.numberOfLeadingZeros(mask)
  bitOffset >>> 3
}

def hasEscapeChar(s: String): Boolean = {
val len = s.length
if (len < 128) {
Expand Down Expand Up @@ -77,7 +83,7 @@ object CharSWAR {
val limit = to - 7
while (i < limit) {
val word = Intrinsics.loadLong(barr.atRawUnsafe(i))
if (swarHasMatch(word)) return true
if (swarMatchMask(word) != 0L) return true
i += 8
}
// Tail: remaining 0-7 bytes
Expand All @@ -89,6 +95,28 @@ object CharSWAR {
false
}

/**
 * Finds the first byte in `arr[from, to)` that needs JSON string escaping (control byte below
 * 0x20, double quote, or backslash).
 *
 * Ranges shorter than eight bytes are scanned byte by byte. Longer ranges are read eight bytes at
 * a time and tested with `swarMatchMask`; the bytes left after the last full word are handed to
 * the scalar scan.
 *
 * @param arr  bytes to scan
 * @param from first index to inspect (inclusive)
 * @param to   end of the range (exclusive)
 * @return index of the first byte that must be escaped, or -1 when the range is clean
 */
def findFirstEscapeChar(arr: Array[Byte], from: Int, to: Int): Int = {
  if (to - from < 8) findFirstEscapeCharScalar(arr, from, to)
  else {
    val raw = arr.asInstanceOf[ByteArray]
    val wordLimit = to - 7 // last start index at which a full 8-byte load still fits
    var pos = from
    while (pos < wordLimit) {
      val hits = swarMatchMask(Intrinsics.loadLong(raw.atRawUnsafe(pos)))
      if (hits != 0L) return pos + firstMatchedByte(hits)
      pos += 8
    }
    // Tail: the remaining 0-7 bytes go through the same byte-wise check as short inputs.
    findFirstEscapeCharScalar(arr, pos, to)
  }
}

@inline private def hasEscapeCharScalar(s: String, len: Int): Boolean = {
var i = 0
while (i < len) {
Expand All @@ -108,4 +136,14 @@ object CharSWAR {
}
false
}

/**
 * Byte-by-byte scan of `arr[from, to)` for the first byte that needs JSON string escaping
 * (control byte below 0x20, double quote, or backslash). Returns its index, or -1 if none.
 */
@inline private def findFirstEscapeCharScalar(arr: Array[Byte], from: Int, to: Int): Int = {
  @scala.annotation.tailrec
  def scan(pos: Int): Int =
    if (pos >= to) -1
    else {
      // Compare the unsigned value so bytes >= 0x80 are not treated as control characters.
      val unsigned = arr(pos) & 0xff
      if (unsigned < 32 || unsigned == '"' || unsigned == '\\') pos
      else scan(pos + 1)
    }
  scan(from)
}
}
122 changes: 111 additions & 11 deletions sjsonnet/src/sjsonnet/BaseByteRenderer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -307,13 +307,14 @@ class BaseByteRenderer[T <: java.io.OutputStream](
}

/**
* SWAR-accelerated path for long strings. Converts to UTF-8 bytes once, scans with SWAR, and
* bulk-copies if clean. The getBytes allocation is amortized by avoiding per-char processing.
* SWAR-accelerated path for long strings. Converts to UTF-8 bytes once, then bulk-copies clean
* chunks and escapes only the bytes that require it.
*/
private def visitLongString(str: String): Unit = {
val bytes = str.getBytes(java.nio.charset.StandardCharsets.UTF_8)
if (!CharSWAR.hasEscapeChar(bytes, 0, bytes.length)) {
val bLen = bytes.length
val bLen = bytes.length
val firstEscape = CharSWAR.findFirstEscapeChar(bytes, 0, bLen)
if (firstEscape < 0) {
elemBuilder.ensureLength(bLen + 2)
val arr = elemBuilder.arr
val pos = elemBuilder.length
Expand All @@ -322,13 +323,92 @@ class BaseByteRenderer[T <: java.io.OutputStream](
arr(pos + 1 + bLen) = '"'.toByte
elemBuilder.length = pos + bLen + 2
} else {
upickle.core.RenderUtils.escapeByte(
unicodeCharBuilder,
elemBuilder,
str,
escapeUnicode = false,
wrapQuotes = true
)
val escapedLen = escapedStringLength(bytes, bLen, firstEscape)
elemBuilder.ensureLength(escapedLen)
val arr = elemBuilder.arr
var outPos = elemBuilder.length
arr(outPos) = '"'.toByte
outPos += 1
var from = 0
var escPos = firstEscape
while (escPos >= 0) {
if (escPos > from) {
val chunkLen = escPos - from
System.arraycopy(bytes, from, arr, outPos, chunkLen)
outPos += chunkLen
}
outPos = escapeByteInline(bytes(escPos) & 0xff, arr, outPos)
from = escPos + 1
escPos = if (from < bLen) CharSWAR.findFirstEscapeChar(bytes, from, bLen) else -1
}
if (from < bLen) {
val tailLen = bLen - from
System.arraycopy(bytes, from, arr, outPos, tailLen)
outPos += tailLen
}
arr(outPos) = '"'.toByte
elemBuilder.length = outPos + 1
}
}

/**
 * Computes the number of bytes the quoted, escaped form of `bytes[0, bLen)` occupies: the raw
 * length, two bytes for the surrounding quotes, and the extra bytes each escaped byte adds.
 *
 * @param bytes       UTF-8 bytes of the string
 * @param bLen        number of bytes of `bytes` to consider
 * @param firstEscape index of the first byte needing an escape, or a negative value if none
 */
private def escapedStringLength(bytes: Array[Byte], bLen: Int, firstEscape: Int): Int = {
  var total = bLen + 2
  var hit = firstEscape
  while (hit >= 0) {
    total += escapeExtraLength(bytes(hit) & 0xff)
    // Resume the search right after the byte just accounted for.
    val resume = hit + 1
    hit = if (resume < bLen) CharSWAR.findFirstEscapeChar(bytes, resume, bLen) else -1
  }
  total
}

/**
 * Number of additional output bytes needed to escape `b`: 1 when a two-character escape exists
 * (quote, backslash, \b, \f, \n, \r, \t), otherwise 5 for the six-character `\u00XX` form.
 */
@inline private def escapeExtraLength(b: Int): Int = {
  val hasShortEscape = (b: @scala.annotation.switch) match {
    case '"' | '\\'                       => true
    case '\b' | '\t' | '\n' | '\f' | '\r' => true
    case _                                => false
  }
  if (hasShortEscape) 1 else 5
}

/**
 * Writes the JSON escape sequence for one byte that is known to require escaping.
 *
 * Quote, backslash, \b, \f, \n, \r and \t become a backslash followed by one character; any
 * other value is written as `\u00XX` using the lowercase hex table in `BaseByteRenderer`.
 *
 * @param b       unsigned value of the byte to escape
 * @param arr     destination buffer; the caller must have reserved enough room
 * @param outPos0 index in `arr` at which to start writing
 * @return the index just past the last byte written
 */
@inline private def escapeByteInline(b: Int, arr: Array[Byte], outPos0: Int): Int = {
  // Second character of the two-character escape, or 0 when only the \u00XX form applies.
  val shortCode: Int = (b: @scala.annotation.switch) match {
    case '"'  => '"'
    case '\\' => '\\'
    case '\b' => 'b'
    case '\f' => 'f'
    case '\n' => 'n'
    case '\r' => 'r'
    case '\t' => 't'
    case _    => 0
  }
  // Every escape starts with a backslash.
  arr(outPos0) = '\\'.toByte
  if (shortCode != 0) {
    arr(outPos0 + 1) = shortCode.toByte
    outPos0 + 2
  } else {
    arr(outPos0 + 1) = 'u'.toByte
    arr(outPos0 + 2) = '0'.toByte
    arr(outPos0 + 3) = '0'.toByte
    arr(outPos0 + 4) = BaseByteRenderer.HEX_BYTES((b >> 4) & 0xf)
    arr(outPos0 + 5) = BaseByteRenderer.HEX_BYTES(b & 0xf)
    outPos0 + 6
  }
}

Expand Down Expand Up @@ -377,6 +457,26 @@ object BaseByteRenderer {
a
}

/**
 * Lowercase ASCII hex digits indexed by nibble value (0-15), used by inline byte escaping for
 * control chars.
 */
private[sjsonnet] val HEX_BYTES: Array[Byte] =
  "0123456789abcdef".toCharArray.map(_.toByte)

/**
* Reusable scratch buffer for writeLongDirect (max 20 bytes for Long.MinValue). Not thread-safe,
* but renderers are single-threaded.
Expand Down
Loading