diff --git a/bench/resources/sjsonnet_suite/lazy_array_slice_remove.jsonnet b/bench/resources/sjsonnet_suite/lazy_array_slice_remove.jsonnet new file mode 100644 index 00000000..be43529b --- /dev/null +++ b/bench/resources/sjsonnet_suite/lazy_array_slice_remove.jsonnet @@ -0,0 +1,17 @@ +local n = 500000; +local source = std.range(0, n - 1); +local sliced = source[123:n - 123:2]; +local trimmed = std.removeAt(sliced, 17); +local trimmedLen = std.length(trimmed); +local probes = std.range(0, 19999); +{ + len: trimmedLen, + first: trimmed[0], + mid: trimmed[std.floor(trimmedLen / 2)], + last: trimmed[trimmedLen - 1], + checksum: std.foldl( + function(acc, i) acc + trimmed[(i * 7919) % trimmedLen], + probes, + 0 + ), +} diff --git a/bench/resources/sjsonnet_suite/lazy_array_slice_remove.jsonnet.golden b/bench/resources/sjsonnet_suite/lazy_array_slice_remove.jsonnet.golden new file mode 100644 index 00000000..66a01911 --- /dev/null +++ b/bench/resources/sjsonnet_suite/lazy_array_slice_remove.jsonnet.golden @@ -0,0 +1,7 @@ +{ + "checksum": 4998971188, + "first": 123, + "last": 499875, + "len": 249876, + "mid": 250001 +} diff --git a/sjsonnet/src/sjsonnet/Util.scala b/sjsonnet/src/sjsonnet/Util.scala index c6c5fdde..d716ca4d 100644 --- a/sjsonnet/src/sjsonnet/Util.scala +++ b/sjsonnet/src/sjsonnet/Util.scala @@ -19,31 +19,6 @@ object Util { s"${line + 1}:${col + 1}" } - private def sliceArr(arr: Val.Arr, start: Int, end: Int, step: Int): Array[Eval] = { - if (start >= end || start >= arr.length) { - Array.empty[Eval] - } else - step match { - case 1 => - val safeEnd = math.min(end, arr.length) - val result = new Array[Eval](safeEnd - start) - var i = start - while (i < safeEnd) { - result(i - start) = arr.eval(i) - i += 1 - } - result - case _ => - val builder = new scala.collection.mutable.ArrayBuilder.ofRef[Eval] - var i = start - while (i < end && i < arr.length) { - if (i >= 0) builder += arr.eval(i) - i += step - } - builder.result() - } - } - def slice( pos: Position, ev: EvalScope, @@ -77,7 +52,7 @@ object Util { } val res = indexable match { case Val.Str(_, s) => Val.Str(pos, Util.sliceStr(s, start, end, step)) - case arr: Val.Arr => Val.Arr(pos, Util.sliceArr(arr, start, end, step)) + case arr: Val.Arr => arr.sliced(pos, start, end, step) case _ => Error.fail("Can only slice array or string, not " + indexable.prettyName, pos)(ev) } res: Val diff --git a/sjsonnet/src/sjsonnet/Val.scala b/sjsonnet/src/sjsonnet/Val.scala index dca42c60..9c1e9480 100644 --- a/sjsonnet/src/sjsonnet/Val.scala +++ b/sjsonnet/src/sjsonnet/Val.scala @@ -645,8 +645,8 @@ object Val { } /** - * Concatenate two arrays. For large left-side arrays where neither operand is already a concat - * view, creates a lazy ConcatView that defers the copy until bulk access is needed. This is + * Concatenate two arrays. For large results where neither operand is already a concat view, + * creates a lazy ConcatView that defers the copy until bulk access is needed. This is * particularly beneficial for patterns like `big_array + [x] < big_array + [y]` where * element-wise comparison via value(i) never needs the flattened backing array. * @@ -655,28 +655,34 @@ object Val { */ def concat(newPos: Position, rhs: Arr): Arr = { val leftLen = this.length - // Use lazy concat when the left side is large enough that avoiding the + val rhsLen = rhs.length + val totalLenLong = leftLen.toLong + rhsLen.toLong + if (totalLenLong > Int.MaxValue) Error.fail("array too large") + val totalLen = totalLenLong.toInt + // Use lazy concat when the result is large enough that avoiding the // arraycopy is worthwhile. Limit to depth-1 (neither side is a concat view) // to prevent O(depth) access chains and ensure value(i) stays O(1). - if (leftLen >= Arr.LAZY_CONCAT_THRESHOLD && !this.isConcatView && !rhs.isConcatView) { + if (totalLen >= Arr.LAZY_CONCAT_THRESHOLD && !this.isConcatView && !rhs.isConcatView) { val result = Arr(newPos, Arr.EMPTY_EVAL_ARRAY) result._concatLeft = this result._concatRight = rhs - result._length = leftLen + rhs.length + result._length = totalLen result } else { // Eager path: allocate + arraycopy (also used when either side is a concat view // to flatten and prevent deep nesting) val lArr = this.asLazyArray val rArr = rhs.asLazyArray - val rLen = rArr.length - val result = new Array[Eval](leftLen + rLen) + val result = new Array[Eval](totalLen) System.arraycopy(lArr, 0, result, 0, leftLen) - System.arraycopy(rArr, 0, result, leftLen, rLen) + System.arraycopy(rArr, 0, result, leftLen, rhsLen) Arr(newPos, result) } } + def sliced(newPos: Position, start: Int, end: Int, step: Int): Arr = + Arr.sliced(newPos, this, start, end, step) + def iterator: Iterator[Val] = { val self = this new Iterator[Val] { @@ -861,6 +867,46 @@ object Val { new RepeatedArr(newPos, source.reversed(newPos), count) } + private final class SliceArr( + pos0: Position, + private[this] var source: Arr, + private[this] val start: Int, + private[this] val step: Int, + size: Int) + extends Arr(pos0, null) { + _length = size + + @inline private[this] def sourceIndex(i: Int): Int = start + i * step + + override def value(i: Int): Val = + if ((arr ne null) || isConcatView) super.value(i) + else source.value(sourceIndex(i)) + + override def eval(i: Int): Eval = + if ((arr ne null) || isConcatView) super.eval(i) + else source.eval(sourceIndex(i)) + + override def asLazyArray: Array[Eval] = { + if ((arr ne null) || isConcatView) super.asLazyArray + else { + val len = _length + val result = new Array[Eval](len) + var i = 0 + while (i < len) { + result(i) = source.eval(sourceIndex(i)) + i += 1 + } + arr = result + source = null + result + } + } + + override def reversed(newPos: Position): Arr = + if ((arr ne null) || isConcatView) super.reversed(newPos) + else new SliceArr(newPos, source, sourceIndex(_length - 1), -step, _length) + } + /** * Base class for array-level lazy views. * @@ -1236,6 +1282,57 @@ object Val { if (count == 0 || source.length == 0) Arr(pos, EMPTY_EVAL_ARRAY) else new RepeatedArr(pos, source, count) + def sliced(pos: Position, source: Arr, start: Int, end: Int, step: Int): Arr = { + val sourceLen = source.length + val first = firstSliceIndex(sourceLen, start, end, step) + val len = sliceLength(sourceLen, first, end, step) + if (len == 0) Arr(pos, EMPTY_EVAL_ARRAY) + else if (!useSliceView(source, sourceLen, len)) copySlice(pos, source, first, step, len) + else new SliceArr(pos, source, first, step, len) + } + + private def useSliceView(source: Arr, sourceLen: Int, sliceLen: Int): Boolean = { + if (sliceLen < LAZY_VIEW_THRESHOLD) false + else + source match { + case _: RangeArr | _: ByteArr | _: RepeatedArr | _: LazyViewArr | _: ReversedLazyViewArr | + _: SliceArr => + true + case _ => + // Plain flat/reversed/concat arrays may retain a large backing graph. Copy small + // sub-slices so a short-lived result does not pin much more memory than it exposes. + sliceLen.toLong * SLICE_VIEW_MIN_SOURCE_FRACTION_DEN >= sourceLen.toLong + } + } + + private def firstSliceIndex(sourceLen: Int, start: Int, end: Int, step: Int): Int = { + if (step <= 0) throw new IllegalArgumentException("slice step must be positive") + if (start >= end || start >= sourceLen || start >= 0) start + else { + val skipped = ((-start.toLong) + step.toLong - 1L) / step.toLong + (start.toLong + skipped * step.toLong).toInt + } + } + + private def sliceLength(sourceLen: Int, start: Int, end: Int, step: Int): Int = { + if (start >= end || start >= sourceLen) 0 + else { + val safeEnd = math.min(end, sourceLen) + if (safeEnd <= start) 0 + else (((safeEnd.toLong - start.toLong - 1L) / step.toLong) + 1L).toInt + } + } + + private def copySlice(pos: Position, source: Arr, start: Int, step: Int, len: Int): Arr = { + val result = new Array[Eval](len) + var i = 0 + while (i < len) { + result(i) = source.eval(start + i * step) + i += 1 + } + Arr(pos, result) + } + def mapped(pos: Position, source: Arr, func: Func, callPos: Position, ev: EvalScope): Arr = if (source.length == 0) Arr(pos, EMPTY_EVAL_ARRAY) else if (source.length < LAZY_VIEW_THRESHOLD) { @@ -1299,7 +1396,7 @@ object Val { def range(pos: Position, from: Int, size: Int): Arr = new RangeArr(pos, from, size) /** - * Threshold for lazy concat. Arrays with left.length >= this value use a virtual ConcatView + * Threshold for lazy concat. Concats with total length >= this value use a virtual ConcatView * instead of eager arraycopy. Below this size, arraycopy is cheap enough that the indirection * overhead of virtual dispatch outweighs the copy savings. */ @@ -1313,6 +1410,8 @@ object Val { */ val LAZY_VIEW_THRESHOLD = 4096 + private val SLICE_VIEW_MIN_SOURCE_FRACTION_DEN = 4 + private[sjsonnet] val EMPTY_EVAL_ARRAY: Array[Eval] = Array.empty[Eval] } diff --git a/sjsonnet/src/sjsonnet/stdlib/ArrayModule.scala b/sjsonnet/src/sjsonnet/stdlib/ArrayModule.scala index 338bc062..26cea699 100644 --- a/sjsonnet/src/sjsonnet/stdlib/ArrayModule.scala +++ b/sjsonnet/src/sjsonnet/stdlib/ArrayModule.scala @@ -67,6 +67,18 @@ object ArrayModule extends AbstractFunctionModule { sum } + private def removeAtView(arr: Val.Arr, removeIdx: Int): Val.Arr = { + val len = arr.length + if (len == 1) Val.Arr(arr.pos, Val.Arr.EMPTY_EVAL_ARRAY) + else if (removeIdx == 0) arr.sliced(arr.pos, 1, len, 1) + else if (removeIdx == len - 1) arr.sliced(arr.pos, 0, removeIdx, 1) + else { + val left = arr.sliced(arr.pos, 0, removeIdx, 1) + val right = arr.sliced(arr.pos, removeIdx + 1, len, 1) + left.concat(arr.pos, right) + } + } + /** * [[https://jsonnet.org/ref/stdlib.html#std-minArray std.minArray(arr, keyF, onEmpty)]]. * @@ -948,17 +960,7 @@ object ArrayModule extends AbstractFunctionModule { if (idx == -1) { arr } else { - val result = new Array[Eval](arr.length - 1) - var j = 0 - while (j < idx) { - result(j) = arr.eval(j) - j += 1 - } - while (j < result.length) { - result(j) = arr.eval(j + 1) - j += 1 - } - Val.Arr(arr.pos, result) + removeAtView(arr, idx) } }, /** @@ -976,19 +978,7 @@ object ArrayModule extends AbstractFunctionModule { case _ => -1 } if (removeIdx == -1) arr - else { - val result = new Array[Eval](arr.length - 1) - var i = 0 - while (i < removeIdx) { - result(i) = arr.eval(i) - i += 1 - } - while (i < result.length) { - result(i) = arr.eval(i + 1) - i += 1 - } - Val.Arr(arr.pos, result) - } + else removeAtView(arr, removeIdx) }, /** * [[https://jsonnet.org/ref/stdlib.html#std-sum std.sum(arr)]]. diff --git a/sjsonnet/test/src/sjsonnet/Std0150FunctionsTests.scala b/sjsonnet/test/src/sjsonnet/Std0150FunctionsTests.scala index b9080b80..9c31a6e9 100644 --- a/sjsonnet/test/src/sjsonnet/Std0150FunctionsTests.scala +++ b/sjsonnet/test/src/sjsonnet/Std0150FunctionsTests.scala @@ -57,9 +57,18 @@ object Std0150FunctionsTests extends TestSuite { test("slice") { eval("std.slice([1, 2, 3, 4, 5, 6], 0, 4, 1)") ==> ujson.read("[ 1, 2, 3, 4 ]") eval("std.slice([1, 2, 3, 4, 5, 6], 1, 6, 2)") ==> ujson.read("[ 2, 4, 6 ]") + eval("std.slice(std.range(1, 6), -3, 99, 1)") ==> ujson.Arr(4, 5, 6) + eval("std.slice(std.range(1, 6), 1, 6, 2)") ==> ujson.Arr(2, 4, 6) eval("""std.slice("jsonnet", 0, 4, 1)""") ==> ujson.Str("json") } + test("remove range arrays") { + eval("std.remove(std.range(1, 5), 3)") ==> ujson.Arr(1, 2, 4, 5) + eval("std.removeAt(std.range(1, 5), 0)") ==> ujson.Arr(2, 3, 4, 5) + eval("std.removeAt(std.range(1, 5), 2)") ==> ujson.Arr(1, 2, 4, 5) + eval("std.removeAt(std.range(1, 5), 4)") ==> ujson.Arr(1, 2, 3, 4) + } + test("splitLimit negative maxsplits") { eval("""std.splitLimit("a,b,c", ",", -2)""") ==> ujson.Arr("a", "b", "c") eval("""std.splitLimit("a,b,c", ",", -2.5)""") ==> ujson.Arr("a", "b", "c") diff --git a/sjsonnet/test/src/sjsonnet/ValArrayViewTests.scala b/sjsonnet/test/src/sjsonnet/ValArrayViewTests.scala new file mode 100644 index 00000000..069c1e18 --- /dev/null +++ b/sjsonnet/test/src/sjsonnet/ValArrayViewTests.scala @@ -0,0 +1,47 @@ +package sjsonnet + +import utest._ + +object ValArrayViewTests extends TestSuite { + private val testPos = new Position(null, -1) + + private final class NoBulkArr(size: Int) extends Val.Arr(testPos, Val.Arr.EMPTY_EVAL_ARRAY) { + var bulkCalls: Int = 0 + + override def length: Int = size + + override def value(i: Int): Val = Val.cachedNum(testPos, i) + + override def eval(i: Int): Eval = Val.cachedNum(testPos, i) + + override def asLazyArray: Array[Eval] = { + bulkCalls += 1 + throw new java.lang.AssertionError("source asLazyArray should not be required") + } + } + + def tests: Tests = Tests { + test("sliceAvoidsBulkMaterializingSource") { + val source = new NoBulkArr(10000) + + val small = source.sliced(testPos, 10, 16, 2) + small.asStrictArray.map(_.asDouble).toSeq ==> Seq(10.0, 12.0, 14.0) + source.bulkCalls ==> 0 + + val large = source.sliced(testPos, 100, 9000, 3) + large.length ==> 2967 + large.value(0).asDouble ==> 100.0 + large.value(1).asDouble ==> 103.0 + large.value(large.length - 1).asDouble ==> 8998.0 + + val reversed = large.reversed(testPos) + reversed.value(0).asDouble ==> 8998.0 + reversed.value(reversed.length - 1).asDouble ==> 100.0 + + val materialized = large.asLazyArray + materialized.length ==> 2967 + materialized(2).value.asDouble ==> 106.0 + source.bulkCalls ==> 0 + } + } +}