Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions bench/resources/sjsonnet_suite/lazy_array_slice_remove.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
local n = 500000;
local source = std.range(0, n - 1);
local sliced = source[123:n - 123:2];
local trimmed = std.removeAt(sliced, 17);
local trimmedLen = std.length(trimmed);
local probes = std.range(0, 19999);
{
len: trimmedLen,
first: trimmed[0],
mid: trimmed[std.floor(trimmedLen / 2)],
last: trimmed[trimmedLen - 1],
checksum: std.foldl(
function(acc, i) acc + trimmed[(i * 7919) % trimmedLen],
probes,
0
),
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"checksum": 4998971188,
"first": 123,
"last": 499875,
"len": 249876,
"mid": 250001
}
27 changes: 1 addition & 26 deletions sjsonnet/src/sjsonnet/Util.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,31 +19,6 @@ object Util {
s"${line + 1}:${col + 1}"
}

private def sliceArr(arr: Val.Arr, start: Int, end: Int, step: Int): Array[Eval] = {
if (start >= end || start >= arr.length) {
Array.empty[Eval]
} else
step match {
case 1 =>
val safeEnd = math.min(end, arr.length)
val result = new Array[Eval](safeEnd - start)
var i = start
while (i < safeEnd) {
result(i - start) = arr.eval(i)
i += 1
}
result
case _ =>
val builder = new scala.collection.mutable.ArrayBuilder.ofRef[Eval]
var i = start
while (i < end && i < arr.length) {
if (i >= 0) builder += arr.eval(i)
i += step
}
builder.result()
}
}

def slice(
pos: Position,
ev: EvalScope,
Expand Down Expand Up @@ -77,7 +52,7 @@ object Util {
}
val res = indexable match {
case Val.Str(_, s) => Val.Str(pos, Util.sliceStr(s, start, end, step))
case arr: Val.Arr => Val.Arr(pos, Util.sliceArr(arr, start, end, step))
case arr: Val.Arr => arr.sliced(pos, start, end, step)
case _ => Error.fail("Can only slice array or string, not " + indexable.prettyName, pos)(ev)
}
res: Val
Expand Down
117 changes: 108 additions & 9 deletions sjsonnet/src/sjsonnet/Val.scala
Original file line number Diff line number Diff line change
Expand Up @@ -645,8 +645,8 @@ object Val {
}

/**
* Concatenate two arrays. For large left-side arrays where neither operand is already a concat
* view, creates a lazy ConcatView that defers the copy until bulk access is needed. This is
* Concatenate two arrays. For large results where neither operand is already a concat view,
* creates a lazy ConcatView that defers the copy until bulk access is needed. This is
* particularly beneficial for patterns like `big_array + [x] < big_array + [y]` where
* element-wise comparison via value(i) never needs the flattened backing array.
*
Expand All @@ -655,28 +655,34 @@ object Val {
*/
def concat(newPos: Position, rhs: Arr): Arr = {
val leftLen = this.length
// Use lazy concat when the left side is large enough that avoiding the
val rhsLen = rhs.length
val totalLenLong = leftLen.toLong + rhsLen.toLong
if (totalLenLong > Int.MaxValue) Error.fail("array too large")
val totalLen = totalLenLong.toInt
// Use lazy concat when the result is large enough that avoiding the
// arraycopy is worthwhile. Limit to depth-1 (neither side is a concat view)
// to prevent O(depth) access chains and ensure value(i) stays O(1).
if (leftLen >= Arr.LAZY_CONCAT_THRESHOLD && !this.isConcatView && !rhs.isConcatView) {
if (totalLen >= Arr.LAZY_CONCAT_THRESHOLD && !this.isConcatView && !rhs.isConcatView) {
val result = Arr(newPos, Arr.EMPTY_EVAL_ARRAY)
result._concatLeft = this
result._concatRight = rhs
result._length = leftLen + rhs.length
result._length = totalLen
result
} else {
// Eager path: allocate + arraycopy (also used when either side is a concat view
// to flatten and prevent deep nesting)
val lArr = this.asLazyArray
val rArr = rhs.asLazyArray
val rLen = rArr.length
val result = new Array[Eval](leftLen + rLen)
val result = new Array[Eval](totalLen)
System.arraycopy(lArr, 0, result, 0, leftLen)
System.arraycopy(rArr, 0, result, leftLen, rLen)
System.arraycopy(rArr, 0, result, leftLen, rhsLen)
Arr(newPos, result)
}
}

def sliced(newPos: Position, start: Int, end: Int, step: Int): Arr =
Arr.sliced(newPos, this, start, end, step)

def iterator: Iterator[Val] = {
val self = this
new Iterator[Val] {
Expand Down Expand Up @@ -861,6 +867,46 @@ object Val {
new RepeatedArr(newPos, source.reversed(newPos), count)
}

private final class SliceArr(
pos0: Position,
private[this] var source: Arr,
private[this] val start: Int,
private[this] val step: Int,
size: Int)
extends Arr(pos0, null) {
_length = size

@inline private[this] def sourceIndex(i: Int): Int = start + i * step

override def value(i: Int): Val =
if ((arr ne null) || isConcatView) super.value(i)
else source.value(sourceIndex(i))

override def eval(i: Int): Eval =
if ((arr ne null) || isConcatView) super.eval(i)
else source.eval(sourceIndex(i))

override def asLazyArray: Array[Eval] = {
if ((arr ne null) || isConcatView) super.asLazyArray
else {
val len = _length
val result = new Array[Eval](len)
var i = 0
while (i < len) {
result(i) = source.eval(sourceIndex(i))
i += 1
}
arr = result
source = null
result
}
}

override def reversed(newPos: Position): Arr =
if ((arr ne null) || isConcatView) super.reversed(newPos)
else new SliceArr(newPos, source, sourceIndex(_length - 1), -step, _length)
}

/**
* Base class for array-level lazy views.
*
Expand Down Expand Up @@ -1236,6 +1282,57 @@ object Val {
if (count == 0 || source.length == 0) Arr(pos, EMPTY_EVAL_ARRAY)
else new RepeatedArr(pos, source, count)

def sliced(pos: Position, source: Arr, start: Int, end: Int, step: Int): Arr = {
val sourceLen = source.length
val first = firstSliceIndex(sourceLen, start, end, step)
val len = sliceLength(sourceLen, first, end, step)
if (len == 0) Arr(pos, EMPTY_EVAL_ARRAY)
else if (!useSliceView(source, sourceLen, len)) copySlice(pos, source, first, step, len)
else new SliceArr(pos, source, first, step, len)
}

private def useSliceView(source: Arr, sourceLen: Int, sliceLen: Int): Boolean = {
if (sliceLen < LAZY_VIEW_THRESHOLD) false
else
source match {
case _: RangeArr | _: ByteArr | _: RepeatedArr | _: LazyViewArr | _: ReversedLazyViewArr |
_: SliceArr =>
true
case _ =>
// Plain flat/reversed/concat arrays may retain a large backing graph. Copy small
// sub-slices so a short-lived result does not pin much more memory than it exposes.
sliceLen.toLong * SLICE_VIEW_MIN_SOURCE_FRACTION_DEN >= sourceLen.toLong
}
}

private def firstSliceIndex(sourceLen: Int, start: Int, end: Int, step: Int): Int = {
if (step <= 0) throw new IllegalArgumentException("slice step must be positive")
if (start >= end || start >= sourceLen || start >= 0) start
else {
val skipped = ((-start.toLong) + step.toLong - 1L) / step.toLong
(start.toLong + skipped * step.toLong).toInt
}
}

private def sliceLength(sourceLen: Int, start: Int, end: Int, step: Int): Int = {
if (start >= end || start >= sourceLen) 0
else {
val safeEnd = math.min(end, sourceLen)
if (safeEnd <= start) 0
else (((safeEnd.toLong - start.toLong - 1L) / step.toLong) + 1L).toInt
}
}

private def copySlice(pos: Position, source: Arr, start: Int, step: Int, len: Int): Arr = {
val result = new Array[Eval](len)
var i = 0
while (i < len) {
result(i) = source.eval(start + i * step)
i += 1
}
Arr(pos, result)
}

def mapped(pos: Position, source: Arr, func: Func, callPos: Position, ev: EvalScope): Arr =
if (source.length == 0) Arr(pos, EMPTY_EVAL_ARRAY)
else if (source.length < LAZY_VIEW_THRESHOLD) {
Expand Down Expand Up @@ -1299,7 +1396,7 @@ object Val {
def range(pos: Position, from: Int, size: Int): Arr = new RangeArr(pos, from, size)

/**
* Threshold for lazy concat. Arrays with left.length >= this value use a virtual ConcatView
* Threshold for lazy concat. Concats with total length >= this value use a virtual ConcatView
* instead of eager arraycopy. Below this size, arraycopy is cheap enough that the indirection
* overhead of virtual dispatch outweighs the copy savings.
*/
Expand All @@ -1313,6 +1410,8 @@ object Val {
*/
val LAZY_VIEW_THRESHOLD = 4096

private val SLICE_VIEW_MIN_SOURCE_FRACTION_DEN = 4

private[sjsonnet] val EMPTY_EVAL_ARRAY: Array[Eval] = Array.empty[Eval]
}

Expand Down
38 changes: 14 additions & 24 deletions sjsonnet/src/sjsonnet/stdlib/ArrayModule.scala
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,18 @@ object ArrayModule extends AbstractFunctionModule {
sum
}

private def removeAtView(arr: Val.Arr, removeIdx: Int): Val.Arr = {
val len = arr.length
if (len == 1) Val.Arr(arr.pos, Val.Arr.EMPTY_EVAL_ARRAY)
else if (removeIdx == 0) arr.sliced(arr.pos, 1, len, 1)
else if (removeIdx == len - 1) arr.sliced(arr.pos, 0, removeIdx, 1)
else {
val left = arr.sliced(arr.pos, 0, removeIdx, 1)
val right = arr.sliced(arr.pos, removeIdx + 1, len, 1)
left.concat(arr.pos, right)
}
}

/**
* [[https://jsonnet.org/ref/stdlib.html#std-minArray std.minArray(arr, keyF, onEmpty)]].
*
Expand Down Expand Up @@ -948,17 +960,7 @@ object ArrayModule extends AbstractFunctionModule {
if (idx == -1) {
arr
} else {
val result = new Array[Eval](arr.length - 1)
var j = 0
while (j < idx) {
result(j) = arr.eval(j)
j += 1
}
while (j < result.length) {
result(j) = arr.eval(j + 1)
j += 1
}
Val.Arr(arr.pos, result)
removeAtView(arr, idx)
}
},
/**
Expand All @@ -976,19 +978,7 @@ object ArrayModule extends AbstractFunctionModule {
case _ => -1
}
if (removeIdx == -1) arr
else {
val result = new Array[Eval](arr.length - 1)
var i = 0
while (i < removeIdx) {
result(i) = arr.eval(i)
i += 1
}
while (i < result.length) {
result(i) = arr.eval(i + 1)
i += 1
}
Val.Arr(arr.pos, result)
}
else removeAtView(arr, removeIdx)
},
/**
* [[https://jsonnet.org/ref/stdlib.html#std-sum std.sum(arr)]].
Expand Down
9 changes: 9 additions & 0 deletions sjsonnet/test/src/sjsonnet/Std0150FunctionsTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,18 @@ object Std0150FunctionsTests extends TestSuite {
test("slice") {
eval("std.slice([1, 2, 3, 4, 5, 6], 0, 4, 1)") ==> ujson.read("[ 1, 2, 3, 4 ]")
eval("std.slice([1, 2, 3, 4, 5, 6], 1, 6, 2)") ==> ujson.read("[ 2, 4, 6 ]")
eval("std.slice(std.range(1, 6), -3, 99, 1)") ==> ujson.Arr(4, 5, 6)
eval("std.slice(std.range(1, 6), 1, 6, 2)") ==> ujson.Arr(2, 4, 6)
eval("""std.slice("jsonnet", 0, 4, 1)""") ==> ujson.Str("json")
}

test("remove range arrays") {
eval("std.remove(std.range(1, 5), 3)") ==> ujson.Arr(1, 2, 4, 5)
eval("std.removeAt(std.range(1, 5), 0)") ==> ujson.Arr(2, 3, 4, 5)
eval("std.removeAt(std.range(1, 5), 2)") ==> ujson.Arr(1, 2, 4, 5)
eval("std.removeAt(std.range(1, 5), 4)") ==> ujson.Arr(1, 2, 3, 4)
}

test("splitLimit negative maxsplits") {
eval("""std.splitLimit("a,b,c", ",", -2)""") ==> ujson.Arr("a", "b", "c")
eval("""std.splitLimit("a,b,c", ",", -2.5)""") ==> ujson.Arr("a", "b", "c")
Expand Down
47 changes: 47 additions & 0 deletions sjsonnet/test/src/sjsonnet/ValArrayViewTests.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package sjsonnet

import utest._

object ValArrayViewTests extends TestSuite {
private val testPos = new Position(null, -1)

private final class NoBulkArr(size: Int) extends Val.Arr(testPos, Val.Arr.EMPTY_EVAL_ARRAY) {
var bulkCalls: Int = 0

override def length: Int = size

override def value(i: Int): Val = Val.cachedNum(testPos, i)

override def eval(i: Int): Eval = Val.cachedNum(testPos, i)

override def asLazyArray: Array[Eval] = {
bulkCalls += 1
throw new java.lang.AssertionError("source asLazyArray should not be required")
}
}

def tests: Tests = Tests {
test("sliceAvoidsBulkMaterializingSource") {
val source = new NoBulkArr(10000)

val small = source.sliced(testPos, 10, 16, 2)
small.asStrictArray.map(_.asDouble).toSeq ==> Seq(10.0, 12.0, 14.0)
source.bulkCalls ==> 0

val large = source.sliced(testPos, 100, 9000, 3)
large.length ==> 2967
large.value(0).asDouble ==> 100.0
large.value(1).asDouble ==> 103.0
large.value(large.length - 1).asDouble ==> 8998.0

val reversed = large.reversed(testPos)
reversed.value(0).asDouble ==> 8998.0
reversed.value(reversed.length - 1).asDouble ==> 100.0

val materialized = large.asLazyArray
materialized.length ==> 2967
materialized(2).value.asDouble ==> 106.0
source.bulkCalls ==> 0
}
}
}
Loading