1515// specific language governing permissions and limitations
1616// under the License.
1717
18+ import { Data } from '../data.js' ;
1819import { Vector } from '../vector.js' ;
1920import { Visitor } from '../visitor.js' ;
2021import { Type , Precision } from '../enum.js' ;
2122import { TypeToDataType } from '../interfaces.js' ;
23+ import { instance as getVisitor } from './get.js' ;
2224import {
2325 DataType , Dictionary ,
2426 Bool , Null , Utf8 , Utf8View , LargeUtf8 , Binary , BinaryView , LargeBinary , Decimal , FixedSizeBinary , List , LargeList , FixedSizeList , Map_ , Struct ,
@@ -131,6 +133,19 @@ function vectorIterator<T extends DataType>(vector: Vector<T>): IterableIterator
131133 } ) ;
132134}
133135
/**
 * Builds the iterator used for run-end-encoded vectors.
 *
 * Every chunk of `vector` is walked by its own `RunEndEncodedIterator`, which
 * is constructed over the slice of `vector` spanning that chunk's logical range.
 *
 * @ignore
 */
function runEndEncodedIterator<T extends RunEndEncoded>(vector: Vector<T>): IterableIterator<T['TValue'] | null> {
    // Logical position within `vector` at which the next requested chunk begins.
    // NOTE(review): this running total is only right if ChunkedIterator requests
    // chunks in ascending order, once each — confirm against ChunkedIterator.
    let chunkStart = 0;
    const chunkCount = vector.data.length;
    return new ChunkedIterator(chunkCount, (chunkIndex) => {
        const chunkEnd = chunkStart + vector.data[chunkIndex].length;
        const chunk = vector.slice(chunkStart, chunkEnd);
        chunkStart = chunkEnd;
        return new RunEndEncodedIterator(chunk);
    });
}
148+
134149/** @ignore */
135150class VectorIterator < T extends DataType > implements IterableIterator < T [ 'TValue' ] | null > {
136151 private index = 0 ;
@@ -152,6 +167,89 @@ class VectorIterator<T extends DataType> implements IterableIterator<T['TValue']
152167 }
153168}
154169
/**
 * Iterator over the logical values of a run-end-encoded vector.
 *
 * Only `vector.data[0]` is consulted, so the vector handed in is expected to
 * hold a single chunk. Each logical position is translated into an index of
 * the `values` child by looking it up in the `runEnds` child. The run resolved
 * by the previous lookup is cached, so consecutive positions inside the same
 * run — or stepping into the directly following run — cost a constant number
 * of run-end reads; any other jump falls back to a binary search.
 *
 * @ignore
 */
class RunEndEncodedIterator<T extends RunEndEncoded> implements IterableIterator<T['TValue'] | null> {
    /** Next logical index to yield. */
    private index = 0;
    /** Index into `runEnds`/`values` resolved by the most recent lookup. */
    private lastPhysicalIndex = 0;
    /** First child of the chunk: the run-end positions. */
    private readonly runEnds: Data<T['runEndsType']>;
    /** Second child of the chunk: one value per run. */
    private readonly values: Data<T['valueType']>;
    private readonly getRunEnd: (data: Data<T['runEndsType']>, index: number) => T['runEndsType']['TValue'] | null;
    private readonly getValue: (data: Data<T['valueType']>, index: number) => T['TValue'] | null;

    /**
     * @param vector Run-end-encoded vector to iterate; its first chunk supplies
     * the `runEnds` and `values` children as well as the logical offset.
     */
    constructor(private vector: Vector<T>) {
        const data = vector.data[0];
        this.runEnds = data.children[0] as Data<T['runEndsType']>;
        this.values = data.children[1] as Data<T['valueType']>;
        // Resolve the element accessors once instead of on every lookup.
        this.getRunEnd = getVisitor.getVisitFn(this.runEnds);
        this.getValue = getVisitor.getVisitFn(this.values);
    }

    /** Yields the value at the current logical index, or signals completion. */
    next(): IteratorResult<T['TValue'] | null> {
        if (this.index < this.vector.length) {
            const value = this.getValueAtIndex(this.index++);
            return { value };
        }
        return { done: true, value: null };
    }

    /**
     * Reads the value stored for the run that contains `logicalIndex`.
     *
     * @returns Whatever the value accessor yields for that run, which may be `null`.
     */
    private getValueAtIndex(logicalIndex: number): T['TValue'] | null {
        return this.getValue(this.values, this.findPhysicalIndex(logicalIndex));
    }

    /** Run end stored at `physicalIndex`, coerced to a plain number. */
    private runEndAt(physicalIndex: number): number {
        return Number(this.getRunEnd(this.runEnds, physicalIndex));
    }

    /**
     * Maps logical index `i` (relative to this chunk) to the index of the run
     * containing it, updating the cached run as a side effect.
     */
    private findPhysicalIndex(i: number): number {
        const runEndsLength = this.runEnds.length;
        const offset = this.vector.data[0].offset;
        // Run ends are compared against the position shifted by the chunk offset.
        const target = offset + i;
        const cached = this.lastPhysicalIndex;

        if (target < this.runEndAt(cached)) {
            // The cached run ends after the target; it is the right run unless
            // an earlier run already covers the target.
            if (cached === 0 || target >= this.runEndAt(cached - 1)) {
                return cached;
            }
            // Target lies in some run before the cached one.
            this.lastPhysicalIndex = this.binarySearchRange(0, cached, i, offset);
            return this.lastPhysicalIndex;
        }

        // The target is past the cached run. During sequential iteration it is
        // almost always in the very next run, so probe that one directly and
        // avoid a binary search over the rest of the run ends on every
        // run transition.
        const nextRun = cached + 1;
        if (nextRun < runEndsLength && target < this.runEndAt(nextRun)) {
            this.lastPhysicalIndex = nextRun;
            return nextRun;
        }

        // Non-sequential jump (or no further run): search everything after the
        // cached run. This yields the same index the probe would have.
        this.lastPhysicalIndex = this.binarySearchRange(nextRun, runEndsLength, i, offset);
        return this.lastPhysicalIndex;
    }

    /**
     * Binary search over run indices `[start, end)` for the first run whose end
     * is greater than `offset + i`.
     *
     * When no run in the range qualifies, the last index of the range is
     * returned; when the range is empty, `start` is returned unchanged.
     */
    private binarySearchRange(start: number, end: number, i: number, offset: number): number {
        const target = offset + i;
        let low = start;
        let high = end - 1;
        while (low < high) {
            const mid = (low + high) >>> 1;
            if (target < this.runEndAt(mid)) {
                high = mid;
            } else {
                low = mid + 1;
            }
        }
        return low;
    }

    [Symbol.iterator]() {
        return this;
    }
}
252+
155253IteratorVisitor . prototype . visitNull = vectorIterator ;
156254IteratorVisitor . prototype . visitBool = vectorIterator ;
157255IteratorVisitor . prototype . visitInt = vectorIterator ;
@@ -206,7 +304,7 @@ IteratorVisitor.prototype.visitDurationMicrosecond = vectorIterator;
IteratorVisitor.prototype.visitDurationNanosecond = vectorIterator;
IteratorVisitor.prototype.visitFixedSizeList = vectorIterator;
IteratorVisitor.prototype.visitMap = vectorIterator;
// Run-end-encoded vectors are registered with their own iterator factory
// rather than the generic `vectorIterator`; the handler is assigned exactly once.
IteratorVisitor.prototype.visitRunEndEncoded = runEndEncodedIterator;

/**
 * Module-level `IteratorVisitor` instance exported for callers of this module.
 *
 * @ignore
 */
export const instance = new IteratorVisitor();
0 commit comments