Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Arrow.dom.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ export {
tableFromIPC, tableToIPC,
MessageReader, AsyncMessageReader, JSONMessageReader,
Message,
RecordBatch,
RecordBatch, recordBatchFromArrays,
util,
Builder, makeBuilder, builderThroughIterable, builderThroughAsyncIterable,
compressionRegistry, CompressionType,
Expand Down
2 changes: 1 addition & 1 deletion src/Arrow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ export { compressionRegistry } from './ipc/compression/registry.js';
export type { Codec } from './ipc/compression/registry.js';
export { MessageReader, AsyncMessageReader, JSONMessageReader } from './ipc/message.js';
export { Message } from './ipc/metadata/message.js';
export { RecordBatch } from './recordbatch.js';
export { RecordBatch, recordBatchFromArrays } from './recordbatch.js';
export type { ArrowJSONLike, FileHandle, Readable, Writable, ReadableWritable, ReadableDOMStreamOptions } from './io/interfaces.js';

export {
Expand Down
28 changes: 27 additions & 1 deletion src/factories.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,35 @@ export function vectorFromArray<T extends dtypes.DataType>(data: DataProps<T>):
export function vectorFromArray<T extends TypedArray | BigIntArray | readonly unknown[]>(data: T): Vector<ArrayDataType<T>>;

export function vectorFromArray(init: any, type?: dtypes.DataType) {
if (init instanceof Data || init instanceof Vector || init.type instanceof dtypes.DataType || ArrayBuffer.isView(init)) {
if (init instanceof Data || init instanceof Vector || init.type instanceof dtypes.DataType) {
return makeVector(init as any);
}
if (ArrayBuffer.isView(init) && !type) {
return makeVector(init as any);
}
if (ArrayBuffer.isView(init) && type) {
// Validate BigInt/number boundary
const isBigIntInput = init instanceof BigInt64Array || init instanceof BigUint64Array;
const isBigIntTarget = type.ArrayType === BigInt64Array || type.ArrayType === BigUint64Array;
if (isBigIntInput && !isBigIntTarget) {
throw new TypeError(
`Cannot convert BigInt input to ${type}. BigInt arrays can only target BigInt-based types (e.g. Int64, Uint64).`
);
}
if (!isBigIntInput && isBigIntTarget) {
throw new TypeError(
`Cannot convert non-BigInt input to ${type}. ${type} requires BigInt values.`
);
}

// Fast path: direct TypedArray conversion for Int and Float types
if (dtypes.DataType.isInt(type) || dtypes.DataType.isFloat(type)) {
const data = init.constructor === type.ArrayType
? init // zero-copy, same TypedArray type
: new (type.ArrayType as any)(init); // standard JS TypedArray conversion
Comment on lines +106 to +108
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this is redundant, as this will happen in makeVector via makeData calling toArrayBufferView on the incoming typed array here and here.

toArrayBufferView returns a zero-copy ArrayBufferView of the desired type (here) when given an ArrayBuffer (or any ArrayBufferView).

return makeVector({ type, data, offset: 0, length: data.length, nullCount: 0 } as any);
}
Comment on lines +105 to +110
Copy link

Copilot AI Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fast path for Float16 type is incorrect when the input TypedArray is not already a Uint16Array. Float16 uses Uint16Array as its ArrayType, but the values stored are IEEE 754 half-precision encoded, not plain integers. Doing new Uint16Array(new Float32Array([1.5])) yields Uint16Array([1]), not the correct half-precision encoding (0x3E00).

Consider excluding Float16 (i.e., Precision.HALF) from this fast path so it falls through to the builder, which correctly handles the float16 encoding. For example, the condition could additionally check !(dtypes.DataType.isFloat(type) && (type as dtypes.Float).precision === Precision.HALF).

Copilot uses AI. Check for mistakes.
}
const options: IterableBuilderOptions = { type: type ?? inferType(init), nullValues: [null] };
const chunks = [...builderThroughIterable(options)(init)];
const vector = chunks.length === 1 ? chunks[0] : chunks.reduce((a, b) => a.concat(b));
Expand Down
57 changes: 57 additions & 0 deletions src/recordbatch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import { Vector } from './vector.js';
import { Schema, Field } from './schema.js';
import { DataType, Struct, Null, TypeMap } from './type.js';
import { wrapIndex } from './util/vector.js';
import { vectorFromArray } from './factories.js';
import { ArrayDataType, BigIntArray, TypedArray } from './interfaces.js';

import { instance as getVisitor } from './visitor/get.js';
import { instance as setVisitor } from './visitor/set.js';
Expand Down Expand Up @@ -306,6 +308,61 @@ Object.defineProperty(RecordBatch, Symbol.hasInstance, {
},
});

/**
 * Builds a {@link RecordBatch} from a map of column name → column values,
 * where each column is a TypedArray, BigInt array, or plain JavaScript array.
 *
 * Without a schema, each column's Arrow type is inferred from its values.
 * With a schema, the schema dictates column types, ordering, nullability,
 * and metadata; every schema field must be present in `input`.
 *
 * @example
 * ```ts
 * const batch = recordBatchFromArrays({
 *   a: [1, 2, 3],
 *   b: new Int8Array([1, 2, 3]),
 * });
 * ```
 *
 * @example
 * ```ts
 * const schema = new Schema([
 *   new Field('a', new Int32),
 *   new Field('b', new Utf8),
 * ]);
 * const batch = recordBatchFromArrays({ a: [1, 2, 3], b: ['x', 'y', 'z'] }, schema);
 * ```
 *
 * @param input An object mapping column names to typed arrays or JavaScript arrays.
 * @param schema Optional schema to control column types, ordering, nullability, and metadata.
 * @returns A new RecordBatch.
 */
export function recordBatchFromArrays<T extends TypeMap>(
    input: Record<string, TypedArray | BigIntArray | readonly unknown[]>,
    schema: Schema<T>
): RecordBatch<T>;
export function recordBatchFromArrays<I extends Record<string | number | symbol, TypedArray | BigIntArray | readonly unknown[]>>(
    input: I
): RecordBatch<{ [P in keyof I]: ArrayDataType<I[P]> }>;
export function recordBatchFromArrays(
    input: Record<string, TypedArray | BigIntArray | readonly unknown[]>,
    schema?: Schema
): RecordBatch {
    if (!schema) {
        // No schema: infer each column's type independently and key the
        // resulting Data by the input property name.
        const dataMap: Record<string, Data> = {};
        for (const [name, values] of Object.entries(input)) {
            // NOTE(review): assumes vectorFromArray yields a single chunk here;
            // data[0] would silently drop any later chunks — TODO confirm.
            dataMap[name] = vectorFromArray(values).data[0];
        }
        return new RecordBatch(dataMap as any);
    }
    // Schema-driven: iterate fields in schema order so column ordering and
    // types come from the schema, not from the input object's key order.
    const children = schema.fields.map((field) => {
        const values = input[field.name];
        if (values === undefined) {
            throw new TypeError(
                `Schema field "${field.name}" not found in input. Available keys: [${Object.keys(input).join(', ')}]`
            );
        }
        return vectorFromArray(values as any, field.type).data[0];
    });
    return new RecordBatch(schema, makeData({ type: new Struct(schema.fields), children }));
}

/** @ignore */
function ensureSameLengthData<T extends TypeMap = any>(
Expand Down
47 changes: 40 additions & 7 deletions src/table.ts
Original file line number Diff line number Diff line change
Expand Up @@ -454,15 +454,48 @@ export function makeTable<I extends Record<string | number | symbol, TypedArray>
* })
* ```
*
* @param input Input an object of typed arrays or JavaScript arrays.
* @example
* ```ts
* const schema = new Schema([
* new Field('a', new Int32),
* new Field('b', new Utf8),
* ]);
* const table = tableFromArrays({ a: [1, 2, 3], b: ['x', 'y', 'z'] }, schema);
* ```
*
* @param input An object mapping column names to typed arrays or JavaScript arrays.
* @param schema Optional schema to control column types, ordering, nullability, and metadata.
* @returns A new Table.
*/
export function tableFromArrays<I extends Record<string | number | symbol, TypedArray | BigIntArray | readonly unknown[]>>(input: I): Table<{ [P in keyof I]: ArrayDataType<I[P]> }> {
type T = { [P in keyof I]: ArrayDataType<I[P]> };
const vecs = {} as VectorsMap<T>;
const inputs = Object.entries(input) as [keyof I, I[keyof I]][];
for (const [key, col] of inputs) {
export function tableFromArrays<T extends TypeMap>(
    input: Record<string, TypedArray | BigIntArray | readonly unknown[]>,
    schema: Schema<T>
): Table<T>;
export function tableFromArrays<I extends Record<string | number | symbol, TypedArray | BigIntArray | readonly unknown[]>>(
    input: I
): Table<{ [P in keyof I]: ArrayDataType<I[P]> }>;
export function tableFromArrays(
    input: Record<string, TypedArray | BigIntArray | readonly unknown[]>,
    schema?: Schema
): Table {
    if (!schema) {
        // No schema: infer each column's Arrow type from its values.
        const vecs: Record<string, Vector> = {};
        for (const [name, values] of Object.entries(input)) {
            vecs[name] = vectorFromArray(values);
        }
        return new Table(vecs);
    }
    // Schema-driven: build one Vector per schema field, in schema order,
    // failing fast on any field the caller did not supply.
    const columns = schema.fields.map((field) => {
        const values = input[field.name];
        if (values === undefined) {
            throw new TypeError(
                `Schema field "${field.name}" not found in input. Available keys: [${Object.keys(input).join(', ')}]`
            );
        }
        return vectorFromArray(values as any, field.type);
    });
    // Re-chunk the independently-built columns into aligned RecordBatches;
    // presumably the helper also adjusts the schema where needed (hence
    // "adjustedSchema") — see distributeVectorsIntoRecordBatches.
    const [adjustedSchema, batches] = distributeVectorsIntoRecordBatches(schema, columns);
    return new Table(adjustedSchema, batches);
}
97 changes: 96 additions & 1 deletion test/unit/recordbatch/record-batch-tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import '../../jest-extensions.js';
import { arange } from '../utils.js';

import { RecordBatch, makeVector } from 'apache-arrow';
import { RecordBatch, makeVector, recordBatchFromArrays, Schema, Field, Int32, Float32, Float64, Utf8, Dictionary } from 'apache-arrow';

function numsRecordBatch(i32Len: number, f32Len: number) {
return new RecordBatch({
Expand Down Expand Up @@ -130,3 +130,98 @@ describe(`RecordBatch`, () => {
});
});
});

describe(`recordBatchFromArrays()`, () => {
// Happy path without a schema: one column per input key, types inferred per column.
test(`creates a RecordBatch from typed arrays and JavaScript arrays`, () => {
const batch = recordBatchFromArrays({
a: new Float32Array([1, 2, 3]),
b: [4, 5, 6],
c: ['x', 'y', 'z'],
});

// Inference: Float32Array → Float32; plain JS numbers → Float64; strings → Dictionary.
expect(batch.numRows).toBe(3);
expect(batch.numCols).toBe(3);
expect(batch.getChild('a')!.type).toBeInstanceOf(Float32);
expect(batch.getChild('b')!.type).toBeInstanceOf(Float64);
expect(batch.getChild('c')!.type).toBeInstanceOf(Dictionary);
});

// With an explicit schema, field types come from the schema, not inference.
test(`schema overrides type inference`, () => {
const schema = new Schema([
new Field('a', new Int32),
new Field('b', new Utf8),
]);
const batch = recordBatchFromArrays({ a: [1, 2, 3], b: ['x', 'y', 'z'] }, schema);

expect(batch.numRows).toBe(3);
expect(batch.getChild('a')!.type).toBeInstanceOf(Int32);
expect(batch.getChild('b')!.type).toBeInstanceOf(Utf8);
expect(batch.getChild('a')!.toArray()).toEqual(new Int32Array([1, 2, 3]));
});

// A TypedArray input is converted to the schema's target type (Float32 → Int32).
test(`schema coerces TypedArray type`, () => {
const schema = new Schema([new Field('a', new Int32)]);
const batch = recordBatchFromArrays({ a: new Float32Array([1, 2, 3]) }, schema);
expect(batch.getChild('a')!.type).toBeInstanceOf(Int32);
expect(batch.getChild('a')!.toArray()).toEqual(new Int32Array([1, 2, 3]));
});

// Custom key/value metadata attached to the schema survives batch construction.
test(`preserves schema metadata`, () => {
const schema = new Schema(
[new Field('a', new Int32)],
new Map([['source', 'test']])
);
const batch = recordBatchFromArrays({ a: [1, 2, 3] }, schema);
expect(batch.schema.metadata.get('source')).toBe('test');
});

// A schema field absent from the input is a TypeError with a descriptive message.
test(`throws on missing schema field`, () => {
const schema = new Schema([new Field('c', new Int32)]);
expect(() => recordBatchFromArrays({ a: [1] }, schema)).toThrow(TypeError);
expect(() => recordBatchFromArrays({ a: [1] }, schema)).toThrow(/Schema field "c" not found in input/);
});

// Shorter columns are padded to the longest column's length with nulls.
test(`handles different length columns via ensureSameLengthData`, () => {
const schema = new Schema([
new Field('a', new Int32),
new Field('b', new Int32),
]);
const batch = recordBatchFromArrays({ a: [1, 2, 3], b: [4, 5] }, schema);
expect(batch.numRows).toBe(3);
expect(batch.getChild('a')!).toHaveLength(3);
expect(batch.getChild('b')!).toHaveLength(3);
// The missing third value of 'b' becomes a null slot.
expect(batch.getChild('b')!.nullCount).toBe(1);
});

// Column order follows the schema's field order, not the input object's key order.
test(`preserves field ordering from schema`, () => {
const schema = new Schema([
new Field('b', new Float64),
new Field('a', new Int32),
]);
const batch = recordBatchFromArrays({ a: [1, 2, 3], b: [4.0, 5.0, 6.0] }, schema);
expect(batch.schema.fields[0].name).toBe('b');
expect(batch.schema.fields[1].name).toBe('a');
expect(batch.getChild('b')!.type).toBeInstanceOf(Float64);
expect(batch.getChild('a')!.type).toBeInstanceOf(Int32);
});

// Zero-length input yields an empty batch that still carries the schema's columns.
test(`handles empty arrays`, () => {
const schema = new Schema([new Field('a', new Int32)]);
const batch = recordBatchFromArrays({ a: new Int32Array(0) }, schema);
expect(batch.numRows).toBe(0);
expect(batch.numCols).toBe(1);
expect(batch.getChild('a')!.type).toBeInstanceOf(Int32);
});

// Same inference rules as the first test, exercised without a schema argument.
test(`basic creation without schema infers types`, () => {
const batch = recordBatchFromArrays({
f32: new Float32Array([1, 2]),
nums: [1, 2, 3],
strs: ['a', 'b'],
});

expect(batch.getChild('f32')!.type).toBeInstanceOf(Float32);
expect(batch.getChild('nums')!.type).toBeInstanceOf(Float64);
expect(batch.getChild('strs')!.type).toBeInstanceOf(Dictionary);
});
});
Loading
Loading