Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion src/factories.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,30 @@ export function tableFromJSON<T extends Record<string, unknown>>(array: T[]): Ta
return new Table(batch);
}

/** @ignore */
// Like compareTypes, but ignores Dictionary IDs — which are auto-incremented on
// every construction, so two separately-inferred Dictionary types for the same
// data will always have different IDs even though they are structurally identical.
// Recurses through List children and Struct field types for the same reason.
function compareTypesForInference(a: dtypes.DataType, b: dtypes.DataType): boolean {
if (dtypes.DataType.isDictionary(a) && dtypes.DataType.isDictionary(b)) {
return a.isOrdered === b.isOrdered
&& compareTypes(a.indices, b.indices)
&& compareTypes(a.dictionary, b.dictionary);
}
if (dtypes.DataType.isList(a) && dtypes.DataType.isList(b)) {
return compareTypesForInference(a.children[0].type, b.children[0].type);
}
if (dtypes.DataType.isStruct(a) && dtypes.DataType.isStruct(b)) {
return a.children.length === b.children.length
&& a.children.every((f, i) =>
f.name === b.children[i].name
&& compareTypesForInference(f.type, b.children[i].type)
);
}
return compareTypes(a, b);
}

/** @ignore */
function inferType<T extends readonly unknown[]>(values: T): JavaScriptArrayDataType<T>;
function inferType(value: readonly unknown[]): dtypes.DataType {
Expand Down Expand Up @@ -149,7 +173,7 @@ function inferType(value: readonly unknown[]): dtypes.DataType {
} else if (arraysCount + nullsCount === value.length) {
const array = value as Array<unknown>[];
const childType = inferType(array[array.findIndex((ary) => ary != null)]);
if (array.every((ary) => ary == null || compareTypes(childType, inferType(ary)))) {
if (array.every((ary) => ary == null || compareTypesForInference(childType, inferType(ary)))) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if we just passed the dictionaryId down through inferType? e.g.

Suggested change
if (array.every((ary) => ary == null || compareTypesForInference(childType, inferType(ary)))) {
if (array.every((ary) => ary == null || compareTypes(childType, inferType(ary, dtypes.DataType.isDictionary(childType) ? childType.id : null)))) {

Then we could pass the dictionary id into the Dictionary constructor on L142?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would probably need to maintain a Map of key -> dictionary id in order to do the same thing for dictionary-encoded struct children.

return new dtypes.List(new Field('', childType, true));
}
} else if (objectsCount + nullsCount === value.length) {
Expand Down
32 changes: 32 additions & 0 deletions test/unit/table/table-test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,22 @@ describe('tableFromArrays()', () => {
expect(table.getChild('d')!.type).toBeInstanceOf(Dictionary);
expect(table.getChild('e' as any)).toBeNull();
});

test(`creates table from arrays of strings`, () => {
const table = tableFromArrays({ word: [['a', 'b'], ['c', 'd']] });
expect(table.numRows).toBe(2);
const word = table.getChild('word')!;
expect(word.get(0)!.toArray()).toEqual(['a', 'b']);
expect(word.get(1)!.toArray()).toEqual(['c', 'd']);
});

test(`creates table from arrays of objects containing strings`, () => {
const table = tableFromArrays({ customers: [{ names: ['joe'] }, { names: ['bob'] }] });
expect(table.numRows).toBe(2);
const customers = table.getChild('customers')!;
expect(customers.get(0)!.names.toArray()).toEqual(['joe']);
expect(customers.get(1)!.names.toArray()).toEqual(['bob']);
});
});


Expand All @@ -78,4 +94,20 @@ describe('tableFromJSON()', () => {
expect(table.getChild('b')!.type).toBeInstanceOf(Bool);
expect(table.getChild('c')!.type).toBeInstanceOf(Dictionary);
});

test(`handles arrays of strings`, () => {
const t1 = tableFromJSON([{ a: ['hi'] }]);
expect(t1.getChild('a')!.get(0)!.toArray()).toEqual(['hi']);

const t2 = tableFromJSON([{ a: ['hi', 'there'] }]);
expect(t2.getChild('a')!.get(0)!.toArray()).toEqual(['hi', 'there']);
});

test(`handles nested objects containing strings`, () => {
const table = tableFromJSON([{ a: [{ b: 'hi' }, { b: 'there' }] }]);
expect(table.numRows).toBe(1);
const rows = table.getChild('a')!.get(0)!;
expect(rows.get(0)!.b).toBe('hi');
expect(rows.get(1)!.b).toBe('there');
});
});