Posted to commits@arrow.apache.org by pt...@apache.org on 2019/03/02 20:34:32 UTC
[arrow] branch master updated: ARROW-4728: [JS] Fix Table#assign when passed zero-length RecordBatches
This is an automated email from the ASF dual-hosted git repository.
ptaylor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new bd03ba8 ARROW-4728: [JS] Fix Table#assign when passed zero-length RecordBatches
bd03ba8 is described below
commit bd03ba8345f3c53b27050337166f7ead25e87a54
Author: ptaylor <pa...@me.com>
AuthorDate: Sat Mar 2 12:34:16 2019 -0800
ARROW-4728: [JS] Fix Table#assign when passed zero-length RecordBatches
Closes https://issues.apache.org/jira/browse/ARROW-4728
Author: ptaylor <pa...@me.com>
Closes #3789 from trxcllnt/js/fix-assign-zero-length-batches and squashes the following commits:
4c6ab56f <ptaylor> fix Table#assign when passed zero-length RecordBatches
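For context, Table#assign merges the columns of two tables into a new set of RecordBatches; before this change, a zero-length chunk in any column could make the per-batch length bookkeeping report a degenerate batch and drop or misalign child data. A minimal repro sketch, assuming the Table.new/Column.new/Int32Vector.from factory APIs available on master at this point (column names and values are illustrative):

    import { Table, Column, Int32Vector } from 'apache-arrow';

    // Column 'a' is chunked as [3 rows, 0 rows]; the zero-length chunk
    // is what previously tripped up Table#assign's chunk distribution
    const left = Table.new(
        Column.new('a', Int32Vector.from([1, 2, 3]), Int32Vector.from([]))
    );
    const right = Table.new(Column.new('b', Int32Vector.from([4, 5, 6])));

    const combined = left.assign(right); // expect 3 rows, columns 'a' and 'b'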
---
js/src/util/recordbatch.ts | 40 +++++++++++++++++------------------
js/test/unit/table/serialize-tests.ts | 3 ++-
2 files changed, 22 insertions(+), 21 deletions(-)
diff --git a/js/src/util/recordbatch.ts b/js/src/util/recordbatch.ts
index 40ced20..4551d0c 100644
--- a/js/src/util/recordbatch.ts
+++ b/js/src/util/recordbatch.ts
@@ -68,21 +68,23 @@ function uniformlyDistributeChunksAcrossRecordBatches<T extends { [key: string]:
const fields = [...schema.fields];
const batchArgs = [] as [number, Data<T[keyof T]>[]][];
const memo = { numBatches: columns.reduce((n, c) => Math.max(n, c.length), 0) };
- let sameLength = false, numBatches = 0, batchLength = 0, batchData: Data<T[keyof T]>[];
+
+ let numBatches = 0, batchLength = 0;
+ let i: number = -1, numColumns = columns.length;
+ let child: Data<T[keyof T]>, childData: Data<T[keyof T]>[] = [];
while (memo.numBatches-- > 0) {
- [sameLength, batchLength] = columns.reduce((memo, [chunk]) => {
- const [same, batchLength] = memo;
- const chunkLength = chunk ? chunk.length : batchLength;
- isFinite(batchLength) && same && (memo[0] = chunkLength === batchLength);
- memo[1] = Math.min(batchLength, chunkLength);
- return memo;
- }, [true, Number.POSITIVE_INFINITY] as [boolean, number]);
+ for (batchLength = Number.POSITIVE_INFINITY, i = -1; ++i < numColumns;) {
+ childData[i] = child = columns[i].shift()!;
+ batchLength = Math.min(batchLength, child ? child.length : batchLength);
+ }
- if (isFinite(batchLength) && !(sameLength && batchLength <= 0)) {
- batchData = distributeChildData(fields, batchLength, columns, memo);
- batchLength > 0 && (batchArgs[numBatches++] = [batchLength, batchData]);
+ if (isFinite(batchLength)) {
+ childData = distributeChildData(fields, batchLength, childData, columns, memo);
+ if (batchLength > 0) {
+ batchArgs[numBatches++] = [batchLength, childData.slice()];
+ }
}
}
return [
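The rewritten loop above replaces the reduce-based sameLength/batchLength bookkeeping: it now shift()s the head chunk of every column into childData and takes the minimum chunk length as the next batch length, so a zero-length chunk produces a zero-length batch that is simply skipped by the batchLength > 0 guard instead of poisoning the length comparison. A standalone sketch of just that inner loop, with simplified types:

    // Take the head chunk of each column; the shortest bounds the next
    // RecordBatch. A column with no chunks left leaves the minimum
    // unchanged, so a pass over exhausted columns returns Infinity.
    function nextBatchLength(columns: { length: number }[][]): number {
        let batchLength = Number.POSITIVE_INFINITY;
        for (let i = -1, n = columns.length; ++i < n;) {
            const child = columns[i].shift();
            batchLength = Math.min(batchLength, child ? child.length : batchLength);
        }
        return batchLength;
    }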
@@ -92,27 +94,25 @@ function uniformlyDistributeChunksAcrossRecordBatches<T extends { [key: string]:
}
/** @ignore */
-function distributeChildData<T extends { [key: string]: DataType } = any>(fields: Field<T[keyof T]>[], batchLength: number, columns: Data<T[keyof T]>[][], memo: { numBatches: number }) {
+function distributeChildData<T extends { [key: string]: DataType } = any>(fields: Field<T[keyof T]>[], batchLength: number, childData: Data<T[keyof T]>[], columns: Data<T[keyof T]>[][], memo: { numBatches: number }) {
let data: Data<T[keyof T]>;
let field: Field<T[keyof T]>;
- let chunks: Data<T[keyof T]>[];
let length = 0, i = -1, n = columns.length;
- const batchData = [] as Data<T[keyof T]>[];
const bitmapLength = ((batchLength + 63) & ~63) >> 3;
while (++i < n) {
- if ((data = (chunks = columns[i]).shift()!) && ((length = data.length) >= batchLength)) {
+ if ((data = childData[i]) && ((length = data.length) >= batchLength)) {
if (length === batchLength) {
- batchData[i] = data;
+ childData[i] = data;
} else {
- batchData[i] = data.slice(0, batchLength);
+ childData[i] = data.slice(0, batchLength);
data = data.slice(batchLength, length - batchLength);
- memo.numBatches = Math.max(memo.numBatches, chunks.push(data));
+ memo.numBatches = Math.max(memo.numBatches, columns[i].unshift(data));
}
} else {
(field = fields[i]).nullable || (fields[i] = field.clone({ nullable: true }) as Field<T[keyof T]>);
- batchData[i] = data ? data._changeLengthAndBackfillNullBitmap(batchLength)
+ childData[i] = data ? data._changeLengthAndBackfillNullBitmap(batchLength)
: new Data(field.type, 0, batchLength, batchLength, nullBufs(bitmapLength)) as Data<T[keyof T]>;
}
}
- return batchData;
+ return childData;
}
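Two details of the distributeChildData changes are worth noting. First, the unconsumed remainder of a partially-sliced chunk is now unshift()ed back onto the front of its column's chunk queue, since the outer loop consumes chunks from the front via shift(), whereas the old code push()ed onto a queue it drained itself. Second, the bitmapLength expression sizes the null bitmap by rounding the batch length up to the next multiple of 64 bits, then converting bits to bytes. A worked example of that expression:

    const batchLength = 100;
    const bits = (batchLength + 63) & ~63; // 128: next multiple of 64 bits
    const bitmapLength = bits >> 3;        // 16 bytes of validity bitmap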
diff --git a/js/test/unit/table/serialize-tests.ts b/js/test/unit/table/serialize-tests.ts
index a4a2df2..f337913 100644
--- a/js/test/unit/table/serialize-tests.ts
+++ b/js/test/unit/table/serialize-tests.ts
@@ -65,7 +65,8 @@ describe('Table#serialize()', () => {
const chunkLengths = [] as number[];
for (let i = -1; ++i < 3;) {
- chunkLengths[i] = (Math.random() * 100) | 0;
+ chunkLengths[i * 2] = (Math.random() * 100) | 0;
+ chunkLengths[i * 2 + 1] = 0;
const table = <T extends { [key: string]: DataType } = any>(schema: Schema<T>) => createTable(schema, chunkLengths);
test(`Table#select round-trips through serialization`, () => {
const source = table(schema1).select('a', 'c');
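The test change above interleaves a zero-length chunk after each randomly-sized chunk, producing chunk-length arrays like [42, 0, 7, 0, 91, 0] (values illustrative), so every serialization round-trip in this suite now exercises the zero-length-RecordBatch path fixed in recordbatch.ts.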