Posted to commits@arrow.apache.org by pt...@apache.org on 2019/03/02 20:34:32 UTC

[arrow] branch master updated: ARROW-4728: [JS] Fix Table#assign when passed zero-length RecordBatches

This is an automated email from the ASF dual-hosted git repository.

ptaylor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new bd03ba8  ARROW-4728: [JS] Fix Table#assign when passed zero-length RecordBatches
bd03ba8 is described below

commit bd03ba8345f3c53b27050337166f7ead25e87a54
Author: ptaylor <pa...@me.com>
AuthorDate: Sat Mar 2 12:34:16 2019 -0800

    ARROW-4728: [JS] Fix Table#assign when passed zero-length RecordBatches
    
    Closes https://issues.apache.org/jira/browse/ARROW-4728
    
    Author: ptaylor <pa...@me.com>
    
    Closes #3789 from trxcllnt/js/fix-assign-zero-length-batches and squashes the following commits:
    
    4c6ab56f <ptaylor> fix Table#assign when passed zero-length RecordBatches
---
 js/src/util/recordbatch.ts            | 40 +++++++++++++++++------------------
 js/test/unit/table/serialize-tests.ts |  3 ++-
 2 files changed, 22 insertions(+), 21 deletions(-)
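
For context, Table#assign merges the columns of two tables, redistributing their chunks into a common set of record batches; the bug fixed here surfaced when one side contributed a zero-length RecordBatch. The sketch below is not part of this commit: it is a hedged repro outline assuming the 0.4-era apache-arrow JS API (Table.new, Column.new, Float32Vector.from), with `left` and `right` as illustrative names only.

    // Hedged sketch (not from this commit), assuming the 0.4-era API.
    import { Table, Column, Float32Vector } from 'apache-arrow';

    const left = Table.new(
        Column.new('a', Float32Vector.from(new Float32Array([1, 2, 3]))));

    // `right` holds a single zero-length chunk, i.e. a zero-length RecordBatch
    const right = Table.new(
        Column.new('b', Float32Vector.from(new Float32Array(0))));

    // Before this fix, redistributing chunks could mishandle the empty
    // batch; with it, zero-length batches no longer break assign.
    const combined = left.assign(right);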

diff --git a/js/src/util/recordbatch.ts b/js/src/util/recordbatch.ts
index 40ced20..4551d0c 100644
--- a/js/src/util/recordbatch.ts
+++ b/js/src/util/recordbatch.ts
@@ -68,21 +68,23 @@ function uniformlyDistributeChunksAcrossRecordBatches<T extends { [key: string]:
     const fields = [...schema.fields];
     const batchArgs = [] as [number, Data<T[keyof T]>[]][];
     const memo = { numBatches: columns.reduce((n, c) => Math.max(n, c.length), 0) };
-    let sameLength = false, numBatches = 0, batchLength = 0, batchData: Data<T[keyof T]>[];
+
+    let numBatches = 0, batchLength = 0;
+    let i: number = -1, numColumns = columns.length;
+    let child: Data<T[keyof T]>, childData: Data<T[keyof T]>[] = [];
 
     while (memo.numBatches-- > 0) {
 
-        [sameLength, batchLength] = columns.reduce((memo, [chunk]) => {
-            const [same, batchLength] = memo;
-            const chunkLength = chunk ? chunk.length : batchLength;
-            isFinite(batchLength) && same && (memo[0] = chunkLength === batchLength);
-            memo[1] = Math.min(batchLength, chunkLength);
-            return memo;
-        }, [true, Number.POSITIVE_INFINITY] as [boolean, number]);
+        for (batchLength = Number.POSITIVE_INFINITY, i = -1; ++i < numColumns;) {
+            childData[i] = child = columns[i].shift()!;
+            batchLength = Math.min(batchLength, child ? child.length : batchLength);
+        }
 
-        if (isFinite(batchLength) && !(sameLength && batchLength <= 0)) {
-            batchData = distributeChildData(fields, batchLength, columns, memo);
-            batchLength > 0 && (batchArgs[numBatches++] = [batchLength, batchData]);
+        if (isFinite(batchLength)) {
+            childData = distributeChildData(fields, batchLength, childData, columns, memo);
+            if (batchLength > 0) {
+                batchArgs[numBatches++] = [batchLength, childData.slice()];
+            }
         }
     }
     return [
@@ -92,27 +94,25 @@ function uniformlyDistributeChunksAcrossRecordBatches<T extends { [key: string]:
 }
 
 /** @ignore */
-function distributeChildData<T extends { [key: string]: DataType } = any>(fields: Field<T[keyof T]>[], batchLength: number, columns: Data<T[keyof T]>[][], memo: { numBatches: number }) {
+function distributeChildData<T extends { [key: string]: DataType } = any>(fields: Field<T[keyof T]>[], batchLength: number, childData: Data<T[keyof T]>[], columns: Data<T[keyof T]>[][], memo: { numBatches: number }) {
     let data: Data<T[keyof T]>;
     let field: Field<T[keyof T]>;
-    let chunks: Data<T[keyof T]>[];
     let length = 0, i = -1, n = columns.length;
-    const batchData = [] as Data<T[keyof T]>[];
     const bitmapLength = ((batchLength + 63) & ~63) >> 3;
     while (++i < n) {
-        if ((data = (chunks = columns[i]).shift()!) && ((length = data.length) >= batchLength)) {
+        if ((data = childData[i]) && ((length = data.length) >= batchLength)) {
             if (length === batchLength) {
-                batchData[i] = data;
+                childData[i] = data;
             } else {
-                batchData[i] = data.slice(0, batchLength);
+                childData[i] = data.slice(0, batchLength);
                 data = data.slice(batchLength, length - batchLength);
-                memo.numBatches = Math.max(memo.numBatches, chunks.push(data));
+                memo.numBatches = Math.max(memo.numBatches, columns[i].unshift(data));
             }
         } else {
             (field = fields[i]).nullable || (fields[i] = field.clone({ nullable: true }) as Field<T[keyof T]>);
-            batchData[i] = data ? data._changeLengthAndBackfillNullBitmap(batchLength)
+            childData[i] = data ? data._changeLengthAndBackfillNullBitmap(batchLength)
                 : new Data(field.type, 0, batchLength, batchLength, nullBufs(bitmapLength)) as Data<T[keyof T]>;
         }
     }
-    return batchData;
+    return childData;
 }
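
The hunks above replace a reduce that tracked both a sameLength flag and the minimum chunk length with a plainer loop: pop the head chunk of every column into a reusable childData array, take the minimum head length as the next batch length, and let distributeChildData slice over-long chunks and unshift the remainders back onto their columns. Batches whose length works out to zero are now simply skipped rather than guarded by the old sameLength check. A standalone sketch of the per-iteration step follows; the Chunk interface and the nextBatch name are simplifications standing in for Arrow's Data type and the real loop, not library code.

    // Sketch: one iteration of the chunk-distribution loop.
    interface Chunk { length: number; }

    function nextBatch(columns: (Chunk | undefined)[][]): [number, (Chunk | undefined)[]] {
        const heads: (Chunk | undefined)[] = [];
        let batchLength = Number.POSITIVE_INFINITY;
        for (let i = -1; ++i < columns.length;) {
            const head = columns[i].shift(); // pop this column's head chunk
            heads[i] = head;
            // exhausted columns (head === undefined) don't constrain the length
            batchLength = Math.min(batchLength, head ? head.length : batchLength);
        }
        // callers skip the batch entirely when batchLength === 0
        return [batchLength, heads];
    }

Elsewhere in the hunk, the expression ((batchLength + 63) & ~63) >> 3 rounds the validity-bitmap size up to a whole multiple of 64 bits and converts it to bytes, matching Arrow's 64-bit buffer alignment.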
diff --git a/js/test/unit/table/serialize-tests.ts b/js/test/unit/table/serialize-tests.ts
index a4a2df2..f337913 100644
--- a/js/test/unit/table/serialize-tests.ts
+++ b/js/test/unit/table/serialize-tests.ts
@@ -65,7 +65,8 @@ describe('Table#serialize()', () => {
 
     const chunkLengths = [] as number[];
     for (let i = -1; ++i < 3;) {
-        chunkLengths[i] = (Math.random() * 100) | 0;
+        chunkLengths[i * 2] = (Math.random() * 100) | 0;
+        chunkLengths[i * 2 + 1] = 0;
         const table = <T extends { [key: string]: DataType } = any>(schema: Schema<T>) => createTable(schema, chunkLengths);
         test(`Table#select round-trips through serialization`, () => {
             const source = table(schema1).select('a', 'c');
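
The test change interleaves a zero-length chunk after every randomly sized one, so each serialization round-trip now exercises the zero-length-batch path. Restating the rewritten loop on its own (the example values in the final comment are illustrative, not test output):

    // Three iterations of the rewritten loop fill six slots:
    const chunkLengths = [] as number[];
    for (let i = -1; ++i < 3;) {
        chunkLengths[i * 2] = (Math.random() * 100) | 0; // random length in [0, 100)
        chunkLengths[i * 2 + 1] = 0;                     // followed by an empty chunk
    }
    // e.g. chunkLengths === [42, 0, 7, 0, 91, 0]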